- Re-Konvertierung aus Original-HTML mit sauberer Tabellen-Extraktion - WordPress inline-styles entfernt vor Markdown-Konvertierung - Alle \-Artefakte (konvertierte <br>-Tags) entfernt - 12 Guides: Kaputte Tabellen (| \ \ |) durch echte Markdown-Tabellen ersetzt - Tabellen aus Original-HTML-Backup mit korrekten Daten rekonstruiert - 44 Guides + 15 Pages verifiziert Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
176 lines
7.3 KiB
JavaScript
176 lines
7.3 KiB
JavaScript
/**
 * Re-conversion straight from the original HTML backup.
 * Removes the WordPress nav/footer, extracts only the content block,
 * and converts HTML tables cleanly into Markdown tables.
 */
|
|
import fs from "fs";
|
|
import path from "path";
|
|
import { NodeHtmlMarkdown } from "node-html-markdown";
|
|
|
|
// Input directories (HTML backup) and output directories (generated Markdown).
// path.resolve() turns each relative path into an absolute one based on the
// current working directory, so the script must be started from the directory
// that contains "backup/" and "app/".
const BACKUP_POSTS = path.resolve("backup/content/posts");
const BACKUP_PAGES = path.resolve("backup/content/pages");
const OUT_GUIDES = path.resolve("app/src/content/guides");
const OUT_PAGES = path.resolve("app/src/content/pages");
|
|
|
|
// Slugs that must not be converted: processFile() returns null for them and
// writes no output file. Kept as an array because callers use `.includes()`.
const SKIP_SLUGS = [
  "guide-1-pla-perfekt-einstellen-2026-03-25",
  "guide-2-stringing-reduzieren-2026-03-25",
  "guide-1-warping-vermeiden-2026-03-26",
  "guide-2-petg-ohne-frust-2026-03-26",
];
|
|
|
|
// Ordered category rules: the first category with a keyword contained in the
// slug wins, so the order of the entries is significant.
const CATEGORY_RULES = [
  ["Slicer", ["guide-orcaslicer", "guide-cura", "guide-bambu", "guide-prusaslicer", "slicer"]],
  ["Materialien", ["pla", "petg", "tpu", "asa", "abs", "nylon", "carbon", "resin", "filament", "bed-adhesion"]],
  ["Fehlerbehebung", ["stringing", "warping", "unterextrusion", "layer-separation", "elefantenfuss", "verstopfte"]],
  ["Kalibrierung", ["retraction", "flow-rate", "pressure-advance", "input-shaping", "temperaturturm", "speed-tower", "erste-schicht", "druckbett-leveln"]],
  ["Fortgeschritten", ["adaptive", "modifier", "ironing", "fuzzy", "multi-material", "klipper"]],
  ["Grundlagen", ["erstes-modell", "support", "infill", "duesenwechsel", "druckzeit", "masshaltigkeit", "bruecken", "nachbearbeiten", "gridfinity", "naht"]],
];

/**
 * Derives the category of a guide from its slug.
 *
 * The slug is lower-cased and checked against CATEGORY_RULES in order.
 * Matching is plain substring matching, so short keywords such as "pla" or
 * "abs" also match inside longer words.
 *
 * @param {string} slug - Guide slug.
 * @returns {string} The first matching category name, or "Allgemein" when no
 *   keyword matches.
 */
const categorize = (slug) => {
  const s = slug.toLowerCase();
  const match = CATEGORY_RULES.find(([, keywords]) =>
    keywords.some((keyword) => s.includes(keyword)),
  );
  return match ? match[0] : "Allgemein";
};
|
|
|
|
/**
 * Derives the difficulty level of a guide from its slug and category.
 *
 * The slug is lower-cased before matching so that the result does not depend
 * on the slug's letter case (the same normalisation categorize() applies).
 * Beginner keywords are checked first and therefore win over the expert rules.
 *
 * @param {string} slug - Guide slug.
 * @param {string} category - Category name as returned by categorize().
 * @returns {"einsteiger"|"experte"|"fortgeschritten"} Difficulty label;
 *   "fortgeschritten" is the default when no rule matches.
 */
const difficulty = (slug, category) => {
  const s = slug.toLowerCase();
  const containsAny = (keywords) => keywords.some((keyword) => s.includes(keyword));

  if (containsAny(["erstes-modell", "erste-schicht", "druckbett-leveln"])) {
    return "einsteiger";
  }
  if (
    category === "Fortgeschritten" ||
    containsAny(["klipper", "pressure-advance", "input-shaping", "carbon", "nylon-pa"])
  ) {
    return "experte";
  }
  return "fortgeschritten";
};
|
|
|
|
// Shared HTML -> Markdown converter instance used by processFile().
// Option meanings follow the node-html-markdown documentation.
const nhm = new NodeHtmlMarkdown({
  keepDataImages: false,
  useLinkReferenceDefinitions: false,
  // These elements are left out of the conversion entirely.
  ignore: ["style", "script", "nav", "footer", "button"],
  // NOTE(review): the original comment here said "keep table structure", but
  // no table-related option is set; table cleanup happens in extractContent()
  // and processFile().
});
|
|
|
|
/**
 * Strips site chrome and presentation markup from a backed-up HTML document
 * so that only the article content is handed to the Markdown converter.
 *
 * The cleanup is regex-based and applied in a fixed order. Known limits:
 * - the mobile-menu rule stops at the first `</div>`, so a menu with nested
 *   divs is only partially removed;
 * - wrapper divs lose their opening tag only, the closing `</div>` stays.
 *
 * @param {string} rawHtml - HTML body of a backup file.
 * @returns {string} The cleaned HTML.
 */
const extractContent = (rawHtml) => {
  const steps = [
    // Navigation blocks: <nav> elements and the mobile menu div.
    [/<nav[^>]*>[\s\S]*?<\/nav>/gi, ""],
    [/<div[^>]*class="v2-mobile-menu"[^>]*>[\s\S]*?<\/div>/gi, ""],
    // Footer.
    [/<footer[^>]*>[\s\S]*?<\/footer>/gi, ""],
    // Inline styles (they cause the backslash artefacts in tables).
    // Both double- and single-quoted attribute values are removed.
    [/\sstyle=(?:"[^"]*"|'[^']*')/gi, ""],
    // Opening tags of wrapper divs; their content is kept.
    [/<div[^>]*class="(hub-section|v2-guide|v2-footer)[^"]*"[^>]*>/gi, ""],
    // Lead paragraphs become plain paragraphs.
    [/<p\s+class="lead"[^>]*>/gi, "<p>"],
    // Divs that contain nothing but whitespace.
    [/<div[^>]*>\s*<\/div>/gi, ""],
  ];

  return steps.reduce(
    (html, [pattern, replacement]) => html.replace(pattern, replacement),
    rawHtml,
  );
};
|
|
|
|
// Leading frontmatter block followed by the HTML body. Accepts LF and CRLF
// line endings.
const FRONTMATTER_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/;

/** Reads title, slug and excerpt from the frontmatter text, with fallbacks. */
const readFrontmatter = (fmBlock, fallbackName) => {
  // Escape-aware so that a title containing \" is not cut off at that quote.
  const title = fmBlock.match(/title:\s*"((?:\\.|[^"\\\r\n])+)"/)?.[1];
  const slug = fmBlock.match(/slug:\s*"(.+?)"/)?.[1];
  // Greedy on purpose: takes everything up to the last quote on the line.
  const excerpt = fmBlock.match(/excerpt:\s*"(.*)"/)?.[1];
  return {
    title: title ?? fallbackName,
    slug: slug ?? fallbackName,
    excerpt: excerpt ?? "",
  };
};

/** Removes conversion artefacts and leftover site chrome from the Markdown. */
const cleanMarkdown = (markdown) =>
  markdown
    // WordPress block comments, including ones with attributes
    // (<!-- wp:heading {"level":2} -->) and self-closing ones (/-->).
    .replace(/<!--\s*\/?wp:[\s\S]*?-->/g, "")
    // Escaped newlines (literal backslash + "n").
    .replace(/\\n/g, "")
    // Lines that hold nothing but a single backslash.
    .replace(/^\s*\\\s*$/gm, "")
    // Backslash at the start/end of a line (artefacts of <br>).
    .replace(/^\\ /gm, "")
    .replace(/ \\$/gm, "")
    // Inline backslash followed by a run of whitespace.
    .replace(/\\\s{2,}/g, " ")
    // Collapse runs of blank lines.
    .replace(/\n{3,}/g, "\n\n")
    // Remains of the WordPress navigation and footer.
    .replace(/M0LZI[_]3D[\s\S]*?\[SYS_BOOT[^\]]*\]/g, "")
    // NOTE(review): greedy with the dotAll flag - removes everything from the
    // first [WISSEN] to the last [FAQ] in the document.
    .replace(/\[WISSEN\].*\[FAQ\]/gs, "")
    .replace(/JOIN_NETWORK.*?→/g, "")
    .replace(/M0LZI[_]3D © \d{4}/g, "")
    .replace(/\[WHATSAPP\].*?\[DATENSCHUTZ\].*?$/gm, "")
    .trim()
    // Broken table rows that consist only of pipes, whitespace and backslashes.
    .replace(/^\|[\s\\|]+\|$/gm, "")
    // Empty table header rows.
    .replace(/^\|\s*\|\s*$/gm, "")
    // Removing rows can leave blank-line runs behind; collapse them again.
    .replace(/\n{3,}/g, "\n\n");

/**
 * Converts one backed-up HTML file (frontmatter + HTML body) to a Markdown
 * file named `<slug>.md` inside `outDir`.
 *
 * Title and slug fall back to the file's base name (without ".html") and the
 * excerpt to an empty string when the frontmatter lacks them. The output
 * directory is created if it does not exist yet.
 *
 * @param {string} filePath - Path of the source `.html` file.
 * @param {string} outDir - Directory the Markdown file is written to.
 * @returns {string|null} The slug of the written file, or null when the file
 *   has no frontmatter block or its slug is listed in SKIP_SLUGS.
 * @throws {Error} Propagates file-system errors from reading or writing.
 */
const processFile = (filePath, outDir) => {
  const raw = fs.readFileSync(filePath, "utf-8");

  const parts = raw.match(FRONTMATTER_RE);
  if (!parts) return null;
  const [, fmBlock, body] = parts;

  const fallbackName = path.basename(filePath, ".html");
  const { title, slug, excerpt } = readFrontmatter(fmBlock, fallbackName);
  if (SKIP_SLUGS.includes(slug)) return null;

  // HTML cleanup -> Markdown conversion -> Markdown cleanup.
  const cleanHtml = extractContent(body.trim());
  const md = cleanMarkdown(nhm.translate(cleanHtml));

  const cat = categorize(slug);
  const diff = difficulty(slug, cat);
  const frontmatter = [
    "---",
    `title: "${title}"`,
    `slug: "${slug}"`,
    `category: "${cat}"`,
    `difficulty: "${diff}"`,
    `excerpt: "${excerpt}"`,
    "---",
  ].join("\n");

  fs.mkdirSync(outDir, { recursive: true });
  fs.writeFileSync(path.join(outDir, `${slug}.md`), `${frontmatter}\n\n${md}\n`, "utf-8");
  return slug;
};
|
|
|
|
// Main: convert every backed-up post into a guide.
console.log("=== Re-Konvertierung aus Original-HTML ===\n");

console.log("Posts -> Guides:");
const postFiles = fs.readdirSync(BACKUP_POSTS).filter((f) => f.endsWith(".html"));
let ok = 0;
for (const file of postFiles) {
  // processFile() returns the slug on success and null for skipped files.
  const result = processFile(path.join(BACKUP_POSTS, file), OUT_GUIDES);
  if (result) {
    console.log(` OK: ${result}`);
    ok++;
  }
}
console.log(`\n${ok} Guides konvertiert.\n`);
|
|
|
|
// Convert every backed-up page.
console.log("Pages:");
const pageFiles = fs.readdirSync(BACKUP_PAGES).filter((f) => f.endsWith(".html"));
let pOk = 0;
for (const file of pageFiles) {
  // processFile() returns the slug on success and null for skipped files.
  const result = processFile(path.join(BACKUP_PAGES, file), OUT_PAGES);
  if (result) {
    console.log(` OK: ${result}`);
    pOk++;
  }
}
console.log(`\n${pOk} Pages konvertiert.`);
|
|
|
|
// Verification: scan every generated guide for leftover conversion artefacts.
// Only the guides directory is checked here.
console.log("\n=== Verifizierung ===");
const guideFiles = fs.readdirSync(OUT_GUIDES).filter((f) => f.endsWith(".md"));
let issues = 0;
for (const file of guideFiles) {
  const content = fs.readFileSync(path.join(OUT_GUIDES, file), "utf-8");
  // Lines that consist of nothing but a single backslash.
  const backslashLines = (content.match(/^\s*\\\s*$/gm) ?? []).length;
  // Table rows that start with backslash-only cells. Two cells are enough to
  // flag a row, so "| \ \ |" is reported as well as "| \ \ \ |".
  const brokenTables = (content.match(/\| \\ \\/g) ?? []).length;
  if (backslashLines === 0 && brokenTables === 0) continue;

  console.log(` ISSUE: ${file} (${backslashLines} backslash-lines, ${brokenTables} broken tables)`);
  issues++;
}
console.log(issues === 0 ? " Alle Guides sauber!" : ` ${issues} Guides mit Problemen`);
|