fix: Komplette Neukonvertierung — Tabellen und Content endgueltig sauber
Grundproblem: WordPress speicherte HTML mit escaped \n (literal \\n statt Newlines) und inline style-Attributen in Tabellen. node-html-markdown konvertierte diese als Backslash-Artefakte und einzeilige Pipe-Strings.

Loesung: Neues final-rebuild.mjs Skript:
- \\n -> echte Newlines VOR der Konvertierung
- style-Attribute komplett entfernt (verursachten Backslash-Tabellen)
- Nav/Footer/SVG per Regex vor dem Parsing entfernt
- Tabellen werden jetzt korrekt mehrzeilig mit Header/Separator/Rows gerendert
- 44 Guides + 15 Pages verifiziert: 0 Probleme

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
171
scripts/final-rebuild.mjs
Normal file
171
scripts/final-rebuild.mjs
Normal file
@@ -0,0 +1,171 @@
|
||||
/**
 * FINAL re-conversion of all guides.
 *
 * Strategy:
 * 1. \\n -> real newlines
 * 2. Remove nav/footer/SVG blocks completely
 * 3. Remove style attributes (they cause backslash tables)
 * 4. node-html-markdown converts the cleaned HTML
 * 5. Post-cleanup: remaining artifacts
 */
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import { NodeHtmlMarkdown } from "node-html-markdown";
|
||||
|
||||
// Input directories: the main script reads every `.html` file found here.
const BACKUP_POSTS = "backup/content/posts";
const BACKUP_PAGES = "backup/content/pages";

// Output directories: converted `.md` files are written here.
const OUT_GUIDES = "app/src/content/guides";
const OUT_PAGES = "app/src/content/pages";

// Slugs that processFile() refuses to convert (it returns null for them).
const SKIPPED_SLUGS = [
  "guide-1-pla-perfekt-einstellen-2026-03-25",
  "guide-2-stringing-reduzieren-2026-03-25",
  "guide-1-warping-vermeiden-2026-03-26",
  "guide-2-petg-ohne-frust-2026-03-26",
];
const SKIP = new Set(SKIPPED_SLUGS);
|
||||
|
||||
/**
 * Maps a slug to its content category.
 *
 * Rules are tried from top to bottom; the first pattern that occurs anywhere
 * in the slug (plain substring match, no word boundaries) decides. Slugs that
 * match no rule fall back to "Allgemein".
 *
 * @param {string} s - Slug to classify.
 * @returns {string} Category name.
 */
const categorize = (s) => {
  // Order is significant: an earlier rule shadows every later one.
  const rules = [
    ["Slicer", /guide-orcaslicer|guide-cura|guide-bambu|guide-prusaslicer|slicer/],
    ["Materialien", /pla|petg|tpu|asa|abs|nylon|carbon|resin|filament|bed-adhesion/],
    ["Fehlerbehebung", /stringing|warping|unterextrusion|layer-separation|elefantenfuss|verstopfte/],
    ["Kalibrierung", /retraction|flow-rate|pressure-advance|input-shaping|temperaturturm|speed-tower|erste-schicht|druckbett-leveln/],
    ["Fortgeschritten", /adaptive|modifier|ironing|fuzzy|multi-material|klipper/],
    ["Grundlagen", /erstes-modell|support|infill|duesenwechsel|druckzeit|masshaltigkeit|bruecken|nachbearbeiten|gridfinity|naht/],
  ];

  const firstHit = rules.find(([, pattern]) => pattern.test(s));
  return firstHit ? firstHit[0] : "Allgemein";
};
|
||||
|
||||
/**
 * Derives the difficulty label for a slug.
 *
 * Beginner topics win over everything else. After that, a slug is rated
 * "experte" when its category is "Fortgeschritten" or the slug names one of
 * the expert topics. All remaining slugs are "fortgeschritten".
 *
 * @param {string} slug - Slug of the guide or page.
 * @param {string} cat - Category name as produced by categorize().
 * @returns {string} "einsteiger", "experte" or "fortgeschritten".
 */
const diff = (slug, cat) => {
  const isBeginnerTopic = /erstes-modell|erste-schicht|druckbett-leveln/.test(slug);
  if (isBeginnerTopic) {
    return "einsteiger";
  }

  const isExpertTopic =
    cat === "Fortgeschritten" ||
    /klipper|pressure-advance|input-shaping|carbon|nylon-pa/.test(slug);

  return isExpertTopic ? "experte" : "fortgeschritten";
};
|
||||
|
||||
// Converter options; both flags are explicitly switched off.
const nhmOptions = {
  keepDataImages: false,
  useLinkReferenceDefinitions: false,
};

// One converter instance, used by processFile() for every file.
const nhm = new NodeHtmlMarkdown(nhmOptions);
|
||||
|
||||
/**
 * Strips WordPress and theme markup from raw post HTML before it is handed to
 * the Markdown converter.
 *
 * The replacements run strictly in the listed order; later steps rely on
 * earlier ones (for example, `class` attributes are only removed after the
 * class-based div rules have run). The element-removal patterns are
 * non-greedy and stop at the first matching closing tag.
 *
 * Compared with the first version of this function:
 * - Gutenberg block delimiters are removed in every serialized form:
 *   `<!-- wp:name -->`, `<!-- wp:name {"attr":1} -->`,
 *   `<!-- wp:namespace/name -->`, `<!-- /wp:name -->` and the self-closing
 *   `<!-- wp:name /-->`. Previously only the attribute-less form matched.
 * - `style` and `class` attributes are removed whether their value is
 *   double- or single-quoted.
 *
 * @param {string} raw - HTML body as stored in the backup file.
 * @returns {string} HTML with navigation, footer, SVG, buttons, inline styles,
 *   classes and block delimiters removed.
 */
function cleanHtml(raw) {
  const steps = [
    // 1. Literal backslash-n sequences -> real newlines.
    [/\\n/g, "\n"],

    // 2. Gutenberg block delimiters (only the comment tags, not the content
    //    between them). `[^<>]` keeps a match inside a single comment.
    [/<!--\s*\/?wp:[^<>]*?-->/g, ""],

    // 3. Complete navigation blocks, including the mobile menu container.
    [/<nav[^>]*>[\s\S]*?<\/nav>/gi, ""],
    [/<div[^>]*class="v2-mobile-menu"[^>]*>[\s\S]*?<\/div>/gi, ""],

    // 4. Footer blocks.
    [/<footer[^>]*>[\s\S]*?<\/footer>/gi, ""],

    // 5. SVG blocks (hero graphics).
    [/<svg[^>]*>[\s\S]*?<\/svg>/gi, ""],

    // 6. CRITICAL: inline style attributes. Per the original author's note
    //    they are what made the converter emit backslash-ridden tables.
    [/\s+style=(?:"[^"]*"|'[^']*')/gi, ""],

    // 7. Opening tags of wrapper divs with known classes. Must run before
    //    step 8, which deletes the class attribute this rule keys on.
    [/<div[^>]*class="(?:v2-guide|hub-section|v2-footer|lead)"[^>]*>/gi, ""],

    // 8. All remaining class attributes.
    [/\s+class=(?:"[^"]*"|'[^']*')/gi, ""],

    // 9. Buttons (hamburger menu toggles).
    [/<button[^>]*>[\s\S]*?<\/button>/gi, ""],
  ];

  return steps.reduce(
    (html, [pattern, replacement]) => html.replace(pattern, replacement),
    raw,
  );
}
|
||||
|
||||
/**
 * Converts one backed-up HTML file into a Markdown file with fresh front
 * matter and writes it to `<outDir>/<slug>.md`.
 *
 * Steps: split front matter from body, read `title` and `slug` (falling back
 * to the file's base name), clean the HTML, convert it to Markdown, strip
 * leftover artifacts, then write the result.
 *
 * Compared with the first version of this function:
 * - Front matter is recognised with CRLF line endings and with a leading BOM.
 *   Such files used to fail the match and were skipped without any message.
 * - The output directory is created when missing instead of failing with
 *   ENOENT on the first write.
 *
 * @param {string} filePath - Path of the source `.html` file.
 * @param {string} outDir - Directory that receives the `.md` file.
 * @returns {string|null} The slug that was written, or null when the file has
 *   no front matter block or its slug is listed in SKIP.
 */
function processFile(filePath, outDir) {
  const raw = fs.readFileSync(filePath, "utf-8");

  // Optional BOM, then `---` fences; `\r?` tolerates Windows line endings.
  const fmMatch = raw.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
  if (!fmMatch) return null;

  const fmBlock = fmMatch[1];
  const htmlContent = fmMatch[2].trim();

  // Only double-quoted values are recognised; otherwise use the file name.
  const fallbackName = path.basename(filePath, ".html");
  const title = fmBlock.match(/title:\s*"(.+?)"/)?.[1] ?? fallbackName;
  const slug = fmBlock.match(/slug:\s*"(.+?)"/)?.[1] ?? fallbackName;

  if (SKIP.has(slug)) return null;

  // Clean the HTML, then convert it.
  const cleanedHtml = cleanHtml(htmlContent);
  let md = nhm.translate(cleanedHtml);

  // Post-cleanup. Order matters: the text-fragment rules run on the still
  // escaped Markdown, the unescaping of \~ \[ \] \_ happens afterwards.
  md = md
    // Leftover site text fragments that survived the HTML cleanup.
    .replace(/M0LZI[_\\]*3D[\s\S]*?\[SYS_BOOT[^\]]*\]/g, "")
    .replace(/\[WISSEN\][\s\S]*?\[FAQ\]/g, "")
    .replace(/JOIN_NETWORK.*?→/g, "")
    .replace(/M0LZI[_\\]*3D © \d{4}/g, "")
    .replace(/\[WHATSAPP\][\s\S]*?\[DATENSCHUTZ\][\s\S]*?$/gm, "")
    // Backslash artifacts: backslash-only lines, leading "\ ", trailing " \",
    // and a backslash followed by a run of whitespace.
    .replace(/^\s*\\+\s*$/gm, "")
    .replace(/^\\ /gm, "")
    .replace(/ \\$/gm, "")
    .replace(/\\\s{2,}/g, " ")
    // Undo escaping of selected special characters.
    .replace(/\\~/g, "~")
    .replace(/\\\[/g, "[")
    .replace(/\\\]/g, "]")
    .replace(/\\_/g, "_")
    // Collapse runs of blank lines to a single blank line.
    .replace(/\n{3,}/g, "\n\n")
    .trim();

  const cat = categorize(slug);
  const d = diff(slug, cat);

  const fm = `---\ntitle: "${title}"\nslug: "${slug}"\ncategory: "${cat}"\ndifficulty: "${d}"\nexcerpt: ""\n---`;

  // A fresh checkout may not have the content directory yet.
  fs.mkdirSync(outDir, { recursive: true });
  const outPath = path.join(outDir, `${slug}.md`);
  fs.writeFileSync(outPath, `${fm}\n\n${md}\n`, "utf-8");
  return slug;
}
|
||||
|
||||
// Main

/**
 * Runs processFile() on every `.html` file of `srcDir` (in sorted order) and
 * logs each slug that was written.
 *
 * @param {string} srcDir - Directory containing the backed-up HTML files.
 * @param {string} outDir - Directory that receives the Markdown files.
 * @returns {number} Number of files for which processFile() returned a slug.
 */
function convertDirectory(srcDir, outDir) {
  const htmlFiles = fs
    .readdirSync(srcDir)
    .filter((name) => name.endsWith(".html"))
    .sort();

  let converted = 0;
  for (const name of htmlFiles) {
    const slug = processFile(path.join(srcDir, name), outDir);
    if (slug) {
      console.log(` OK: ${slug}`);
      converted++;
    }
  }
  return converted;
}

console.log("=== FINALE Neukonvertierung ===\n");

const ok = convertDirectory(BACKUP_POSTS, OUT_GUIDES);
console.log(`\n${ok} Guides.\n`);

const pok = convertDirectory(BACKUP_PAGES, OUT_PAGES);
console.log(`\n${pok} Pages.\n`);

// Verification: scan every generated Markdown file for known conversion
// artifacts and report the files that still contain any.
console.log("=== Verifikation ===");
let issues = 0;
for (const dir of [OUT_GUIDES, OUT_PAGES]) {
  const mdFiles = fs
    .readdirSync(dir)
    .filter((name) => name.endsWith(".md"))
    .sort();

  for (const f of mdFiles) {
    const c = fs.readFileSync(path.join(dir, f), "utf-8");

    // Lines holding a separator cell that split into more than 8 parts on
    // "|" are counted as tables collapsed onto one line. This is a heuristic:
    // a genuine table with seven or more columns trips it as well.
    const inlineTables = c
      .split("\n")
      .filter((l) => l.includes("| ---") && l.split("|").length > 8).length;

    // Lines that start (after optional whitespace) with two or more backslashes.
    const backslashJunk = (c.match(/^\s*\\{2,}/gm) || []).length;

    if (inlineTables > 0 || backslashJunk > 0) {
      console.log(` ISSUE: ${f} (${inlineTables} inline, ${backslashJunk} bs)`);
      issues++;
    }
  }
}
console.log(issues === 0 ? "\nAlle sauber!" : `\n${issues} Probleme`);

// Spot check: print the first 40 lines of one known guide. The check is a
// convenience only, so a missing file must not abort the finished run.
console.log("\n=== Stichprobe: Carbon Fiber ===");
const samplePath = path.join(OUT_GUIDES, "carbon-fiber-glasfaser-filamente.md");
if (fs.existsSync(samplePath)) {
  const cf = fs.readFileSync(samplePath, "utf-8");
  console.log(cf.split("\n").slice(0, 40).join("\n"));
} else {
  console.log(`(Stichprobe nicht gefunden: ${samplePath})`);
}
|
||||
Reference in New Issue
Block a user