- Cleanup-Skript entfernt alte WP-Navigation (M0LZI_3D, [WISSEN], etc.) - Footer-Bloecke (Copyright, WHATSAPP, IMPRESSUM) aus 44 Guides + 15 Pages - Escaped Backslash-Artefakte bereinigt - Alle 44 Guides live verifiziert: 0 Artefakte Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
168 lines
5.0 KiB
JavaScript
168 lines
5.0 KiB
JavaScript
/**
|
|
* Entfernt WordPress Header/Footer/Navigation-Artefakte aus allen Markdown-Guides.
|
|
* Behaelt nur den eigentlichen Content zwischen erstem H1/H2 und dem Footer.
|
|
*/
|
|
import fs from "fs";
|
|
import path from "path";
|
|
|
|
const GUIDES_DIR = path.resolve("app/src/content/guides");
|
|
const PAGES_DIR = path.resolve("app/src/content/pages");
|
|
|
|
// Patterns that identify lines belonging to the old WordPress header/navigation block.
// The separator inside M0LZI_3D, SYS_BOOT and JOIN_NETWORK is matched as one or more
// underscores/backslashes so that the Markdown-escaped form (`\_`) is recognised as
// well as the plain underscore.
const HEADER_PATTERNS = [
  /M0LZI[_\\]+3D/i,
  /\[SYS[_\\]+BOOT/i,
  /\[WISSEN\]/,
  /\[RECHNER\]/,
  /\[TOOLS\]/,
  /\[MODELLE\]/,
  /\[NEWS\]/,
  /\[SHOWCASE\]/,
  /\[FAQ\]/,
  /JOIN[_\\]+NETWORK/,
  /chat\.whatsapp\.com/,
];
|
|
|
|
// Patterns that identify lines belonging to the old WordPress footer block.
// The separator in the copyright line is matched as one or more underscores/backslashes
// so that the Markdown-escaped form (`M0LZI\_3D © 2025`) is recognised too.
const FOOTER_PATTERNS = [
  /M0LZI[_\\]+3D © \d{4}/i,
  /\[WHATSAPP\]/,
  /\[IMPRESSUM\]/,
  /\[DATENSCHUTZ\]/,
];
|
|
|
|
/** Returns true when the line matches at least one entry of HEADER_PATTERNS. */
const isHeaderLine = (line) => {
  for (const pattern of HEADER_PATTERNS) {
    if (pattern.test(line)) return true;
  }
  return false;
};
|
|
|
|
/** Returns true when the line matches at least one entry of FOOTER_PATTERNS. */
const isFooterLine = (line) => {
  for (const pattern of FOOTER_PATTERNS) {
    if (pattern.test(line)) return true;
  }
  return false;
};
|
|
|
|
/**
 * Reports whether a line carries no real content.
 *
 * A line is junk when, after trimming, it is empty or consists solely of
 * whitespace, stray backslashes and literal escaped-newline sequences
 * (a backslash followed by the letter `n`).
 *
 * @param {string} line - A single line of the Markdown body.
 * @returns {boolean} True if the line can be discarded.
 */
const isJunkLine = (line) => {
  const trimmed = line.trim();
  if (trimmed === "") return true;
  // Alternation instead of a character class: `[\\n\s]` would also accept the
  // bare letter "n", so a content line such as "n" would be treated as junk.
  return /^(?:\\n?|\s)+$/.test(trimmed);
};
|
|
|
|
/**
 * Strips header/navigation and footer artefacts from one Markdown file and
 * rewrites the file in place.
 *
 * The frontmatter block is kept as is. Of the body, only the part from the
 * first real H1/H2 heading up to the footer block (or the end of the file if
 * no footer line is found) is kept; a "Weitere Guides" section, literal `\n`
 * sequences, surplus blank lines and a leading difficulty label are removed.
 *
 * @param {string} filePath - Path of the Markdown file to clean.
 * @returns {boolean} True if the file was rewritten; false if it was skipped
 *   because it has no frontmatter or no content heading was found.
 */
const cleanGuide = (filePath) => {
  const source = fs.readFileSync(filePath, "utf-8");

  // Split the file into the frontmatter block and everything after it.
  const parts = /^(---\n[\s\S]*?\n---)\n([\s\S]*)$/.exec(source);
  if (parts === null) {
    console.log(` SKIP (kein Frontmatter): ${path.basename(filePath)}`);
    return false;
  }
  const [, frontmatter, body] = parts;
  const lines = body.split("\n");

  // The first H1/H2 that is not part of the navigation block marks the content start.
  const headingIdx = lines.findIndex((entry) => {
    const text = entry.trim();
    return (text.startsWith("# ") || text.startsWith("## ")) && !isHeaderLine(text);
  });
  if (headingIdx === -1) {
    console.log(` WARN (kein Content-Start gefunden): ${path.basename(filePath)}`);
    return false;
  }

  // A difficulty label directly above the heading is included in the slice
  // (it is stripped again further down, once the content has been joined).
  const labelAbove =
    headingIdx > 0 &&
    /^(Einsteiger|Fortgeschritten|Experte)$/i.test(lines[headingIdx - 1].trim());
  const firstLine = labelAbove ? headingIdx - 1 : headingIdx;

  // Lines that may be dropped when walking backwards from the footer.
  const isTrailingNoise = (entry) => {
    if (isJunkLine(entry) || isFooterLine(entry)) return true;
    const text = entry.trim();
    return /^\[Alle Guides\]/.test(text) || /^\[Zur FAQ\]/.test(text);
  };

  // Locate the last footer line at or after the content start.
  let footerIdx = lines.length - 1;
  while (footerIdx >= firstLine && !isFooterLine(lines[footerIdx])) {
    footerIdx -= 1;
  }

  // Without a footer line the content runs to the end of the body; otherwise
  // cut at the footer and keep backing up over junk, footer and link lines.
  let endExclusive = lines.length;
  if (footerIdx >= firstLine) {
    endExclusive = footerIdx;
    while (endExclusive > firstLine && isTrailingNoise(lines[endExclusive - 1])) {
      endExclusive -= 1;
    }
  }

  let kept = lines.slice(firstLine, endExclusive);

  // Drop any junk lines left at the very beginning.
  const firstReal = kept.findIndex((entry) => !isJunkLine(entry));
  kept = firstReal === -1 ? [] : kept.slice(firstReal);

  // Remove a "Weitere Guides" section together with the junk lines preceding it.
  const moreGuidesAt = kept.findIndex((entry) => /^##\s*Weitere Guides/.test(entry.trim()));
  if (moreGuidesAt !== -1) {
    let cut = moreGuidesAt;
    while (cut > 0 && isJunkLine(kept[cut - 1])) {
      cut -= 1;
    }
    kept = kept.slice(0, cut);
  }

  // Strip literal "\n" sequences, then collapse runs of three or more newlines.
  const withoutEscapes = kept.map((entry) =>
    entry.replace(/^\\n$/, "").replace(/\\n/g, ""),
  );
  const collapsed = withoutEscapes.join("\n").replace(/\n{3,}/g, "\n\n").trim();

  // Remove a leading difficulty label from the content.
  const cleanContent = collapsed.replace(/^(Einsteiger|Fortgeschritten|Experte)\n+/i, "");

  fs.writeFileSync(filePath, `${frontmatter}\n\n${cleanContent}\n`, "utf-8");
  return true;
};
|
|
|
|
// Main: clean every Markdown file in the guides directory, then in the pages directory.
console.log("=== Content Cleanup: Header/Footer-Artefakte entfernen ===\n");

// One entry per directory: heading to print, directory to scan, and the summary line.
const CLEANUP_TARGETS = [
  {
    heading: "Guides:",
    dir: GUIDES_DIR,
    summary: (done, total) => `\n${done}/${total} Guides bereinigt.\n`,
  },
  {
    heading: "Pages:",
    dir: PAGES_DIR,
    summary: (done, total) => `\n${done}/${total} Pages bereinigt.`,
  },
];

for (const { heading, dir, summary } of CLEANUP_TARGETS) {
  console.log(heading);
  const markdownFiles = fs.readdirSync(dir).filter((name) => name.endsWith(".md"));
  let done = 0;
  for (const name of markdownFiles) {
    // cleanGuide logs its own SKIP/WARN line when it returns false.
    if (cleanGuide(path.join(dir, name))) {
      console.log(` OK: ${name}`);
      done += 1;
    }
  }
  console.log(summary(done, markdownFiles.length));
}
|