fix: WordPress Header/Footer/Nav-Artefakte aus allen Guides entfernt
- Cleanup-Skript entfernt alte WP-Navigation (M0LZI_3D, [WISSEN], etc.)
- Footer-Bloecke (Copyright, WHATSAPP, IMPRESSUM) aus 44 Guides + 15 Pages
- Escaped Backslash-Artefakte bereinigt
- Alle 44 Guides live verifiziert: 0 Artefakte

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
167
scripts/cleanup-content.mjs
Normal file
167
scripts/cleanup-content.mjs
Normal file
@@ -0,0 +1,167 @@
|
||||
/**
|
||||
* Entfernt WordPress Header/Footer/Navigation-Artefakte aus allen Markdown-Guides.
|
||||
* Behaelt nur den eigentlichen Content zwischen erstem H1/H2 und dem Footer.
|
||||
*/
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
// Content collections that get cleaned. Both paths are resolved against the
// directory the script is started from.
const GUIDES_DIR = path.resolve(process.cwd(), "app/src/content/guides");
const PAGES_DIR = path.resolve(process.cwd(), "app/src/content/pages");
|
||||
|
||||
// Patterns that identify lines of the old WordPress header/navigation block.
// The separator inside names such as M0LZI_3D may appear as a plain "_", as a
// lone "\" or as the Markdown-escaped pair "\_". A one-character class like
// [_\\] cannot match the two-character escaped form, so the separator is
// spelled out as an alternation that accepts all three variants.
const HEADER_PATTERNS = [
  /M0LZI(?:\\?_|\\)3D/i,
  /\[SYS(?:\\?_|\\)BOOT/i,
  /\[WISSEN\]/,
  /\[RECHNER\]/,
  /\[TOOLS\]/,
  /\[MODELLE\]/,
  /\[NEWS\]/,
  /\[SHOWCASE\]/,
  /\[FAQ\]/,
  /JOIN(?:\\?_|\\)NETWORK/,
  /chat\.whatsapp\.com/,
];
|
||||
|
||||
// Patterns that identify lines of the old WordPress footer block.
// The copyright line accepts "_", a lone "\" or the Markdown-escaped "\_" as
// the separator in M0LZI_3D; a one-character class like [_\\] would miss the
// two-character escaped form.
const FOOTER_PATTERNS = [
  /M0LZI(?:\\?_|\\)3D © \d{4}/i,
  /\[WHATSAPP\]/,
  /\[IMPRESSUM\]/,
  /\[DATENSCHUTZ\]/,
];
|
||||
|
||||
/**
 * Tells whether a line belongs to the legacy header/navigation block.
 *
 * @param {string} line - A single line of Markdown.
 * @returns {boolean} true when at least one entry of HEADER_PATTERNS matches.
 */
const isHeaderLine = (line) => {
  for (const pattern of HEADER_PATTERNS) {
    if (pattern.test(line)) {
      return true;
    }
  }
  return false;
};
|
||||
|
||||
/**
 * Tells whether a line belongs to the legacy footer block.
 *
 * @param {string} line - A single line of Markdown.
 * @returns {boolean} true when at least one entry of FOOTER_PATTERNS matches.
 */
const isFooterLine = (line) => {
  for (const pattern of FOOTER_PATTERNS) {
    if (pattern.test(line)) {
      return true;
    }
  }
  return false;
};
|
||||
|
||||
/**
 * Reports whether a line carries no real content: it is empty, or it consists
 * only of whitespace, stray backslashes and literal "\n" escape sequences.
 *
 * @param {string} line - A single line of Markdown.
 * @returns {boolean} true when the line can be discarded as junk.
 */
const isJunkLine = (line) => {
  const trimmed = line.trim();
  if (trimmed === "") return true;
  // Alternation instead of the character class [\\n\s]: inside a class the
  // "n" is just the letter n, so lines such as "n" or "nn" were wrongly
  // classified as junk. Here "n" is only accepted right after a backslash.
  return /^(?:\\n?|\s)+$/.test(trimmed);
};
|
||||
|
||||
/**
 * Removes leftover WordPress header/navigation and footer artefacts from one
 * Markdown file and rewrites the file in place.
 *
 * The frontmatter block is kept verbatim. Of the body, only the part from the
 * first real H1/H2 heading up to the footer block survives. A trailing
 * "Weitere Guides" section, literal "\n" sequences, runs of blank lines and a
 * leading difficulty label are removed as well.
 *
 * @param {string} filePath - Path of the Markdown file to clean.
 * @returns {boolean} true when the file was rewritten, false when it was
 *   skipped (no frontmatter, or no content heading found).
 */
const cleanGuide = (filePath) => {
  // Normalise CRLF first: the frontmatter pattern below only understands "\n",
  // so a file saved with Windows line endings would otherwise be skipped as
  // having no frontmatter. The rewritten file uses "\n" throughout.
  const raw = fs.readFileSync(filePath, "utf-8").replace(/\r\n/g, "\n");

  // Split frontmatter from body.
  const fmMatch = raw.match(/^(---\n[\s\S]*?\n---)\n([\s\S]*)$/);
  if (!fmMatch) {
    console.log(` SKIP (kein Frontmatter): ${path.basename(filePath)}`);
    return false;
  }
  const [, frontmatter, body] = fmMatch;
  const lines = body.split("\n");

  // Locate the start of the real content: the first H1/H2 that is not part of
  // the navigation block.
  let contentStart = -1;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    const isHeading = line.startsWith("# ") || line.startsWith("## ");
    if (!isHeading || isHeaderLine(line)) continue;

    // A difficulty label directly above the heading is taken into the slice;
    // it is stripped again further down once the content is assembled.
    const hasLabel =
      i > 0 && /^(Einsteiger|Fortgeschritten|Experte)$/i.test(lines[i - 1].trim());
    contentStart = hasLabel ? i - 1 : i;
    break;
  }

  if (contentStart === -1) {
    console.log(` WARN (kein Content-Start gefunden): ${path.basename(filePath)}`);
    return false;
  }

  // Lines that may sit between the real content and the footer block.
  const isFooterTrail = (line) =>
    isJunkLine(line) ||
    isFooterLine(line) ||
    /^\[Alle Guides\]/.test(line.trim()) ||
    /^\[Zur FAQ\]/.test(line.trim());

  // Locate the end of the real content: search backwards for the last footer
  // line, then keep walking back over junk, footer and back-link lines.
  let contentEnd = lines.length;
  for (let i = lines.length - 1; i >= contentStart; i--) {
    if (!isFooterLine(lines[i])) continue;
    contentEnd = i;
    while (contentEnd > contentStart && isFooterTrail(lines[contentEnd - 1])) {
      contentEnd--;
    }
    break;
  }

  let contentLines = lines.slice(contentStart, contentEnd);

  // Drop junk lines that remain at the very beginning.
  while (contentLines.length > 0 && isJunkLine(contentLines[0])) {
    contentLines.shift();
  }

  // Cut a trailing "Weitere Guides" section, including the junk right above it.
  const weitereIdx = contentLines.findIndex((l) => /^##\s*Weitere Guides/.test(l.trim()));
  if (weitereIdx !== -1) {
    contentLines = contentLines.slice(0, weitereIdx);
    while (contentLines.length > 0 && isJunkLine(contentLines[contentLines.length - 1])) {
      contentLines.pop();
    }
  }

  // Strip literal "\n" sequences and collapse runs of blank lines to one.
  let cleanContent = contentLines
    .map((line) => line.replace(/\\n/g, ""))
    .join("\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();

  // Remove a leading difficulty label (it is rendered as a badge instead).
  cleanContent = cleanContent.replace(/^(Einsteiger|Fortgeschritten|Experte)\n+/i, "");

  const output = `${frontmatter}\n\n${cleanContent}\n`;
  fs.writeFileSync(filePath, output, "utf-8");
  return true;
};
|
||||
|
||||
// Entry point: clean every Markdown file of both content collections and
// print a per-file status line plus a summary for each collection.
console.log("=== Content Cleanup: Header/Footer-Artefakte entfernen ===\n");

// Runs cleanGuide over all ".md" files directly inside `dir` and reports how
// many of them were rewritten.
const processDirectory = (dir) => {
  const mdFiles = fs.readdirSync(dir).filter((name) => name.endsWith(".md"));
  let okCount = 0;
  mdFiles.forEach((name) => {
    if (cleanGuide(path.join(dir, name))) {
      console.log(` OK: ${name}`);
      okCount += 1;
    }
  });
  return { okCount, total: mdFiles.length };
};

console.log("Guides:");
const guideStats = processDirectory(GUIDES_DIR);
console.log(`\n${guideStats.okCount}/${guideStats.total} Guides bereinigt.\n`);

console.log("Pages:");
const pageStats = processDirectory(PAGES_DIR);
console.log(`\n${pageStats.okCount}/${pageStats.total} Pages bereinigt.`);
|
||||
Reference in New Issue
Block a user