Files
aia-website/www/tools/audit-blog-content.mjs
T
DeCentN2Madness e4e6f15e9a fix: remove duplicated headers from blog post bodies
- remove extracted title, byline, date, avatar, and featured image preambles
- prevent both extraction scripts from recreating duplicated headers
- add blog content duplication auditing
- update migration documentation
2026-06-08 13:26:00 -07:00

32 lines
1.2 KiB
JavaScript

import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import fg from 'fast-glob';
import matter from 'gray-matter';
// Audits blog post bodies for layout content (article title, featured image)
// that is repeated at the top of the Markdown body, writes a Markdown report,
// and sets a failing exit code when any duplication is found.

// Project root: the parent of the directory that contains this script.
const root = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');

// A featured-image reference only counts as a duplicated preamble when it
// starts within this many characters of the beginning of the trimmed body.
const PREAMBLE_WINDOW = 500;

/**
 * Lists the layout elements that are duplicated at the start of a post body.
 *
 * @param {string} body - Markdown body with leading whitespace already removed.
 * @param {unknown} featuredImage - The `featuredImage` front-matter value, if any.
 * @returns {string[]} One description per duplicated element (empty when clean).
 */
function findDuplicatedPreamble(body, featuredImage) {
  const problems = [];

  if (body.startsWith('# ')) {
    problems.push('body begins with a duplicate article title');
  }

  // Only search for a real, non-empty string. String.prototype.includes/indexOf
  // coerce their argument, so a missing value would be searched for as the text
  // "undefined", and an empty string matches at index 0 of every body.
  if (typeof featuredImage === 'string' && featuredImage !== '') {
    const position = body.indexOf(featuredImage);
    if (position !== -1 && position < PREAMBLE_WINDOW) {
      problems.push('body begins with a duplicate featured image');
    }
  }

  return problems;
}

const failures = [];
const files = await fg('src/content/blog/**/*.{md,mdx}', { cwd: root, absolute: true });
// Sort so the report lists posts in a stable order regardless of traversal order.
files.sort();

for (const file of files) {
  const { content, data } = matter(await fs.readFile(file, 'utf8'));
  const label = path.relative(root, file);
  for (const problem of findDuplicatedPreamble(content.trimStart(), data.featuredImage)) {
    failures.push(`${label}: ${problem}`);
  }
}

const report = [
  '# Blog Content Audit',
  '',
  `Checked ${files.length} blog post bodies for layout content duplicated in Markdown.`,
  '',
  failures.length ? failures.map((failure) => `- ${failure}`).join('\n') : 'No duplicated article headers were detected.',
  '',
].join('\n');

const reportPath = path.join(root, 'reports/blog-content-audit.md');
// Create the reports directory first so the write cannot fail with ENOENT.
await fs.mkdir(path.dirname(reportPath), { recursive: true });
await fs.writeFile(reportPath, report);

console.log(`${failures.length} duplicated blog header elements.`);
// Signal failure to the caller (e.g. CI) without aborting before output is flushed.
if (failures.length) process.exitCode = 1;