fix: remove duplicated headers from blog post bodies

- remove extracted title, byline, date, avatar, and featured image preambles
- prevent both extraction scripts from recreating duplicated headers
- add blog content duplication auditing
- update migration documentation
This commit is contained in:
2026-06-08 13:26:00 -07:00
parent bfed3bee15
commit e4e6f15e9a
73 changed files with 66 additions and 591 deletions
+2 -1
View File
@@ -11,6 +11,7 @@ const site = 'https://www.azinstitute4autism.com';
// Creates the directory at `value` by calling fs.mkdir with `recursive: true`.
// NOTE(review): `fs` is imported outside this view — presumably node:fs/promises; confirm.
const mkdir = (value) => {
  return fs.mkdir(value, { recursive: true });
};

// Stringifies `value`, collapses every whitespace run into a single space,
// trims both ends, and returns the result as a JSON string literal.
const quote = (value = '') => {
  const collapsed = String(value).replace(/\s+/g, ' ').trim();
  return JSON.stringify(collapsed);
};

// Stringifies `value`, doubles each embedded double quote, and wraps the
// result in double quotes.
const csv = (value = '') => {
  const escaped = String(value).replaceAll('"', '""');
  return `"${escaped}"`;
};

// Matches a header preamble only at the very start of a string (no `m` flag):
// an H1 line, an image pointing at /assets/images/rula-diab-avatar.jpg, two
// single-line paragraphs, and one more image, each followed by a blank line.
// NOTE(review): the two single-line paragraphs are presumably byline and date — confirm.
const blogPreamble = /^# .+\n\n!\[[^\]]*\]\(\/assets\/images\/rula-diab-avatar\.jpg\)\n\n[^\n]+\n\n[^\n]+\n\n!\[[^\]]*\]\([^)]+\)\n\n/;
async function walk(dir, prefix = '') {
const output = [];
@@ -129,7 +130,7 @@ function frontmatter(item, blog) {
'draft: false',
'---',
'',
item.markdown || `# ${item.h1}`,
(blog ? item.markdown.replace(blogPreamble, '') : item.markdown) || `# ${item.h1}`,
''
].filter((line) => line !== '');
return `${lines.join('\n')}\n`;