// aia-website/www/tools/audit-links.mjs

import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import fg from 'fast-glob';
import matter from 'gray-matter';
// Project root: the parent of the directory containing this script.
// fileURLToPath (rather than URL#pathname) decodes percent-escapes and handles
// Windows drive letters, so checkouts under paths containing spaces or
// non-ASCII characters still resolve to a real directory.
const root = path.resolve(fileURLToPath(new URL('..', import.meta.url)));
// Build output directory; when it exists, the rendered HTML is audited.
const dist = path.join(root, 'dist');
/**
 * Reduces a URL path to the canonical form used for route comparison.
 *
 * Percent-escapes are decoded, a trailing `/index` or `/index.html` collapses
 * to its directory, a trailing `.html` is removed, and a trailing slash is
 * dropped from everything except the bare root `/`.
 *
 * @param {string} value - URL path to normalise.
 * @returns {string} The normalised path.
 */
const normalizePath = (value) => {
  let decoded;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // A malformed escape (e.g. "/100%") makes decodeURIComponent throw a
    // URIError. Keep the raw text so a single bad link is reported as a broken
    // reference instead of aborting the whole audit.
    decoded = value;
  }
  const stripped = decoded.replace(/\/index(?:\.html)?$/, '/').replace(/\.html$/, '');
  return stripped !== '/' ? stripped.replace(/\/$/, '') : stripped;
};
/**
 * Derives the site route for a content file from its front matter.
 *
 * @param {string} file - Path of the content file; a `/blog/` segment places
 *   the entry under `/library/`.
 * @param {object} data - Parsed front matter; reads `slug`, `draft` and `lang`.
 * @returns {string|null} The route, or `null` for drafts and entries without a slug.
 */
const contentRoute = (file, data) => {
  const { slug, draft, lang } = data;
  if (draft || !slug) {
    return null;
  }

  // English is served without a language prefix.
  // NOTE(review): a record with no `lang` gets the literal prefix "/undefined".
  const prefix = lang === 'en' ? '' : `/${lang}`;

  if (file.includes('/blog/')) {
    return `${prefix}/library/${slug}`;
  }

  switch (slug) {
    case 'index':
      // The English home page has an empty prefix, so fall back to "/".
      return prefix || '/';
    case 'library':
      return `${prefix}/library`;
    default:
      return `${prefix}/${slug}`;
  }
};
/**
 * Resolves a link target found on a page to a normalised internal path.
 *
 * @param {string} reference - Raw link target; may be wrapped in `<...>`.
 * @param {string} route - Route of the page containing the link, used as the
 *   base for relative references.
 * @returns {string|null} The normalised path, `null` when the reference is
 *   empty, fragment-only, carries a URL scheme or is protocol-relative, or the
 *   cleaned reference text when resolution throws.
 */
const resolveInternalReference = (reference, route) => {
  // Drop surrounding whitespace and a leading "<" / trailing ">".
  const target = reference.trim().replace(/^<|>$/g, '');

  const isFragmentOnly = target.startsWith('#');
  const hasScheme = /^[a-z][a-z\d+.-]*:/i.test(target);
  const isProtocolRelative = target.startsWith('//');
  if (!target || isFragmentOnly || hasScheme || isProtocolRelative) {
    return null;
  }

  try {
    // The host is a placeholder; only the resolved pathname is used.
    const pageUrl = new URL(route, 'https://audit.local');
    const { pathname } = new URL(target, pageUrl);
    return normalizePath(pathname);
  } catch {
    return target;
  }
};
/**
 * Recursively collects every `.html` file beneath a directory.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths of the HTML files found, each relative to
 *   the module-level `dist` directory.
 */
async function walk(dir) {
  const htmlFiles = [];
  const entries = await fs.readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);

    if (entry.isDirectory()) {
      const nested = await walk(fullPath);
      htmlFiles.push(...nested);
      continue;
    }

    if (entry.name.endsWith('.html')) {
      htmlFiles.push(path.relative(dist, fullPath));
    }
  }

  return htmlFiles;
}
// Source-level fallback audit. When the build output directory is missing,
// routes are derived from content front matter and links are checked against
// that route set plus the files under public/. The script exits at the end of
// this branch, so the rendered-build audit below it never runs in this case.
const distExists = await fs.access(dist).then(() => true).catch(() => false);
if (!distExists) {
  // Seed routes that are treated as existing regardless of the content files.
  const routes = new Set(['/', '/library', '/es', '/es/library', '/ar', '/ar/library']);

  // Pass 1: register the route of every published content file and keep its
  // body for link extraction.
  const contentFiles = await fg('src/content/**/*.{md,mdx}', { cwd: root, absolute: true });
  const contentRecords = [];
  for (const file of contentFiles) {
    const parsed = matter(await fs.readFile(file, 'utf8'));
    const route = contentRoute(file, parsed.data);
    if (!route) continue;
    const normalizedRoute = normalizePath(route);
    routes.add(normalizedRoute);
    contentRecords.push({ file, route: normalizedRoute, text: parsed.content });
  }

  // Public files expressed as site-absolute URLs ("public/x" -> "/x").
  const publicFiles = new Set(
    (await fg('public/**/*', { cwd: root, onlyFiles: true })).map((file) => `/${file.replace(/^public\//, '')}`)
  );
  const broken = [];

  // Pass 2: Markdown links inside content bodies.
  for (const { file, route, text } of contentRecords) {
    // Content under /pages/ that maps onto a home or library route is not scanned.
    if (file.includes('/pages/') && ['/library', '/es/library', '/ar/library', '/', '/es', '/ar'].includes(route)) continue;
    const label = path.relative(root, file);
    // Captures the target of `](target)` or `](target "title")`.
    const references = [...text.matchAll(/\]\(([^)\s]+)(?:\s+["'][^)]*)?\)/g)].map((match) => match[1]);
    for (const rawReference of references) {
      const reference = resolveInternalReference(rawReference, route);
      if (!reference) continue;
      if (reference.startsWith('/assets/')) {
        if (!publicFiles.has(reference)) broken.push(`${label}: ${rawReference} -> ${reference}`);
      } else if (!routes.has(reference)) {
        broken.push(`${label}: ${rawReference} -> ${reference}`);
      }
    }
  }

  // Pass 3: root-relative href/src attributes and Markdown links in source files.
  // The `(?!\/)` lookahead skips protocol-relative URLs ("//cdn.example/x"),
  // which are external and would otherwise be reported as missing routes.
  const sourceFiles = await fg('src/**/*.{astro,ts,js,json}', { cwd: root, absolute: true });
  for (const file of sourceFiles) {
    const text = await fs.readFile(file, 'utf8');
    const label = path.relative(root, file);
    const references = [
      ...text.matchAll(/(?:href|src)=["'](\/(?!\/)[^"'#?{]*)/g),
      ...text.matchAll(/\]\((\/(?!\/)[^)#?]*)/g)
    ].map((match) => normalizePath(match[1]));
    for (const reference of references) {
      if (reference.startsWith('/assets/') || reference === '/robots.txt' || reference === '/sitemap.xml') {
        if (!publicFiles.has(reference)) broken.push(`${label}: ${reference}`);
      } else if (!routes.has(reference)) {
        broken.push(`${label}: ${reference}`);
      }
    }
  }

  const unique = [...new Set(broken)].sort();
  const report = unique.length
    ? `# Broken Links\n\nRendered output is unavailable in the sandbox; source routes and public assets were audited.\n\n${unique.map((item) => `- ${item}`).join('\n')}\n`
    : '# Broken Links\n\nRendered output is unavailable in the sandbox; source routes and public assets were audited. No broken internal source links were detected.\n';

  // Create reports/ first so a fresh checkout does not fail on the write.
  const reportPath = path.join(root, 'reports/broken-links.md');
  await fs.mkdir(path.dirname(reportPath), { recursive: true });
  await fs.writeFile(reportPath, report);

  console.log(`${unique.length} broken internal source links.`);
  if (unique.length) process.exitCode = 1;
  // Exit here so the rendered-build audit below does not run; with no
  // argument, process.exit() uses process.exitCode.
  process.exit();
}
// Rendered-build audit: scan every HTML file under dist/ for root-relative
// href/src attributes and verify that each one maps to a file in the build.

// Resolves to true when at least one of the candidate paths is accessible.
const anyExists = (candidates) =>
  Promise.any(candidates.map((candidate) => fs.access(candidate))).then(() => true).catch(() => false);

// Returns the URL as written plus its percent-decoded form when that differs,
// so a link such as "/caf%C3%A9/" can match a directory stored under either name.
const urlVariants = (url) => {
  try {
    const decoded = decodeURIComponent(url);
    return decoded === url ? [url] : [url, decoded];
  } catch {
    // Malformed escape: only the raw form can be checked.
    return [url];
  }
};

const files = await walk(dist);
const broken = [];
for (const file of files) {
  const html = await fs.readFile(path.join(dist, file), 'utf8');
  // `(?!\/)` skips protocol-relative URLs ("//host/path"), which are external.
  for (const match of html.matchAll(/(?:href|src)="(\/(?!\/)[^"#?]*)/g)) {
    const url = match[1];
    const variants = urlVariants(url);
    if (url.startsWith('/assets/')) {
      // Assets must exist as a file at exactly that path.
      if (!await anyExists(variants.map((variant) => path.join(dist, variant)))) broken.push(`${file}: ${url}`);
      continue;
    }
    // A page URL may be served from <url>/index.html, <url>.html or <url> itself.
    const candidates = variants.flatMap((variant) => [
      path.join(dist, variant, 'index.html'),
      path.join(dist, `${variant}.html`),
      path.join(dist, variant)
    ]);
    if (!await anyExists(candidates)) broken.push(`${file}: ${url}`);
  }
}
const report = broken.length ? `# Broken Links\n\n${broken.map((item) => `- ${item}`).join('\n')}\n` : '# Broken Links\n\nNo broken internal build links detected.\n';
// Create reports/ first so a fresh checkout does not fail on the write.
const reportPath = path.join(root, 'reports/broken-links.md');
await fs.mkdir(path.dirname(reportPath), { recursive: true });
await fs.writeFile(reportPath, report);
console.log(`${broken.length} broken internal links.`);
if (broken.length) process.exitCode = 1;