From 7894e061108ad816c159b460b11651db504f3290 Mon Sep 17 00:00:00 2001 From: Janpot <2109932+Janpot@users.noreply.github.com> Date: Wed, 22 Oct 2025 14:08:04 +0200 Subject: [PATCH 01/35] WIP --- docs/package.json | 4 +- docs/scripts/postBuild.mts | 323 +++++++++++++++++++++++++++++++++++++ pnpm-lock.yaml | 11 ++ 3 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 docs/scripts/postBuild.mts diff --git a/docs/package.json b/docs/package.json index e2eacde57b2589..6abf7c746aa848 100644 --- a/docs/package.json +++ b/docs/package.json @@ -14,7 +14,8 @@ "typescript": "tsc -p tsconfig.json && tsc -p scripts/tsconfig.json", "typescript:transpile": "echo 'Use `pnpm docs:typescript:formatted'` instead && exit 1", "typescript:transpile:dev": "echo 'Use `pnpm docs:typescript'` instead && exit 1", - "link-check": "tsx ./scripts/reportBrokenLinks.ts" + "link-check": "tsx ./scripts/reportBrokenLinks.ts", + "crawl": "tsx ./scripts/postBuild.mts" }, "dependencies": { "@babel/core": "^7.28.4", @@ -132,6 +133,7 @@ "cross-fetch": "^4.1.0", "gm": "^1.25.1", "marked": "^16.2.0", + "node-html-parser": "^7.0.1", "prettier": "^3.6.2", "tailwindcss": "^4.1.14", "yargs": "^18.0.0" diff --git a/docs/scripts/postBuild.mts b/docs/scripts/postBuild.mts new file mode 100644 index 00000000000000..1c984a0fc64487 --- /dev/null +++ b/docs/scripts/postBuild.mts @@ -0,0 +1,323 @@ +/* eslint-disable no-console */ +import { execa } from 'execa'; +import timers from 'timers/promises'; +import { parse, HTMLElement } from 'node-html-parser'; +import * as path from 'path'; +import fs from 'fs/promises'; + +// Target paths to ignore during link checking +const IGNORED_PATHS = [ + // Internal links not on this server + // TODO: Seed crawler with stored links from e.g. mui.com/x/link-structure.json + /^\/(x|base-ui|joy-ui|store)(\/|$)/, +]; + +// CSS selectors for content to ignore during link checking +const IGNORED_CONTENT: string[] = [ + // Links used in demos under MemoryRouter + // TODO: Create an easier way to identify content under MemoryRouter + // (e.g. a class or an option on the demo) + '[id^="demo-"] a[href^="/inbox"]', + '[id^="demo-"] a[href^="/trash"]', + '[id^="demo-"] a[href^="/spam"]', + '[id^="demo-"] a[href^="/drafts"]', +]; + +const IGNORED_IDS = new Set(['__next', '__NEXT_DATA__']); + +const PORT = 3001; + +async function pollUrl(url: string, timeout: number): Promise { + const start = Date.now(); + while (true) { + try { + // eslint-disable-next-line no-await-in-loop + const res = await fetch(url); + if (!res.ok) { + throw new Error(`Failed to fetch ${url}: [${res.status}] ${res.statusText}`); + } + return; + } catch (error: any) { + if (Date.now() - start > timeout) { + throw new Error(`Timeout waiting for ${url}: ${error.message}`); + } + // eslint-disable-next-line no-await-in-loop + await timers.setTimeout(1000); + } + } +} + +async function writePagesToFile(pages: Map) { + const outPath = path.resolve(import.meta.dirname, '../public/material-ui/link-structure.json'); + const data = { + pages: Object.fromEntries( + Array.from(pages.entries(), ([url, pageData]) => [ + url, + { + status: pageData.status, + targets: Object.fromEntries(pageData.targets), + }, + ]), + ), + }; + await fs.writeFile(outPath, JSON.stringify(data, null, 2), 'utf-8'); +} + +// Polyfill for `node.computedName` available only in chrome v112+ +function getAccessibleName(elm: HTMLElement | null, ownerDocument: HTMLElement): string { + if (!elm) { + return ''; + } + + // 1. aria-label + const ariaLabel = elm.getAttribute('aria-label')?.trim(); + if (ariaLabel) { + return ariaLabel; + } + + // 2. aria-labelledby + const labelledby = elm.getAttribute('aria-labelledby'); + if (labelledby) { + const labels = []; + for (const id of labelledby.split(/\s+/)) { + const label = getAccessibleName(ownerDocument.getElementById(id), ownerDocument); + if (label) { + labels.push(label); + } + } + const label = labels.join(' ').trim(); + if (label) { + return label; + } + } + + // 3.