From 0c2799d82b5399f87d6da4868d9c6018cf14aa6e Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Wed, 29 Oct 2025 10:02:12 -0400 Subject: [PATCH 01/33] feat(caching) try with 2 workers on enhanced build --- scripts/generate-md-exports.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 229a700691e29..364906f4940ca 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -6,7 +6,7 @@ import {selectAll} from 'hast-util-select'; import {createHash} from 'node:crypto'; import {createReadStream, createWriteStream, existsSync} from 'node:fs'; import {mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises'; -import {cpus} from 'node:os'; +// import {cpus} from 'node:os'; import * as path from 'node:path'; import {compose, Readable} from 'node:stream'; import {text} from 'node:stream/consumers'; @@ -96,7 +96,7 @@ async function createWork() { } // On a 16-core machine, 8 workers were optimal (and slightly faster than 16) - const numWorkers = Math.max(Math.floor(cpus().length / 2), 2); + const numWorkers = 2; const workerTasks = new Array(numWorkers).fill(null).map(() => []); let existingFilesOnR2 = null; From 79ac9930e56f97a0594a76d0e70bc75f8dd3c1a3 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Wed, 29 Oct 2025 12:37:19 -0400 Subject: [PATCH 02/33] testing with more workers --- scripts/generate-md-exports.mjs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 364906f4940ca..e9c0afb95aeb1 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -6,7 +6,7 @@ import {selectAll} from 'hast-util-select'; import {createHash} from 'node:crypto'; import {createReadStream, createWriteStream, existsSync} from 'node:fs'; import {mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises'; -// import {cpus} from 'node:os'; 
+import {cpus} from 'node:os'; import * as path from 'node:path'; import {compose, Readable} from 'node:stream'; import {text} from 'node:stream/consumers'; @@ -95,8 +95,8 @@ async function createWork() { await mkdir(CACHE_DIR, {recursive: true}); } - // On a 16-core machine, 8 workers were optimal (and slightly faster than 16) - const numWorkers = 2; + // We are currently on Enhanced Builds in Vercel which gives us 8 cores, here we'll use half. + const numWorkers = Math.max(Math.floor(cpus().length / 2), 2); const workerTasks = new Array(numWorkers).fill(null).map(() => []); let existingFilesOnR2 = null; From 6bc7aaea9fd98b237075141b98d441f4d843956a Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Wed, 29 Oct 2025 15:03:10 -0400 Subject: [PATCH 03/33] 75% workers --- scripts/generate-md-exports.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index e9c0afb95aeb1..6c2852c906fcb 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -95,8 +95,8 @@ async function createWork() { await mkdir(CACHE_DIR, {recursive: true}); } - // We are currently on Enhanced Builds in Vercel which gives us 8 cores, here we'll use half. 
- const numWorkers = Math.max(Math.floor(cpus().length / 2), 2); + // We are currently on Enhanced Builds in Vercel which gives us 8 cores, lets try 75% + const numWorkers = Math.max(Math.floor(cpus().length * 0.75), 2); const workerTasks = new Array(numWorkers).fill(null).map(() => []); let existingFilesOnR2 = null; From 52dee11053fd93949fdffcfc34663087e7593c7b Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Wed, 29 Oct 2025 17:21:57 -0400 Subject: [PATCH 04/33] test 1 worker --- scripts/generate-md-exports.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 6c2852c906fcb..025ca8f515544 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -95,8 +95,8 @@ async function createWork() { await mkdir(CACHE_DIR, {recursive: true}); } - // We are currently on Enhanced Builds in Vercel which gives us 8 cores, lets try 75% - const numWorkers = Math.max(Math.floor(cpus().length * 0.75), 2); + // idk just testing with 1 worker for now + const numWorkers = 1; const workerTasks = new Array(numWorkers).fill(null).map(() => []); let existingFilesOnR2 = null; From 865b223a0db3d9a58718eb72e53650eb68b3c1f6 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Wed, 29 Oct 2025 17:34:33 -0400 Subject: [PATCH 05/33] caching release registry --- src/mdx.ts | 87 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 10 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index 459caea86d37f..a81dfae99810e 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -68,6 +68,59 @@ if (process.env.CI) { const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex'); +// Worker-level registry cache to avoid fetching multiple times per worker +let cachedRegistryHash: Promise | null = null; + +/** + * Fetch registry data and compute its hash, with retry logic and exponential backoff. 
+ * Retries up to maxRetries times with exponential backoff starting at initialDelayMs. + */ +async function getRegistryHashWithRetry( + maxRetries = 3, + initialDelayMs = 1000 +): Promise { + let lastError: Error | null = null; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + const [apps, packages] = await Promise.all([ + getAppRegistry(), + getPackageRegistry(), + ]); + return md5(JSON.stringify({apps, packages})); + } catch (err) { + lastError = err as Error; + + if (attempt < maxRetries) { + const delay = initialDelayMs * Math.pow(2, attempt); + // eslint-disable-next-line no-console + console.warn( + `Failed to fetch registry (attempt ${attempt + 1}/${maxRetries + 1}). Retrying in ${delay}ms...`, + err + ); + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + } + + throw lastError || new Error('Failed to fetch registry after all retries'); +} + +/** + * Get the registry hash, using cached value if available. + * This ensures we only fetch the registry once per worker process. + */ +function getRegistryHash(): Promise { + if (!cachedRegistryHash) { + cachedRegistryHash = getRegistryHashWithRetry().catch(err => { + // Reset cache on error so next call will retry + cachedRegistryHash = null; + throw err; + }); + } + return cachedRegistryHash; +} + async function readCacheFile(file: string): Promise { const reader = createReadStream(file); const decompressor = createBrotliDecompress(); @@ -541,23 +594,37 @@ export async function getFileBySlug(slug: string): Promise { // continue anyway - images should already exist from build time } - // If the file contains content that depends on the Release Registry (such as an SDK's latest version), avoid using the cache for that file, i.e. always rebuild it. - // This is because the content from the registry might have changed since the last time the file was cached. - // If a new component that injects content from the registry is introduced, it should be added to the patterns below. 
- const skipCache = + // Detect if file contains content that depends on the Release Registry + // If it does, we include the registry hash in the cache key so the cache + // is invalidated when the registry changes. + const dependsOnRegistry = source.includes('@inject') || source.includes(' Date: Wed, 29 Oct 2025 18:20:14 -0400 Subject: [PATCH 06/33] bring back 75% cpu --- scripts/generate-md-exports.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 025ca8f515544..6c2852c906fcb 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -95,8 +95,8 @@ async function createWork() { await mkdir(CACHE_DIR, {recursive: true}); } - // idk just testing with 1 worker for now - const numWorkers = 1; + // We are currently on Enhanced Builds in Vercel which gives us 8 cores, lets try 75% + const numWorkers = Math.max(Math.floor(cpus().length * 0.75), 2); const workerTasks = new Array(numWorkers).fill(null).map(() => []); let existingFilesOnR2 = null; From e968968d373251455a9c3a56a0feddd54331aafd Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Wed, 29 Oct 2025 18:33:19 -0400 Subject: [PATCH 07/33] back to half --- scripts/generate-md-exports.mjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 6c2852c906fcb..229a700691e29 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -95,8 +95,8 @@ async function createWork() { await mkdir(CACHE_DIR, {recursive: true}); } - // We are currently on Enhanced Builds in Vercel which gives us 8 cores, lets try 75% - const numWorkers = Math.max(Math.floor(cpus().length * 0.75), 2); + // On a 16-core machine, 8 workers were optimal (and slightly faster than 16) + const numWorkers = Math.max(Math.floor(cpus().length / 2), 2); const workerTasks = new Array(numWorkers).fill(null).map(() => []); let 
existingFilesOnR2 = null; From 98d867ca2c31aa96ac3caf693ce51666e3c576a2 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Wed, 29 Oct 2025 18:35:31 -0400 Subject: [PATCH 08/33] registry cache logs --- src/mdx.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mdx.ts b/src/mdx.ts index a81dfae99810e..c2cc571279cdf 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -112,6 +112,8 @@ async function getRegistryHashWithRetry( */ function getRegistryHash(): Promise { if (!cachedRegistryHash) { + // eslint-disable-next-line no-console + console.info('Fetching registry hash for the first time in this worker'); cachedRegistryHash = getRegistryHashWithRetry().catch(err => { // Reset cache on error so next call will retry cachedRegistryHash = null; @@ -611,6 +613,10 @@ export async function getFileBySlug(slug: string): Promise { try { const registryHash = await getRegistryHash(); cacheKey = `${sourceHash}-${registryHash}`; + // eslint-disable-next-line no-console + console.info( + `Using registry-aware cache for ${sourcePath} (registry hash: ${registryHash.slice(0, 8)}...)` + ); } catch (err) { // If we can't get registry hash, skip cache for this file // eslint-disable-next-line no-console From c163037d576b045958eecbb2f76f89424088ed20 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Wed, 29 Oct 2025 19:02:39 -0400 Subject: [PATCH 09/33] lets test file changes --- docs/cli/installation.mdx | 2 +- docs/product/index.mdx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/cli/installation.mdx b/docs/cli/installation.mdx index acc96f17aabb1..ce19e7afcb084 100644 --- a/docs/cli/installation.mdx +++ b/docs/cli/installation.mdx @@ -1,5 +1,5 @@ --- -title: Installation +title: Installation test sidebar_order: 0 description: "Learn about the different methods available to install `sentry-cli`." 
--- diff --git a/docs/product/index.mdx b/docs/product/index.mdx index 3dced685a84ac..720b7440d0a4c 100644 --- a/docs/product/index.mdx +++ b/docs/product/index.mdx @@ -1,5 +1,5 @@ --- -title: Product Walkthroughs +title: Product Walkthroughs -test sidebar_order: 30 description: "Sentry can be used to not just observe, but debug errors as well as identify performance issues. Check out our product walkthroughs to see it in action." --- @@ -50,7 +50,7 @@ Our [**AI Agents Monitoring**](/product/insights/ai/agents/) feature gives you i ### Uptime Monitoring -Sentry's [**Uptime Monitoring**](/product/uptime-monitoring/) helps you maintain uptime for your web services by monitoring relevant URLs. It continuously tracks configured URLs, delivering alerts and insights to quickly identify downtime and troubleshoot issues. By leveraging [distributed tracing](/product/uptime-monitoring/uptime-tracing/), Sentry enables you to pinpoint any errors that occur during an uptime check, simplifying triage and accelerating root cause analysis. Uptime monitoring includes [uptime request spans](/product/uptime-monitoring/#uptime-request-spans) by default. These act as the root of any uptime issue's trace, giving you better context for faster debugging. +Sentry's [**Uptime Monitoring**](/product/uptime-monitoring/) helps you maintain uptime for your web services by monitoring relevant URLs. It continuously tracks configured URLs, delivering alerts and insights to quickly identify downtime and troubleshoot issues. By leveraging [distributed tracing](/product/uptime-monitoring/uptime-tracing/), Sentry enables you to pinpoint any errors that occur during an uptime check, simplifying triage and accelerating root cause analysis. Uptime monitoring includes [uptime request spans](/product/uptime-monitoring/#uptime-request-spans) by default. These act as the root of any uptime issue's trace, giving you better context for faster debugging. 
### Recurring Job Monitoring From 4f152754a74ec719c6cab8ba94ba55b1feafe0ee Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Thu, 30 Oct 2025 10:27:34 -0400 Subject: [PATCH 10/33] revert content changes --- docs/cli/installation.mdx | 2 +- docs/product/index.mdx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/cli/installation.mdx b/docs/cli/installation.mdx index ce19e7afcb084..acc96f17aabb1 100644 --- a/docs/cli/installation.mdx +++ b/docs/cli/installation.mdx @@ -1,5 +1,5 @@ --- -title: Installation test +title: Installation sidebar_order: 0 description: "Learn about the different methods available to install `sentry-cli`." --- diff --git a/docs/product/index.mdx b/docs/product/index.mdx index 720b7440d0a4c..3dced685a84ac 100644 --- a/docs/product/index.mdx +++ b/docs/product/index.mdx @@ -1,5 +1,5 @@ --- -title: Product Walkthroughs -test +title: Product Walkthroughs sidebar_order: 30 description: "Sentry can be used to not just observe, but debug errors as well as identify performance issues. Check out our product walkthroughs to see it in action." --- @@ -50,7 +50,7 @@ Our [**AI Agents Monitoring**](/product/insights/ai/agents/) feature gives you i ### Uptime Monitoring -Sentry's [**Uptime Monitoring**](/product/uptime-monitoring/) helps you maintain uptime for your web services by monitoring relevant URLs. It continuously tracks configured URLs, delivering alerts and insights to quickly identify downtime and troubleshoot issues. By leveraging [distributed tracing](/product/uptime-monitoring/uptime-tracing/), Sentry enables you to pinpoint any errors that occur during an uptime check, simplifying triage and accelerating root cause analysis. Uptime monitoring includes [uptime request spans](/product/uptime-monitoring/#uptime-request-spans) by default. These act as the root of any uptime issue's trace, giving you better context for faster debugging. 
+Sentry's [**Uptime Monitoring**](/product/uptime-monitoring/) helps you maintain uptime for your web services by monitoring relevant URLs. It continuously tracks configured URLs, delivering alerts and insights to quickly identify downtime and troubleshoot issues. By leveraging [distributed tracing](/product/uptime-monitoring/uptime-tracing/), Sentry enables you to pinpoint any errors that occur during an uptime check, simplifying triage and accelerating root cause analysis. Uptime monitoring includes [uptime request spans](/product/uptime-monitoring/#uptime-request-spans) by default. These act as the root of any uptime issue's trace, giving you better context for faster debugging. ### Recurring Job Monitoring From 5e97f5a30d4194b55d73308b5f3996bf5e3893b7 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Thu, 30 Oct 2025 11:37:27 -0400 Subject: [PATCH 11/33] cleanup old cache files --- scripts/generate-md-exports.mjs | 32 ++++++++++++++++++++++++++++ src/mdx.ts | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 229a700691e29..9011d171a6429 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -93,6 +93,38 @@ async function createWork() { if (noCache) { console.log(`โ„น๏ธ No cache directory found, this will take a while...`); await mkdir(CACHE_DIR, {recursive: true}); + } else { + // Clean up old cache files to prevent unbounded growth + // Keep files accessed within last 7 days only + const MAX_CACHE_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days + const now = Date.now(); + let cleanedCount = 0; + + try { + const {readdir, stat, rm} = await import('node:fs/promises'); + const files = await readdir(CACHE_DIR); + + for (const file of files) { + const filePath = path.join(CACHE_DIR, file); + try { + const stats = await stat(filePath); + const age = now - stats.atimeMs; // Time since last access + + if (age > MAX_CACHE_AGE_MS) { + 
await rm(filePath, {force: true}); + cleanedCount++; + } + } catch (err) { + // Skip files we can't stat/delete + } + } + + if (cleanedCount > 0) { + console.log(`๐Ÿงน Cleaned up ${cleanedCount} old cache files (>7 days)`); + } + } catch (err) { + console.warn('Failed to clean cache:', err); + } } // On a 16-core machine, 8 workers were optimal (and slightly faster than 16) diff --git a/src/mdx.ts b/src/mdx.ts index c2cc571279cdf..77b2a3ea5c47b 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -64,6 +64,43 @@ const CACHE_COMPRESS_LEVEL = 4; const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler'); if (process.env.CI) { mkdirSync(CACHE_DIR, {recursive: true}); + + // Clean up old cache files in background to prevent unbounded growth + // This runs once per worker process and doesn't block the build + (async () => { + try { + const MAX_CACHE_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days + const now = Date.now(); + let cleanedCount = 0; + + const {opendir, rm, stat} = await import('node:fs/promises'); + const dir = await opendir(CACHE_DIR); + + for await (const dirent of dir) { + if (!dirent.isFile() && !dirent.isDirectory()) continue; + + const itemPath = path.join(CACHE_DIR, dirent.name); + try { + const stats = await stat(itemPath); + const age = now - stats.atimeMs; // Time since last access + + if (age > MAX_CACHE_AGE_MS) { + await rm(itemPath, {recursive: true, force: true}); + cleanedCount++; + } + } catch (err) { + // Skip items we can't stat/delete + } + } + + if (cleanedCount > 0) { + // eslint-disable-next-line no-console + console.log(`๐Ÿงน MDX cache: Cleaned up ${cleanedCount} old items (>7 days)`); + } + } catch (err) { + // Silently fail - cache cleanup is not critical + } + })(); } const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex'); From 4011531825395caf2a4fa3d47c4630ac70e1fa18 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Thu, 30 Oct 2025 11:41:52 -0400 Subject: [PATCH 12/33] lint --- scripts/generate-md-exports.mjs 
| 3 +-- src/mdx.ts | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 9011d171a6429..9e6c72f15c60a 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -5,7 +5,7 @@ import imgLinks from '@pondorasti/remark-img-links'; import {selectAll} from 'hast-util-select'; import {createHash} from 'node:crypto'; import {createReadStream, createWriteStream, existsSync} from 'node:fs'; -import {mkdir, opendir, readFile, rm, writeFile} from 'node:fs/promises'; +import {mkdir, opendir, readdir, readFile, rm, stat, writeFile} from 'node:fs/promises'; import {cpus} from 'node:os'; import * as path from 'node:path'; import {compose, Readable} from 'node:stream'; @@ -101,7 +101,6 @@ async function createWork() { let cleanedCount = 0; try { - const {readdir, stat, rm} = await import('node:fs/promises'); const files = await readdir(CACHE_DIR); for (const file of files) { diff --git a/src/mdx.ts b/src/mdx.ts index 77b2a3ea5c47b..3c5c60632539e 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -4,7 +4,7 @@ import yaml from 'js-yaml'; import {bundleMDX} from 'mdx-bundler'; import {BinaryLike, createHash} from 'node:crypto'; import {createReadStream, createWriteStream, mkdirSync} from 'node:fs'; -import {access, cp, mkdir, opendir, readFile} from 'node:fs/promises'; +import {access, cp, mkdir, opendir, readFile, rm, stat} from 'node:fs/promises'; import path from 'node:path'; // @ts-expect-error ts(2305) -- For some reason "compose" is not recognized in the types import {compose, Readable} from 'node:stream'; @@ -73,11 +73,12 @@ if (process.env.CI) { const now = Date.now(); let cleanedCount = 0; - const {opendir, rm, stat} = await import('node:fs/promises'); const dir = await opendir(CACHE_DIR); for await (const dirent of dir) { - if (!dirent.isFile() && !dirent.isDirectory()) continue; + if (!dirent.isFile() && !dirent.isDirectory()) { + continue; + } const itemPath = 
path.join(CACHE_DIR, dirent.name); try { From 4cc99d15e5498b7868739b316923257f91f60ec6 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Thu, 30 Oct 2025 12:10:44 -0400 Subject: [PATCH 13/33] delete if not used --- scripts/generate-md-exports.mjs | 79 ++++++++++++++++++--------------- src/mdx.ts | 5 ++- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 9e6c72f15c60a..e99bfccc1b093 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -5,7 +5,7 @@ import imgLinks from '@pondorasti/remark-img-links'; import {selectAll} from 'hast-util-select'; import {createHash} from 'node:crypto'; import {createReadStream, createWriteStream, existsSync} from 'node:fs'; -import {mkdir, opendir, readdir, readFile, rm, stat, writeFile} from 'node:fs/promises'; +import {mkdir, opendir, readdir, readFile, rm, writeFile} from 'node:fs/promises'; import {cpus} from 'node:os'; import * as path from 'node:path'; import {compose, Readable} from 'node:stream'; @@ -58,7 +58,12 @@ async function uploadToCFR2(s3Client, relativePath, data) { return; } -function taskFinishHandler({id, success, failedTasks}) { +function taskFinishHandler({id, success, failedTasks, usedCacheFiles}, allUsedCacheFiles) { + // Collect cache files used by this worker + if (usedCacheFiles) { + usedCacheFiles.forEach(file => allUsedCacheFiles.add(file)); + } + if (failedTasks.length === 0) { console.log(`โœ… Worker[${id}]: converted ${success} files successfully.`); return false; @@ -93,39 +98,11 @@ async function createWork() { if (noCache) { console.log(`โ„น๏ธ No cache directory found, this will take a while...`); await mkdir(CACHE_DIR, {recursive: true}); - } else { - // Clean up old cache files to prevent unbounded growth - // Keep files accessed within last 7 days only - const MAX_CACHE_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days - const now = Date.now(); - let cleanedCount = 0; - - try { - const 
files = await readdir(CACHE_DIR); - - for (const file of files) { - const filePath = path.join(CACHE_DIR, file); - try { - const stats = await stat(filePath); - const age = now - stats.atimeMs; // Time since last access - - if (age > MAX_CACHE_AGE_MS) { - await rm(filePath, {force: true}); - cleanedCount++; - } - } catch (err) { - // Skip files we can't stat/delete - } - } - - if (cleanedCount > 0) { - console.log(`๐Ÿงน Cleaned up ${cleanedCount} old cache files (>7 days)`); - } - } catch (err) { - console.warn('Failed to clean cache:', err); - } } + // Track which cache files are used during this build + const usedCacheFiles = new Set(); + // On a 16-core machine, 8 workers were optimal (and slightly faster than 16) const numWorkers = Math.max(Math.floor(cpus().length / 2), 2); const workerTasks = new Array(numWorkers).fill(null).map(() => []); @@ -194,7 +171,7 @@ async function createWork() { }, }); let hasErrors = false; - worker.on('message', data => (hasErrors = taskFinishHandler(data))); + worker.on('message', data => (hasErrors = taskFinishHandler(data, usedCacheFiles))); worker.on('error', reject); worker.on('exit', code => { if (code !== 0) { @@ -206,14 +183,16 @@ async function createWork() { }); }); // The main thread can also process tasks -- That's 65% more bullet per bullet! 
-Cave Johnson + const mainThreadUsedFiles = new Set(); workerPromises.push( processTaskList({ id: workerTasks.length - 1, tasks: workerTasks[workerTasks.length - 1], cacheDir: CACHE_DIR, noCache, + usedCacheFiles: mainThreadUsedFiles, }).then(data => { - if (taskFinishHandler(data)) { + if (taskFinishHandler(data, usedCacheFiles)) { throw new Error(`Worker[${data.id}] had some errors.`); } }) @@ -221,13 +200,34 @@ async function createWork() { await Promise.all(workerPromises); + // Clean up unused cache files to prevent unbounded growth + if (!noCache) { + try { + const allFiles = await readdir(CACHE_DIR); + let cleanedCount = 0; + + for (const file of allFiles) { + if (!usedCacheFiles.has(file)) { + await rm(path.join(CACHE_DIR, file), {force: true}); + cleanedCount++; + } + } + + if (cleanedCount > 0) { + console.log(`๐Ÿงน Cleaned up ${cleanedCount} unused cache files`); + } + } catch (err) { + console.warn('Failed to clean unused cache files:', err); + } + } + console.log(`๐Ÿ“„ Generated ${numFiles} markdown files from HTML.`); console.log('โœ… Markdown export generation complete!'); } const md5 = data => createHash('md5').update(data).digest('hex'); -async function genMDFromHTML(source, target, {cacheDir, noCache}) { +async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles}) { const leanHTML = (await readFile(source, {encoding: 'utf8'})) // Remove all script tags, as they are not needed in markdown // and they are not stable across builds, causing cache misses @@ -241,6 +241,9 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) { ); await writeFile(target, data, {encoding: 'utf8'}); + // Track that we used this cache file + usedCacheFiles.add(cacheKey); + return {cacheHit: true, data}; } catch (err) { if (err.code !== 'ENOENT') { @@ -338,7 +341,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache}) { return {cacheHit: false, data}; } -async function processTaskList({id, tasks, cacheDir, noCache}) { 
+async function processTaskList({id, tasks, cacheDir, noCache, usedCacheFiles}) { const s3Client = getS3Client(); const failedTasks = []; let cacheMisses = []; @@ -349,6 +352,7 @@ async function processTaskList({id, tasks, cacheDir, noCache}) { const {data, cacheHit} = await genMDFromHTML(sourcePath, targetPath, { cacheDir, noCache, + usedCacheFiles, }); if (!cacheHit) { cacheMisses.push(relativePath); @@ -388,6 +392,7 @@ async function processTaskList({id, tasks, cacheDir, noCache}) { id, success, failedTasks, + usedCacheFiles: Array.from(usedCacheFiles), }; } diff --git a/src/mdx.ts b/src/mdx.ts index 3c5c60632539e..20db70cbdfbf6 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -66,10 +66,11 @@ if (process.env.CI) { mkdirSync(CACHE_DIR, {recursive: true}); // Clean up old cache files in background to prevent unbounded growth + // Delete any file not accessed in the last 24 hours (meaning it wasn't used in recent builds) // This runs once per worker process and doesn't block the build (async () => { try { - const MAX_CACHE_AGE_MS = 7 * 24 * 60 * 60 * 1000; // 7 days + const MAX_CACHE_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours const now = Date.now(); let cleanedCount = 0; @@ -96,7 +97,7 @@ if (process.env.CI) { if (cleanedCount > 0) { // eslint-disable-next-line no-console - console.log(`๐Ÿงน MDX cache: Cleaned up ${cleanedCount} old items (>7 days)`); + console.log(`๐Ÿงน MDX cache: Cleaned up ${cleanedCount} unused items (>24h)`); } } catch (err) { // Silently fail - cache cleanup is not critical From 95385be4cc3fa77ec720072e4f07ae77d0b9ca6f Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Thu, 30 Oct 2025 12:21:55 -0400 Subject: [PATCH 14/33] worker cleanup --- scripts/generate-md-exports.mjs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index e99bfccc1b093..b58e13d6a11fb 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -242,7 
+242,9 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} await writeFile(target, data, {encoding: 'utf8'}); // Track that we used this cache file - usedCacheFiles.add(cacheKey); + if (usedCacheFiles) { + usedCacheFiles.add(cacheKey); + } return {cacheHit: true, data}; } catch (err) { @@ -338,10 +340,20 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} ).catch(err => console.warn('Error writing cache file:', err)), ]); + // Track that we created this cache file + if (usedCacheFiles) { + usedCacheFiles.add(cacheKey); + } + return {cacheHit: false, data}; } async function processTaskList({id, tasks, cacheDir, noCache, usedCacheFiles}) { + // Workers don't receive usedCacheFiles in workerData, so create a new Set + if (!usedCacheFiles) { + usedCacheFiles = new Set(); + } + const s3Client = getS3Client(); const failedTasks = []; let cacheMisses = []; From bef41c22768140a7bd023d17d96d7daaf7ee94d0 Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Thu, 30 Oct 2025 16:11:31 +0000 Subject: [PATCH 15/33] [getsentry/action-github-commit] Auto commit --- scripts/generate-md-exports.mjs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index b58e13d6a11fb..090ecfed7a0bc 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -58,7 +58,10 @@ async function uploadToCFR2(s3Client, relativePath, data) { return; } -function taskFinishHandler({id, success, failedTasks, usedCacheFiles}, allUsedCacheFiles) { +function taskFinishHandler( + {id, success, failedTasks, usedCacheFiles}, + allUsedCacheFiles +) { // Collect cache files used by this worker if (usedCacheFiles) { usedCacheFiles.forEach(file => allUsedCacheFiles.add(file)); From 6e8ad912b4a6eb0f8eb8389425bacd4074d88ad5 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 14:34:21 
-0400 Subject: [PATCH 16/33] address byk's comments --- scripts/generate-md-exports.mjs | 36 +++++++++++++---------------- src/mdx.ts | 41 +-------------------------------- 2 files changed, 17 insertions(+), 60 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 090ecfed7a0bc..671639e7d900a 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -58,13 +58,13 @@ async function uploadToCFR2(s3Client, relativePath, data) { return; } -function taskFinishHandler( - {id, success, failedTasks, usedCacheFiles}, - allUsedCacheFiles -) { - // Collect cache files used by this worker - if (usedCacheFiles) { - usedCacheFiles.forEach(file => allUsedCacheFiles.add(file)); +// Global set to track which cache files are used across all workers +let globalUsedCacheFiles = null; + +function taskFinishHandler({id, success, failedTasks, usedCacheFiles}) { + // Collect cache files used by this worker into the global set + if (usedCacheFiles && globalUsedCacheFiles) { + usedCacheFiles.forEach(file => globalUsedCacheFiles.add(file)); } if (failedTasks.length === 0) { @@ -104,7 +104,7 @@ async function createWork() { } // Track which cache files are used during this build - const usedCacheFiles = new Set(); + globalUsedCacheFiles = new Set(); // On a 16-core machine, 8 workers were optimal (and slightly faster than 16) const numWorkers = Math.max(Math.floor(cpus().length / 2), 2); @@ -174,7 +174,7 @@ async function createWork() { }, }); let hasErrors = false; - worker.on('message', data => (hasErrors = taskFinishHandler(data, usedCacheFiles))); + worker.on('message', data => (hasErrors = taskFinishHandler(data))); worker.on('error', reject); worker.on('exit', code => { if (code !== 0) { @@ -195,7 +195,7 @@ async function createWork() { noCache, usedCacheFiles: mainThreadUsedFiles, }).then(data => { - if (taskFinishHandler(data, usedCacheFiles)) { + if (taskFinishHandler(data)) { throw new Error(`Worker[${data.id}] 
had some errors.`); } }) @@ -207,17 +207,13 @@ async function createWork() { if (!noCache) { try { const allFiles = await readdir(CACHE_DIR); - let cleanedCount = 0; - - for (const file of allFiles) { - if (!usedCacheFiles.has(file)) { - await rm(path.join(CACHE_DIR, file), {force: true}); - cleanedCount++; - } - } + const filesToDelete = allFiles.filter(file => !globalUsedCacheFiles.has(file)); - if (cleanedCount > 0) { - console.log(`🧹 Cleaned up ${cleanedCount} unused cache files`); + if (filesToDelete.length > 0) { + await Promise.all( + filesToDelete.map(file => rm(path.join(CACHE_DIR, file), {force: true})) + ); + console.log(`🧹 Cleaned up ${filesToDelete.length} unused cache files`); } } catch (err) { console.warn('Failed to clean unused cache files:', err); diff --git a/src/mdx.ts b/src/mdx.ts index 20db70cbdfbf6..c2cc571279cdf 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -4,7 +4,7 @@ import yaml from 'js-yaml'; import {bundleMDX} from 'mdx-bundler'; import {BinaryLike, createHash} from 'node:crypto'; import {createReadStream, createWriteStream, mkdirSync} from 'node:fs'; -import {access, cp, mkdir, opendir, readFile, rm, stat} from 'node:fs/promises'; +import {access, cp, mkdir, opendir, readFile} from 'node:fs/promises'; import path from 'node:path'; // @ts-expect-error ts(2305) -- For some reason "compose" is not recognized in the types import {compose, Readable} from 'node:stream'; @@ -64,45 +64,6 @@ const CACHE_COMPRESS_LEVEL = 4; const CACHE_DIR = path.join(root, '.next', 'cache', 'mdx-bundler'); if (process.env.CI) { mkdirSync(CACHE_DIR, {recursive: true}); - - // Clean up old cache files in background to prevent unbounded growth - // Delete any file not accessed in the last 24 hours (meaning it wasn't used in recent builds) - // This runs once per worker process and doesn't block the build - (async () => { - try { - const MAX_CACHE_AGE_MS = 24 * 60 * 60 * 1000; // 24 hours - const now = Date.now(); - let cleanedCount = 0; - - const dir = await 
opendir(CACHE_DIR); - - for await (const dirent of dir) { - if (!dirent.isFile() && !dirent.isDirectory()) { - continue; - } - - const itemPath = path.join(CACHE_DIR, dirent.name); - try { - const stats = await stat(itemPath); - const age = now - stats.atimeMs; // Time since last access - - if (age > MAX_CACHE_AGE_MS) { - await rm(itemPath, {recursive: true, force: true}); - cleanedCount++; - } - } catch (err) { - // Skip items we can't stat/delete - } - } - - if (cleanedCount > 0) { - // eslint-disable-next-line no-console - console.log(`🧹 MDX cache: Cleaned up ${cleanedCount} unused items (>24h)`); - } - } catch (err) { - // Silently fail - cache cleanup is not critical - } - })(); } const md5 = (data: BinaryLike) => createHash('md5').update(data).digest('hex'); From e803adce67eb50c424e2e49de5f8403ed3148677 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 15:10:07 -0400 Subject: [PATCH 17/33] Fix merge conflict resolution: use cacheKey condition instead of skipCache --- src/mdx.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdx.ts b/src/mdx.ts index c2cc571279cdf..a0013ec47b126 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -784,7 +784,7 @@ export async function getFileBySlug(slug: string): Promise { }, }; - if (assetsCacheDir && cacheFile && !skipCache) { + if (assetsCacheDir && cacheFile && cacheKey) { try { await cp(assetsCacheDir, outdir, {recursive: true}); } catch (e) { From aa5dc1685c714ccd072c3d2326b7b0075406d153 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 15:11:47 -0400 Subject: [PATCH 18/33] Remove error cache reset - cache failures so worker fails fast --- src/mdx.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/mdx.ts b/src/mdx.ts index a0013ec47b126..1fc5189fad55e 100644 --- a/src/mdx.ts +++ b/src/mdx.ts @@ -109,16 +109,13 @@ async function getRegistryHashWithRetry( /** * Get the registry hash, using cached value if available. 
* This ensures we only fetch the registry once per worker process. + * If the fetch fails, the error is cached so subsequent calls fail fast. */ function getRegistryHash(): Promise { if (!cachedRegistryHash) { // eslint-disable-next-line no-console console.info('Fetching registry hash for the first time in this worker'); - cachedRegistryHash = getRegistryHashWithRetry().catch(err => { - // Reset cache on error so next call will retry - cachedRegistryHash = null; - throw err; - }); + cachedRegistryHash = getRegistryHashWithRetry(); } return cachedRegistryHash; } From 5116b556b3f2262f98f96fa66af41978a9e58394 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 15:34:37 -0400 Subject: [PATCH 19/33] Add debug logging for cache tracking --- scripts/generate-md-exports.mjs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 671639e7d900a..58c202a1793e9 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -64,7 +64,10 @@ let globalUsedCacheFiles = null; function taskFinishHandler({id, success, failedTasks, usedCacheFiles}) { // Collect cache files used by this worker into the global set if (usedCacheFiles && globalUsedCacheFiles) { + console.log(`๐Ÿ” Worker[${id}]: returned ${usedCacheFiles.length} cache files`); usedCacheFiles.forEach(file => globalUsedCacheFiles.add(file)); + } else { + console.warn(`โš ๏ธ Worker[${id}]: usedCacheFiles=${!!usedCacheFiles}, globalUsedCacheFiles=${!!globalUsedCacheFiles}`); } if (failedTasks.length === 0) { @@ -209,6 +212,11 @@ async function createWork() { const allFiles = await readdir(CACHE_DIR); const filesToDelete = allFiles.filter(file => !globalUsedCacheFiles.has(file)); + console.log(`๐Ÿ“Š Cache tracking stats:`); + console.log(` - Files in cache dir: ${allFiles.length}`); + console.log(` - Files tracked as used: ${globalUsedCacheFiles.size}`); + console.log(` - Files to delete: 
${filesToDelete.length}`); + if (filesToDelete.length > 0) { await Promise.all( filesToDelete.map(file => rm(path.join(CACHE_DIR, file), {force: true})) From 64ed4c0ed39e17cac90fe20b44473ff247463971 Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 19:35:35 +0000 Subject: [PATCH 20/33] [getsentry/action-github-commit] Auto commit --- scripts/generate-md-exports.mjs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 58c202a1793e9..b2b5794749434 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -67,7 +67,9 @@ function taskFinishHandler({id, success, failedTasks, usedCacheFiles}) { console.log(`๐Ÿ” Worker[${id}]: returned ${usedCacheFiles.length} cache files`); usedCacheFiles.forEach(file => globalUsedCacheFiles.add(file)); } else { - console.warn(`โš ๏ธ Worker[${id}]: usedCacheFiles=${!!usedCacheFiles}, globalUsedCacheFiles=${!!globalUsedCacheFiles}`); + console.warn( + `โš ๏ธ Worker[${id}]: usedCacheFiles=${!!usedCacheFiles}, globalUsedCacheFiles=${!!globalUsedCacheFiles}` + ); } if (failedTasks.length === 0) { From 9e33c137d7d0dcd5b10915b64213b3a41a10590e Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 15:58:09 -0400 Subject: [PATCH 21/33] Add detailed debug logging for cache cleanup --- scripts/generate-md-exports.mjs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index b2b5794749434..a3bbf853d4df8 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -219,12 +219,21 @@ async function createWork() { console.log(` - Files tracked as used: ${globalUsedCacheFiles.size}`); console.log(` - Files to delete: ${filesToDelete.length}`); + // Debug: Show a few examples of what we're comparing + console.log(` - Example used files: 
${Array.from(globalUsedCacheFiles).slice(0, 3).join(', ')}`); + console.log(` - Example dir files: ${allFiles.slice(0, 3).join(', ')}`); + console.log(` - Example to delete: ${filesToDelete.slice(0, 3).join(', ')}`); + if (filesToDelete.length > 0) { await Promise.all( filesToDelete.map(file => rm(path.join(CACHE_DIR, file), {force: true})) ); console.log(`🧹 Cleaned up ${filesToDelete.length} unused cache files`); } + + // Verify cleanup worked + const remainingFiles = await readdir(CACHE_DIR); + console.log(`✅ Cache directory now has ${remainingFiles.length} files`); } catch (err) { console.warn('Failed to clean unused cache files:', err); } From 95af6ef7a8a3f21cfd0d042624769882d8c4183d Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 19:59:27 +0000 Subject: [PATCH 22/33] [getsentry/action-github-commit] Auto commit --- scripts/generate-md-exports.mjs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index a3bbf853d4df8..e13f26fa3aa81 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -220,7 +220,9 @@ async function createWork() { console.log(` - Files to delete: ${filesToDelete.length}`); // Debug: Show a few examples of what we're comparing - console.log(` - Example used files: ${Array.from(globalUsedCacheFiles).slice(0, 3).join(', ')}`); + console.log( + ` - Example used files: ${Array.from(globalUsedCacheFiles).slice(0, 3).join(', ')}` + ); console.log(` - Example dir files: ${allFiles.slice(0, 3).join(', ')}`); console.log(` - Example to delete: ${filesToDelete.slice(0, 3).join(', ')}`); From 6a793f2bf100a62cc9b5f4bc69b649cb7890f331 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 16:13:58 -0400 Subject: [PATCH 23/33] Add overlap detection and cache miss debugging --- scripts/generate-md-exports.mjs | 20 +++++++++++++++++--- 1 file changed, 17 
insertions(+), 3 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index e13f26fa3aa81..21186b39fc148 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -220,12 +220,18 @@ async function createWork() { console.log(` - Files to delete: ${filesToDelete.length}`); // Debug: Show a few examples of what we're comparing - console.log( - ` - Example used files: ${Array.from(globalUsedCacheFiles).slice(0, 3).join(', ')}` - ); + const usedArray = Array.from(globalUsedCacheFiles); + console.log(` - Example used files: ${usedArray.slice(0, 3).join(', ')}`); console.log(` - Example dir files: ${allFiles.slice(0, 3).join(', ')}`); console.log(` - Example to delete: ${filesToDelete.slice(0, 3).join(', ')}`); + // Check if there's ANY overlap + const overlaps = allFiles.filter(file => globalUsedCacheFiles.has(file)); + console.log(` - Files that overlap (exist in both): ${overlaps.length}`); + if (overlaps.length > 0) { + console.log(` - Example overlaps: ${overlaps.slice(0, 3).join(', ')}`); + } + if (filesToDelete.length > 0) { await Promise.all( filesToDelete.map(file => rm(path.join(CACHE_DIR, file), {force: true})) @@ -271,6 +277,14 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} if (err.code !== 'ENOENT') { console.warn(`Error using cache file ${cacheFile}:`, err); } + // Log first cache miss to help debug why HTML is changing + if (err.code === 'ENOENT' && !genMDFromHTML._loggedFirstMiss) { + genMDFromHTML._loggedFirstMiss = true; + console.log(`๐Ÿ” First cache miss: ${source}`); + console.log(` Looking for cache key: ${cacheKey}`); + console.log(` HTML length: ${leanHTML.length} chars`); + console.log(` First 200 chars: ${leanHTML.substring(0, 200).replace(/\n/g, '\\n')}`); + } } } let baseUrl = DOCS_ORIGIN; From 529e6927b4871462976d8fbd61052a18de411248 Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: 
Fri, 31 Oct 2025 20:15:21 +0000 Subject: [PATCH 24/33] [getsentry/action-github-commit] Auto commit --- scripts/generate-md-exports.mjs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 21186b39fc148..4e9cd7956feee 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -283,7 +283,9 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} console.log(`๐Ÿ” First cache miss: ${source}`); console.log(` Looking for cache key: ${cacheKey}`); console.log(` HTML length: ${leanHTML.length} chars`); - console.log(` First 200 chars: ${leanHTML.substring(0, 200).replace(/\n/g, '\\n')}`); + console.log( + ` First 200 chars: ${leanHTML.substring(0, 200).replace(/\n/g, '\\n')}` + ); } } } From 577e9d46c5600904741f184b353c02489ac29afc Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 16:29:34 -0400 Subject: [PATCH 25/33] Add detailed cache hit/miss stats and initial cache size --- scripts/generate-md-exports.mjs | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 4e9cd7956feee..e7f865aabe2f6 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -106,6 +106,9 @@ async function createWork() { if (noCache) { console.log(`โ„น๏ธ No cache directory found, this will take a while...`); await mkdir(CACHE_DIR, {recursive: true}); + } else { + const initialCacheFiles = await readdir(CACHE_DIR); + console.log(`๐Ÿ“ฆ Cache directory has ${initialCacheFiles.length} files from previous build`); } // Track which cache files are used during this build @@ -213,24 +216,20 @@ async function createWork() { try { const allFiles = await readdir(CACHE_DIR); const filesToDelete = allFiles.filter(file => !globalUsedCacheFiles.has(file)); + const overlaps = allFiles.filter(file => 
globalUsedCacheFiles.has(file)); console.log(`๐Ÿ“Š Cache tracking stats:`); - console.log(` - Files in cache dir: ${allFiles.length}`); + console.log(` - Files in cache dir (after build): ${allFiles.length}`); console.log(` - Files tracked as used: ${globalUsedCacheFiles.size}`); - console.log(` - Files to delete: ${filesToDelete.length}`); + console.log(` - Files that existed and were used: ${overlaps.length}`); + console.log(` - Files to delete (old/unused): ${filesToDelete.length}`); + console.log(` - Expected after cleanup: ${overlaps.length} files`); - // Debug: Show a few examples of what we're comparing - const usedArray = Array.from(globalUsedCacheFiles); - console.log(` - Example used files: ${usedArray.slice(0, 3).join(', ')}`); - console.log(` - Example dir files: ${allFiles.slice(0, 3).join(', ')}`); - console.log(` - Example to delete: ${filesToDelete.slice(0, 3).join(', ')}`); + // Debug: Show a few examples + console.log(` - Example used: ${Array.from(globalUsedCacheFiles).slice(0, 2).join(', ')}`); + console.log(` - Example to delete: ${filesToDelete.slice(0, 2).join(', ')}`); + console.log(` - Example kept: ${overlaps.slice(0, 2).join(', ')}`); - // Check if there's ANY overlap - const overlaps = allFiles.filter(file => globalUsedCacheFiles.has(file)); - console.log(` - Files that overlap (exist in both): ${overlaps.length}`); - if (overlaps.length > 0) { - console.log(` - Example overlaps: ${overlaps.slice(0, 3).join(', ')}`); - } if (filesToDelete.length > 0) { await Promise.all( @@ -428,6 +427,9 @@ async function processTaskList({id, tasks, cacheDir, noCache, usedCacheFiles}) { `๐Ÿ“ค Worker[${id}]: Updated the following files on R2: \n${r2CacheMisses.map(n => ` - ${n}`).join('\n')}` ); } + const cacheHits = success - cacheMisses.length; + console.log(`๐Ÿ“ˆ Worker[${id}]: Cache stats: ${cacheHits} hits, ${cacheMisses.length} misses (${((cacheMisses.length / success) * 100).toFixed(1)}% miss rate)`); + if (cacheMisses.length / tasks.length > 0.1) 
{ console.warn(`โš ๏ธ Worker[${id}]: More than 10% cache miss rate during build.`); } else if (cacheMisses.length > 0) { From 4683bcf0268d4a8e3af240591cfb2878c5033bab Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 20:31:02 +0000 Subject: [PATCH 26/33] [getsentry/action-github-commit] Auto commit --- scripts/generate-md-exports.mjs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index e7f865aabe2f6..c67f25e336ce0 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -108,7 +108,9 @@ async function createWork() { await mkdir(CACHE_DIR, {recursive: true}); } else { const initialCacheFiles = await readdir(CACHE_DIR); - console.log(`๐Ÿ“ฆ Cache directory has ${initialCacheFiles.length} files from previous build`); + console.log( + `๐Ÿ“ฆ Cache directory has ${initialCacheFiles.length} files from previous build` + ); } // Track which cache files are used during this build @@ -226,11 +228,12 @@ async function createWork() { console.log(` - Expected after cleanup: ${overlaps.length} files`); // Debug: Show a few examples - console.log(` - Example used: ${Array.from(globalUsedCacheFiles).slice(0, 2).join(', ')}`); + console.log( + ` - Example used: ${Array.from(globalUsedCacheFiles).slice(0, 2).join(', ')}` + ); console.log(` - Example to delete: ${filesToDelete.slice(0, 2).join(', ')}`); console.log(` - Example kept: ${overlaps.slice(0, 2).join(', ')}`); - if (filesToDelete.length > 0) { await Promise.all( filesToDelete.map(file => rm(path.join(CACHE_DIR, file), {force: true})) @@ -428,7 +431,9 @@ async function processTaskList({id, tasks, cacheDir, noCache, usedCacheFiles}) { ); } const cacheHits = success - cacheMisses.length; - console.log(`๐Ÿ“ˆ Worker[${id}]: Cache stats: ${cacheHits} hits, ${cacheMisses.length} misses (${((cacheMisses.length / success) * 
100).toFixed(1)}% miss rate)`); + console.log( + `๐Ÿ“ˆ Worker[${id}]: Cache stats: ${cacheHits} hits, ${cacheMisses.length} misses (${((cacheMisses.length / success) * 100).toFixed(1)}% miss rate)` + ); if (cacheMisses.length / tasks.length > 0.1) { console.warn(`โš ๏ธ Worker[${id}]: More than 10% cache miss rate during build.`); From b041698b299ec76a3b452a6e474c3e37b3002ab0 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 16:43:38 -0400 Subject: [PATCH 27/33] Add detection for non-deterministic HTML patterns --- scripts/generate-md-exports.mjs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index c67f25e336ce0..3d33cfb6933ab 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -285,9 +285,20 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} console.log(`๐Ÿ” First cache miss: ${source}`); console.log(` Looking for cache key: ${cacheKey}`); console.log(` HTML length: ${leanHTML.length} chars`); + + // Look for common non-deterministic patterns + const buildHashMatch = leanHTML.match(/buildId['":]+"([^"]+)"/); + const timestampMatch = leanHTML.match(/timestamp['":]+"?(\d+)"?/i); + const dateMatch = leanHTML.match(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/); + + console.log( + ` Build hash found: ${buildHashMatch ? buildHashMatch[1] : 'none'}` + ); console.log( - ` First 200 chars: ${leanHTML.substring(0, 200).replace(/\n/g, '\\n')}` + ` Timestamp found: ${timestampMatch ? timestampMatch[1] : 'none'}` ); + console.log(` Date found: ${dateMatch ? 
dateMatch[0] : 'none'}`); + console.log(` First 500 chars: ${leanHTML.substring(0, 500)}`); } } } From cefcb464acd28075b5c774306967a498e35fa945 Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Fri, 31 Oct 2025 20:44:45 +0000 Subject: [PATCH 28/33] [getsentry/action-github-commit] Auto commit --- scripts/generate-md-exports.mjs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 3d33cfb6933ab..17086195f5495 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -294,9 +294,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} console.log( ` Build hash found: ${buildHashMatch ? buildHashMatch[1] : 'none'}` ); - console.log( - ` Timestamp found: ${timestampMatch ? timestampMatch[1] : 'none'}` - ); + console.log(` Timestamp found: ${timestampMatch ? timestampMatch[1] : 'none'}`); console.log(` Date found: ${dateMatch ? dateMatch[0] : 'none'}`); console.log(` First 500 chars: ${leanHTML.substring(0, 500)}`); } From bd569ac3abd86865ba3de5979480a91fdc13fe31 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 16:55:19 -0400 Subject: [PATCH 29/33] Fix cache by normalizing timestamps and Next.js asset hashes - Strip ISO timestamps (2025-10-29T16:22:19) before hashing - Normalize Next.js asset hashes in paths (/_next/static/css/abc123...) 
- Bump CACHE_VERSION to 4 since cache key format changed - This should fix the 99.99% cache miss rate --- scripts/generate-md-exports.mjs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 17086195f5495..393211bbaf6e7 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -27,7 +27,7 @@ import {unified} from 'unified'; import {remove} from 'unist-util-remove'; const DOCS_ORIGIN = 'https://docs.sentry.io'; -const CACHE_VERSION = 3; +const CACHE_VERSION = 4; // Bumped: now normalizing timestamps and Next.js asset hashes const CACHE_COMPRESS_LEVEL = 4; const R2_BUCKET = process.env.NEXT_PUBLIC_DEVELOPER_DOCS ? 'sentry-develop-docs' @@ -259,7 +259,12 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} const leanHTML = (await readFile(source, {encoding: 'utf8'})) // Remove all script tags, as they are not needed in markdown // and they are not stable across builds, causing cache misses - .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ''); + .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '') + // Remove ISO timestamps (e.g., "2025-10-29T16:22:19") that change each build + .replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{3})?Z?/g, 'BUILD_TIME') + // Normalize Next.js asset hashes in paths (e.g., /_next/static/css/abc123.css) + // so cache isn't invalidated when only asset hashes change + .replace(/\/_next\/static\/([^\/]+)\/[a-f0-9]{16,}/g, '/_next/static/$1/BUILD_HASH'); const cacheKey = `v${CACHE_VERSION}_${md5(leanHTML)}`; const cacheFile = path.join(cacheDir, cacheKey); if (!noCache) { From eba934663150d9f1b11f1af99dff1a8c060e39ea Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 17:00:13 -0400 Subject: [PATCH 30/33] Revert hasty fix and add proper debugging - Reverted timestamp/asset hash normalization (not verified) - Instead: save first 2000 chars of HTML to debug file - This lets us diff between builds to see 
what's ACTUALLY changing - Will investigate properly before applying fixes --- scripts/generate-md-exports.mjs | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 393211bbaf6e7..40e43128b2b5f 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -259,12 +259,7 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} const leanHTML = (await readFile(source, {encoding: 'utf8'})) // Remove all script tags, as they are not needed in markdown // and they are not stable across builds, causing cache misses - .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '') - // Remove ISO timestamps (e.g., "2025-10-29T16:22:19") that change each build - .replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{3})?Z?/g, 'BUILD_TIME') - // Normalize Next.js asset hashes in paths (e.g., /_next/static/css/abc123.css) - // so cache isn't invalidated when only asset hashes change - .replace(/\/_next\/static\/([^\/]+)\/[a-f0-9]{16,}/g, '/_next/static/$1/BUILD_HASH'); + .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, ''); const cacheKey = `v${CACHE_VERSION}_${md5(leanHTML)}`; const cacheFile = path.join(cacheDir, cacheKey); if (!noCache) { @@ -291,17 +286,12 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} console.log(` Looking for cache key: ${cacheKey}`); console.log(` HTML length: ${leanHTML.length} chars`); - // Look for common non-deterministic patterns - const buildHashMatch = leanHTML.match(/buildId['":]+"([^"]+)"/); - const timestampMatch = leanHTML.match(/timestamp['":]+"?(\d+)"?/i); - const dateMatch = leanHTML.match(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/); - - console.log( - ` Build hash found: ${buildHashMatch ? buildHashMatch[1] : 'none'}` - ); - console.log(` Timestamp found: ${timestampMatch ? timestampMatch[1] : 'none'}`); - console.log(` Date found: ${dateMatch ? 
dateMatch[0] : 'none'}`); - console.log(` First 500 chars: ${leanHTML.substring(0, 500)}`); + // Save the first 2000 chars to a temp file so we can diff between builds + const debugFile = path.join(cacheDir, '..', 'debug-first-miss.txt'); + writeFile(debugFile, `${source}\n${cacheKey}\n${leanHTML.substring(0, 2000)}`, { + encoding: 'utf8', + }).catch(() => {}); + console.log(` Saved first 2000 chars to ${debugFile} for comparison`); } } } From 3b93727f08f9844ce07a2f28ec89c36251514766 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 17:02:07 -0400 Subject: [PATCH 31/33] Output debug info to build logs instead of file - Log existing v4 cache files for comparison - Log HTML in 800-char chunks to avoid truncation - This way we can compare build logs to see what's changing --- scripts/generate-md-exports.mjs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 40e43128b2b5f..c151c89683b70 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -286,12 +286,18 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} console.log(` Looking for cache key: ${cacheKey}`); console.log(` HTML length: ${leanHTML.length} chars`); - // Save the first 2000 chars to a temp file so we can diff between builds - const debugFile = path.join(cacheDir, '..', 'debug-first-miss.txt'); - writeFile(debugFile, `${source}\n${cacheKey}\n${leanHTML.substring(0, 2000)}`, { - encoding: 'utf8', - }).catch(() => {}); - console.log(` Saved first 2000 chars to ${debugFile} for comparison`); + // List a few cache files that exist to compare + try { + const existingFiles = await readdir(cacheDir); + const v4Files = existingFiles.filter(f => f.startsWith('v4_')).slice(0, 5); + console.log(` Existing v4 cache files: ${v4Files.join(', ')}`); + } catch (e) { + // Ignore + } + + // Log HTML in chunks to avoid truncation + 
console.log(` HTML chunk 1 (0-800): ${leanHTML.substring(0, 800)}`); + console.log(` HTML chunk 2 (800-1600): ${leanHTML.substring(800, 1600)}`); } } } From 28b643195ba213bc3bb87091cf0d0c79b88beb5a Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 17:09:02 -0400 Subject: [PATCH 32/33] Save leanHTML samples locally for debugging - Saves each cache miss HTML to .next/cache/debug-html-samples/ - Filenames include timestamp for comparison between builds - Only runs locally (skips in CI) - Can diff files to see what's actually changing --- scripts/generate-md-exports.mjs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index c151c89683b70..1fdb8e745f5a3 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -279,25 +279,25 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} if (err.code !== 'ENOENT') { console.warn(`Error using cache file ${cacheFile}:`, err); } - // Log first cache miss to help debug why HTML is changing - if (err.code === 'ENOENT' && !genMDFromHTML._loggedFirstMiss) { - genMDFromHTML._loggedFirstMiss = true; - console.log(`๐Ÿ” First cache miss: ${source}`); - console.log(` Looking for cache key: ${cacheKey}`); - console.log(` HTML length: ${leanHTML.length} chars`); - - // List a few cache files that exist to compare + // Save HTML samples for debugging (skip in CI) + if (!process.env.CI && err.code === 'ENOENT') { + const debugDir = path.join(cacheDir, '..', 'debug-html-samples'); try { - const existingFiles = await readdir(cacheDir); - const v4Files = existingFiles.filter(f => f.startsWith('v4_')).slice(0, 5); - console.log(` Existing v4 cache files: ${v4Files.join(', ')}`); + await mkdir(debugDir, {recursive: true}); + const timestamp = Date.now(); + const basename = path.basename(source, '.html'); + const debugFile = path.join(debugDir, 
`${basename}-${timestamp}.html`); + await writeFile(debugFile, leanHTML, {encoding: 'utf8'}); + + if (!genMDFromHTML._loggedFirstMiss) { + genMDFromHTML._loggedFirstMiss = true; + console.log(`๐Ÿ” First cache miss: ${source}`); + console.log(` Saving HTML samples to: ${debugDir}`); + console.log(` Compare files to find what's changing between builds`); + } } catch (e) { - // Ignore + // Ignore errors } - - // Log HTML in chunks to avoid truncation - console.log(` HTML chunk 1 (0-800): ${leanHTML.substring(0, 800)}`); - console.log(` HTML chunk 2 (800-1600): ${leanHTML.substring(800, 1600)}`); } } } From 77e937bf3d22d1d570b798da57e0d7f11b696c98 Mon Sep 17 00:00:00 2001 From: Sergiy Dybskiy Date: Fri, 31 Oct 2025 18:05:39 -0400 Subject: [PATCH 33/33] keep testing --- scripts/generate-md-exports.mjs | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/scripts/generate-md-exports.mjs b/scripts/generate-md-exports.mjs index 1fdb8e745f5a3..35bbfb55c442e 100644 --- a/scripts/generate-md-exports.mjs +++ b/scripts/generate-md-exports.mjs @@ -27,7 +27,7 @@ import {unified} from 'unified'; import {remove} from 'unist-util-remove'; const DOCS_ORIGIN = 'https://docs.sentry.io'; -const CACHE_VERSION = 4; // Bumped: now normalizing timestamps and Next.js asset hashes +const CACHE_VERSION = 3; const CACHE_COMPRESS_LEVEL = 4; const R2_BUCKET = process.env.NEXT_PUBLIC_DEVELOPER_DOCS ? 
'sentry-develop-docs' @@ -64,7 +64,7 @@ let globalUsedCacheFiles = null; function taskFinishHandler({id, success, failedTasks, usedCacheFiles}) { // Collect cache files used by this worker into the global set if (usedCacheFiles && globalUsedCacheFiles) { - console.log(`๐Ÿ” Worker[${id}]: returned ${usedCacheFiles.length} cache files`); + console.log(`๐Ÿ” Worker[${id}]: returned ${usedCacheFiles.length} cache files.`); usedCacheFiles.forEach(file => globalUsedCacheFiles.add(file)); } else { console.warn( @@ -227,13 +227,6 @@ async function createWork() { console.log(` - Files to delete (old/unused): ${filesToDelete.length}`); console.log(` - Expected after cleanup: ${overlaps.length} files`); - // Debug: Show a few examples - console.log( - ` - Example used: ${Array.from(globalUsedCacheFiles).slice(0, 2).join(', ')}` - ); - console.log(` - Example to delete: ${filesToDelete.slice(0, 2).join(', ')}`); - console.log(` - Example kept: ${overlaps.slice(0, 2).join(', ')}`); - if (filesToDelete.length > 0) { await Promise.all( filesToDelete.map(file => rm(path.join(CACHE_DIR, file), {force: true})) @@ -279,26 +272,6 @@ async function genMDFromHTML(source, target, {cacheDir, noCache, usedCacheFiles} if (err.code !== 'ENOENT') { console.warn(`Error using cache file ${cacheFile}:`, err); } - // Save HTML samples for debugging (skip in CI) - if (!process.env.CI && err.code === 'ENOENT') { - const debugDir = path.join(cacheDir, '..', 'debug-html-samples'); - try { - await mkdir(debugDir, {recursive: true}); - const timestamp = Date.now(); - const basename = path.basename(source, '.html'); - const debugFile = path.join(debugDir, `${basename}-${timestamp}.html`); - await writeFile(debugFile, leanHTML, {encoding: 'utf8'}); - - if (!genMDFromHTML._loggedFirstMiss) { - genMDFromHTML._loggedFirstMiss = true; - console.log(`๐Ÿ” First cache miss: ${source}`); - console.log(` Saving HTML samples to: ${debugDir}`); - console.log(` Compare files to find what's changing between 
builds`); - } - } catch (e) { - // Ignore errors - } - } } } let baseUrl = DOCS_ORIGIN;