diff --git a/.github/workflows/generate-llms.yml b/.github/workflows/generate-llms.yml index 094b553..7729b13 100644 --- a/.github/workflows/generate-llms.yml +++ b/.github/workflows/generate-llms.yml @@ -2,6 +2,15 @@ name: Generate LLMs files on: workflow_dispatch: + inputs: + environment: + description: 'Environment' + required: true + default: 'dev' + type: choice + options: + - dev + - prod permissions: contents: write @@ -23,27 +32,24 @@ jobs: - name: Install dependencies run: yarn install --frozen-lockfile - - name: Copy prod env file + - name: Copy env file run: | - echo "${{ secrets.ENV_PROD }}" | base64 -d > .env + if [ "${{ inputs.environment }}" = "prod" ]; then + echo "${{ secrets.ENV_PROD }}" | base64 -d > .env + else + echo "${{ secrets.ENV_DEV }}" | base64 -d > .env + fi rm -f .env.local - - name: Generate llms.txt - run: node scripts/generate-llms.mjs - env: - LLMS_MODE: curated - - - name: Generate llms-full.txt - run: node scripts/generate-llms.mjs - env: - LLMS_MODE: full + - name: Generate llms-full-pages + run: yarn generate:llms:pages - name: Commit and push generated files run: | git config user.name "github-actions" git config user.email "github-actions@github.com" - git add public/keepsimple_/ + git add public/uxcore_/ if ! git diff --cached --quiet; then - git commit -m "chore: regenerate llms files" + git commit -m "chore: regenerate llms pages" git push - fi \ No newline at end of file + fi diff --git a/package.json b/package.json index 66c9661..6a6a6e8 100644 --- a/package.json +++ b/package.json @@ -16,8 +16,9 @@ "test:edge": "cypress run --browser edge", "test:all": "npm run test:chrome && npm run test:firefox && npm run test:edge", "prepare": "husky install", - "generate:llms": "tsx --tsconfig scripts/tsconfig.json scripts/generate-llms.ts", - "generate:llms:full": "tsx --tsconfig scripts/tsconfig.json scripts/generate-llms-full.ts" + "generate:llms": "cross-env LLMS_MODE=curated ts-node --compiler-options '{\"module\":\"commonjs\",\"target\":\"es2020\"}' scripts/generate-llms.ts", + "generate:llms:full": "ts-node --compiler-options '{\"module\":\"commonjs\",\"target\":\"es2020\"}' scripts/generate-llms-full.ts", + "generate:llms:pages": "ts-node --compiler-options '{\"module\":\"commonjs\",\"target\":\"es2020\"}' scripts/generate-llms-pages.ts" }, "lint-staged": { "**/*.{ts,tsx}": [ diff --git a/scripts/generate-llms-full.ts b/scripts/generate-llms-full.ts index b264b5b..d102f38 100644 --- a/scripts/generate-llms-full.ts +++ b/scripts/generate-llms-full.ts @@ -1,7 +1,5 @@ process.env.LLMS_OUTPUT_FILE = 'uxcore_/llms-full.txt'; // Large enough to include all current UXCore (105) and UXCG entries. process.env.LLMS_DYNAMIC_LIMIT = '1000'; -process.env.LLMS_WRITE_SLUG_MDS = 'true'; -process.env.LLMS_SLUG_MD_DIR = 'uxcore_/llms-full-pages'; void import('./generate-llms'); diff --git a/scripts/generate-llms-pages.ts b/scripts/generate-llms-pages.ts new file mode 100644 index 0000000..de6d2b0 --- /dev/null +++ b/scripts/generate-llms-pages.ts @@ -0,0 +1,126 @@ +import * as fs from 'fs'; +import * as path from 'path'; + +import { + absoluteRoute, + OutputPage, + pickSeoDescription, + STRAPI_BASE, + strapiGet, +} from './generate-llms-shared'; + +const OUTPUT_DIR = process.env.LLMS_PAGES_DIR || 'uxcore_/llms-full-pages'; + +if (!STRAPI_BASE) { + console.error('[error] STRAPI_URL or NEXT_PUBLIC_STRAPI must be set in .env'); + process.exit(1); +} + +function routeSlug(route: string): string | null { + const normalized = route.replace(/\/+$/, ''); + const parts = normalized.split('/'); + return parts[parts.length - 1] || null; +} + +function writeSlugMarkdownFiles(pages: OutputPage[], baseDir: string): void { + for (const page of pages) { + if (!page.slugSection) continue; + const slug = routeSlug(page.route); + if (!slug) continue; + + const sectionDir = path.join(baseDir, page.slugSection); + fs.mkdirSync(sectionDir, { recursive: true }); + + const content = [ + `# ${page.name}`, + '', + `- URL: ${absoluteRoute(page.route)}`, + `- Description: ${page.seoDescription ?? ''}`, + '', + ].join('\n'); + + fs.writeFileSync(path.join(sectionDir, `${slug}.md`), content, 'utf-8'); + } +} + +async function fetchUxcoreSlugPages(): Promise { + try { + const data = await strapiGet( + 'biases?locale=en&sort=number&pagination[pageSize]=1000&pagination[page]=1&populate[OGTags][populate]=ogImage', + ); + const items = Array.isArray(data?.data) ? data.data : []; + return items + .map((item: any) => { + const attrs = item?.attributes ?? {}; + const slug = attrs?.slug; + if (!slug) return null; + return { + route: `/uxcore/${slug}`, + name: String(attrs?.title ?? `UXCore ${attrs?.number ?? slug}`), + seoDescription: pickSeoDescription(attrs), + slugSection: 'uxcore' as const, + }; + }) + .filter(Boolean) as OutputPage[]; + } catch (err) { + console.log( + `[pages] skipping uxcore slugs — fetch failed: ${(err as Error).message}`, + ); + return []; + } +} + +async function fetchUxcgSlugPages(): Promise { + try { + const data = await strapiGet( + 'questions?locale=en&sort=number&pagination[pageSize]=1000&pagination[page]=1&populate[OGTags][populate]=ogImage', + ); + const items = Array.isArray(data?.data) ? data.data : []; + return items + .map((item: any) => { + const attrs = item?.attributes ?? {}; + const slug = attrs?.slug; + if (!slug) return null; + return { + route: `/uxcg/${slug}`, + name: String(attrs?.title ?? `UXCG ${attrs?.number ?? slug}`), + seoDescription: pickSeoDescription(attrs), + slugSection: 'uxcg' as const, + }; + }) + .filter(Boolean) as OutputPage[]; + } catch (err) { + console.log( + `[pages] skipping uxcg slugs — fetch failed: ${(err as Error).message}`, + ); + return []; + } +} + +async function main(): Promise { + console.log('=== generate-llms-pages.ts ===\n'); + + console.log('[step 1] Fetching all slug pages from Strapi...'); + const [uxcorePages, uxcgPages] = await Promise.all([ + fetchUxcoreSlugPages(), + fetchUxcgSlugPages(), + ]); + + const allPages = [...uxcorePages, ...uxcgPages]; + console.log( + ` found ${uxcorePages.length} uxcore + ${uxcgPages.length} uxcg = ${allPages.length} total\n`, + ); + + console.log(`[step 2] Writing markdown files to public/${OUTPUT_DIR}...`); + const baseDir = path.join(process.cwd(), 'public', OUTPUT_DIR); + writeSlugMarkdownFiles(allPages, baseDir); + + console.log( + `\nSuccessfully wrote ${allPages.length} page files to public/${OUTPUT_DIR}`, + ); +} + +main().catch(err => { + console.error('\n[error] generate-llms-pages failed:', err); + process.exit(1); +}); diff --git a/scripts/generate-llms-shared.ts b/scripts/generate-llms-shared.ts new file mode 100644 index 0000000..7433765 --- /dev/null +++ b/scripts/generate-llms-shared.ts @@ -0,0 +1,89 @@ +import * as dotenv from 'dotenv'; +import * as http from 'http'; +import * as https from 'https'; +import * as path from 'path'; + +dotenv.config({ path: path.join(process.cwd(), '.env'), override: true }); +dotenv.config({ path: path.join(process.cwd(), '.env.local'), override: true }); + +export const STRAPI_BASE = + process.env.STRAPI_URL || process.env.NEXT_PUBLIC_STRAPI || ''; +export const SITE_BASE_URL = (process.env.NEXT_PUBLIC_DOMAIN || '').replace( + /\/$/, + '', +); + +process.env.NEXT_PUBLIC_STRAPI = process.env.NEXT_PUBLIC_STRAPI || STRAPI_BASE; + +export function stripHtml(html: string): string { + return html + .replace(/<[^>]*>/g, '') + .replace(/ /g, ' ') + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/\s+/g, ' ') + .trim(); +} + +export function getJson( + url: string, + headers: Record = {}, +): Promise { + return new Promise((resolve, reject) => { + const client = url.startsWith('https://') ? https : http; + const req = client.request(url, { method: 'GET', headers }, res => { + const status = res.statusCode ?? 0; + let raw = ''; + res.setEncoding('utf8'); + res.on('data', chunk => { + raw += chunk; + }); + res.on('end', () => { + if (status < 200 || status >= 300) { + reject(new Error(`HTTP ${status} for ${url}`)); + return; + } + try { + resolve(JSON.parse(raw)); + } catch (err) { + reject( + new Error( + `Invalid JSON for ${url}: ${(err as Error).message || 'unknown error'}`, + ), + ); + } + }); + }); + req.on('error', reject); + req.end(); + }); +} + +export async function strapiGet(endpoint: string): Promise { + const url = `${STRAPI_BASE}/api/${endpoint}`; + return getJson(url); +} + +export function pickSeoDescription(attrs: any): string | null { + const raw = + attrs?.seoDescription ?? + attrs?.OGTags?.ogDescription ?? + attrs?.ogDescription ?? + null; + return raw ? stripHtml(String(raw)) : null; +} + +export function absoluteRoute(route: string): string { + if (!SITE_BASE_URL) return route; + if (route === '/') return SITE_BASE_URL; + return `${SITE_BASE_URL}${route}`; +} + +export interface OutputPage { + route: string; + name: string; + seoDescription: string | null; + slugSection?: 'uxcore' | 'uxcg'; +} diff --git a/scripts/generate-llms.ts b/scripts/generate-llms.ts index a9fdb5e..96df2da 100644 --- a/scripts/generate-llms.ts +++ b/scripts/generate-llms.ts @@ -1,25 +1,18 @@ -import * as dotenv from 'dotenv'; import * as fs from 'fs'; -import * as http from 'http'; -import * as https from 'https'; import * as path from 'path'; import { getLlmsMeta } from '../src/api/llmsMeta'; +import { + absoluteRoute, + OutputPage, + pickSeoDescription, + STRAPI_BASE, + strapiGet, + stripHtml, +} from './generate-llms-shared'; -dotenv.config({ path: path.join(process.cwd(), '.env'), override: true }); -dotenv.config({ path: path.join(process.cwd(), '.env.local'), override: true }); - -const STRAPI_BASE = - process.env.STRAPI_URL || process.env.NEXT_PUBLIC_STRAPI || ''; -const STRAPI_TOKEN = process.env.STRAPI_API_TOKEN || ''; -const SITE_BASE_URL = (process.env.NEXT_PUBLIC_DOMAIN || '').replace(/\/$/, ''); const OUTPUT_FILENAME = process.env.LLMS_OUTPUT_FILE || 'uxcore_/llms.txt'; const DYNAMIC_SLUG_LIMIT = Number(process.env.LLMS_DYNAMIC_LIMIT || '10') || 10; -const WRITE_SLUG_MD_FILES = process.env.LLMS_WRITE_SLUG_MDS === 'true'; -const SLUG_MD_DIRNAME = - process.env.LLMS_SLUG_MD_DIR || - OUTPUT_FILENAME.replace(/\.txt$/i, '').replace(/[^a-zA-Z0-9-_]/g, ''); -process.env.NEXT_PUBLIC_STRAPI = process.env.NEXT_PUBLIC_STRAPI || STRAPI_BASE; if (!STRAPI_BASE) { console.error('[error] STRAPI_URL or NEXT_PUBLIC_STRAPI must be set in .env'); @@ -29,65 +22,6 @@ if (!STRAPI_BASE) { const PAGES_DIR = path.join(process.cwd(), 'src', 'pages'); const OUTPUT_FILE = path.join(process.cwd(), 'public', OUTPUT_FILENAME); -// ───────────────────────────────────────────── -// Helpers -// ───────────────────────────────────────────── - -function stripHtml(html: string): string { - return html - .replace(/<[^>]*>/g, '') - .replace(/ /g, ' ') - .replace(/&/g, '&') - .replace(/</g, '<') - .replace(/>/g, '>') - .replace(/"/g, '"') - .replace(/\s+/g, ' ') - .trim(); -} - -function strapiHeaders(): Record { - return STRAPI_TOKEN ? { Authorization: `Bearer ${STRAPI_TOKEN}` } : {}; -} - -function getJson( - url: string, - headers: Record = {}, -): Promise { - return new Promise((resolve, reject) => { - const client = url.startsWith('https://') ? https : http; - const req = client.request(url, { method: 'GET', headers }, res => { - const status = res.statusCode ?? 0; - let raw = ''; - res.setEncoding('utf8'); - res.on('data', chunk => { - raw += chunk; - }); - res.on('end', () => { - if (status < 200 || status >= 300) { - reject(new Error(`HTTP ${status} for ${url}`)); - return; - } - try { - resolve(JSON.parse(raw)); - } catch (err) { - reject( - new Error( - `Invalid JSON for ${url}: ${(err as Error).message || 'unknown error'}`, - ), - ); - } - }); - }); - req.on('error', reject); - req.end(); - }); -} - -async function strapiGet(endpoint: string): Promise { - const url = `${STRAPI_BASE}/api/${endpoint}`; - return getJson(url, strapiHeaders()); -} - // ───────────────────────────────────────────── // Step 1 — Site meta from /api/llms-meta // ───────────────────────────────────────────── @@ -132,13 +66,6 @@ interface PageEntry { isDynamic: boolean; } -interface OutputPage { - route: string; - name: string; - seoDescription: string | null; - slugSection?: 'uxcore' | 'uxcg'; -} - /** Convert a path relative to src/pages into a URL route. */ function relPathToRoute(relPath: string): string { // Normalise separators @@ -249,15 +176,6 @@ async function fetchPageSeoDescription(route: string): Promise { } } -function pickSeoDescription(attrs: any): string | null { - const raw = - attrs?.seoDescription ?? - attrs?.OGTags?.ogDescription ?? - attrs?.ogDescription ?? - null; - return raw ? stripHtml(String(raw)) : null; -} - async function fetchTopUxcoreSlugPages(limit = 10): Promise { try { const data = await strapiGet( @@ -327,42 +245,6 @@ async function fetchUxcatSeoDescription(): Promise { } } -function absoluteRoute(route: string): string { - if (!SITE_BASE_URL) return route; - if (route === '/') return SITE_BASE_URL; - return `${SITE_BASE_URL}${route}`; -} - -function routeSlug(route: string): string | null { - const normalized = route.replace(/\/+$/, ''); - const parts = normalized.split('/'); - return parts[parts.length - 1] || null; -} - -function writeSlugMarkdownFiles(pages: OutputPage[]): void { - if (!WRITE_SLUG_MD_FILES) return; - - const baseDir = path.join(process.cwd(), 'public', SLUG_MD_DIRNAME); - for (const page of pages) { - if (!page.slugSection) continue; - const slug = routeSlug(page.route); - if (!slug) continue; - - const sectionDir = path.join(baseDir, page.slugSection); - fs.mkdirSync(sectionDir, { recursive: true }); - - const content = [ - `# ${page.name}`, - '', - `- URL: ${absoluteRoute(page.route)}`, - `- Description: ${page.seoDescription ?? ''}`, - '', - ].join('\n'); - - fs.writeFileSync(path.join(sectionDir, `${slug}.md`), content, 'utf-8'); - } -} - // ───────────────────────────────────────────── // Step 4 — Build and write /public/llms.txt // ───────────────────────────────────────────── @@ -491,7 +373,6 @@ async function main(): Promise { fs.mkdirSync(path.dirname(OUTPUT_FILE), { recursive: true }); fs.writeFileSync(OUTPUT_FILE, content, 'utf-8'); - writeSlugMarkdownFiles(pages); console.log( `\nSuccessfully mapped ${pages.length} routes to /public/${OUTPUT_FILENAME}`,