diff --git a/scripts/install-bundled-lsp.ts b/scripts/install-bundled-lsp.ts index f1b68152..3b020fb2 100644 --- a/scripts/install-bundled-lsp.ts +++ b/scripts/install-bundled-lsp.ts @@ -1,14 +1,18 @@ /** * Install LSP dependencies for all bundled extensions + * and copy tree-sitter WASM files to public directory * Runs automatically after `bun install` via postinstall hook */ import { existsSync } from "node:fs"; -import { readdir } from "node:fs/promises"; +import { copyFile, mkdir, readdir } from "node:fs/promises"; import { join } from "node:path"; import { $ } from "bun"; const BUNDLED_EXTENSIONS_DIR = "src/extensions/bundled"; +const TREE_SITTER_WASMS_DIR = "node_modules/tree-sitter-wasms/out"; +const PUBLIC_PARSERS_DIR = "public/tree-sitter/parsers"; +const PUBLIC_QUERIES_DIR = "public/tree-sitter/queries"; async function installBundledLspDependencies() { console.log("Installing bundled extension LSP dependencies..."); @@ -42,4 +46,83 @@ async function installBundledLspDependencies() { console.log("Bundled LSP installation complete."); } +/** + * Copy tree-sitter WASM files from node_modules to public directory + * Only copies parsers that are actually used by bundled extensions + */ +async function copyTreeSitterWasms() { + console.log("Copying tree-sitter WASM files..."); + + const wasmsDir = join(process.cwd(), TREE_SITTER_WASMS_DIR); + const publicDir = join(process.cwd(), PUBLIC_PARSERS_DIR); + + if (!existsSync(wasmsDir)) { + console.log(" tree-sitter-wasms not installed, skipping."); + return; + } + + // Ensure public directory exists + await mkdir(publicDir, { recursive: true }); + + // List of parsers to copy (used by bundled extensions) + // Note: Rust uses CDN (see src/extensions/languages/parser-cdn.ts) + const parsersToInstall = ["tree-sitter-tsx.wasm"]; + + for (const parser of parsersToInstall) { + const src = join(wasmsDir, parser); + const dest = join(publicDir, parser); + + if (existsSync(src)) { + try { + await copyFile(src, dest); + 
console.log(` Copied ${parser}`); + } catch (error) { + console.error(` Failed to copy ${parser}:`, error); + } + } else { + console.warn(` ${parser} not found in tree-sitter-wasms`); + } + } + + console.log("Tree-sitter WASM copy complete."); +} + +/** + * Copy highlight queries from bundled extensions to public directory + */ +async function copyHighlightQueries() { + console.log("Copying highlight queries..."); + + const bundledDir = join(process.cwd(), BUNDLED_EXTENSIONS_DIR); + const publicQueriesDir = join(process.cwd(), PUBLIC_QUERIES_DIR); + + // Map of bundled extension to query folder name + // Note: Rust uses CDN (see src/extensions/languages/parser-cdn.ts) + const extensionQueryMap: Record<string, string> = { + typescript: "tsx", // TypeScript extension provides tsx queries + }; + + for (const [extName, queryFolder] of Object.entries(extensionQueryMap)) { + const srcQuery = join(bundledDir, extName, "queries", "highlights.scm"); + const destDir = join(publicQueriesDir, queryFolder); + const destQuery = join(destDir, "highlights.scm"); + + if (existsSync(srcQuery)) { + try { + await mkdir(destDir, { recursive: true }); + await copyFile(srcQuery, destQuery); + console.log(` Copied ${extName} queries to ${queryFolder}/`); + } catch (error) { + console.error(` Failed to copy ${extName} queries:`, error); + } + } else { + console.warn(` No queries found for ${extName}`); + } + } + + console.log("Highlight queries copy complete."); +} + await installBundledLspDependencies(); +await copyTreeSitterWasms(); +await copyHighlightQueries(); diff --git a/src/extensions/bundled/rust/extension.json b/src/extensions/bundled/rust/extension.json deleted file mode 100644 index 0a8f62d9..00000000 --- a/src/extensions/bundled/rust/extension.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "id": "athas.rust", - "name": "Rust", - "displayName": "Rust Language Support", - "description": "Rust language support with rust-analyzer LSP for IntelliSense, diagnostics, and more", - "version": "1.0.0", - 
"publisher": "Athas", - "categories": ["Language"], - "languages": [ - { - "id": "rust", - "extensions": [".rs"], - "aliases": ["Rust", "rs"] - } - ], - "grammar": { - "wasmPath": "/tree-sitter/parsers/tree-sitter-rust.wasm", - "highlightQueryPath": "/tree-sitter/queries/rust/highlights.scm", - "scopeName": "source.rust", - "languageId": "rust" - }, - "lsp": { - "server": { - "default": "rust-analyzer", - "darwin": "./lsp/rust-analyzer-darwin", - "linux": "./lsp/rust-analyzer-linux", - "win32": "./lsp/rust-analyzer.exe" - }, - "args": [], - "fileExtensions": [".rs"], - "languageIds": ["rust"], - "initializationOptions": { - "cargo": { - "buildScripts": { - "enable": true - } - }, - "procMacro": { - "enable": true - } - } - }, - "commands": [ - { - "command": "rust.restart", - "title": "Restart Rust Analyzer", - "category": "Rust" - }, - { - "command": "rust.toggle", - "title": "Toggle Rust Analyzer", - "category": "Rust" - } - ], - "activationEvents": ["onLanguage:rust"], - "icon": "icon.svg" -} diff --git a/src/extensions/bundled/rust/lsp/.gitkeep b/src/extensions/bundled/rust/lsp/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/extensions/bundled/typescript/queries/highlights.scm b/src/extensions/bundled/typescript/queries/highlights.scm index 1a34c2b0..3f5ad41e 100644 --- a/src/extensions/bundled/typescript/queries/highlights.scm +++ b/src/extensions/bundled/typescript/queries/highlights.scm @@ -1,9 +1,5 @@ -; Combined highlights for TSX (JavaScript + JSX + TypeScript) -; Based on tree-sitter-javascript and tree-sitter-typescript queries - -; ==================== -; JavaScript Base -; ==================== +; Highlights for TSX/TypeScript/JavaScript +; Simplified query compatible with tree-sitter-wasms ; Variables (identifier) @variable @@ -23,19 +19,10 @@ key: (property_identifier) @function.method value: [(function_expression) (arrow_function)]) -(assignment_expression - left: (member_expression - property: (property_identifier) 
@function.method) - right: [(function_expression) (arrow_function)]) - (variable_declarator name: (identifier) @function value: [(function_expression) (arrow_function)]) -(assignment_expression - left: (identifier) @function - right: [(function_expression) (arrow_function)]) - ; Function and method calls (call_expression function: (identifier) @function) @@ -55,14 +42,6 @@ ] @constant (#match? @constant "^[A-Z_][A-Z\\d_]+$")) -((identifier) @variable.builtin - (#match? @variable.builtin "^(arguments|module|console|window|document)$") - (#is-not? local)) - -((identifier) @function.builtin - (#eq? @function.builtin "require") - (#is-not? local)) - ; Literals (this) @variable.builtin (super) @variable.builtin @@ -84,7 +63,7 @@ (regex) @string.special (number) @number -; Tokens +; Punctuation [ ";" (optional_chain) @@ -196,24 +175,7 @@ "yield" ] @keyword -; ==================== -; JSX Extensions -; ==================== - -(jsx_opening_element (identifier) @tag (#match? @tag "^[a-z][^.]*$")) -(jsx_closing_element (identifier) @tag (#match? @tag "^[a-z][^.]*$")) -(jsx_self_closing_element (identifier) @tag (#match? 
@tag "^[a-z][^.]*$")) - -(jsx_attribute (property_identifier) @attribute) -(jsx_opening_element (["<" ">"]) @punctuation.bracket) -(jsx_closing_element (["</" ">"]) @punctuation.bracket) -(jsx_self_closing_element (["<" "/>"]) @punctuation.bracket) - -; ==================== ; TypeScript Extensions -; ==================== - -; Types (type_identifier) @type (predefined_type) @type.builtin @@ -224,11 +186,9 @@ "<" @punctuation.bracket ">" @punctuation.bracket) -; Variables (required_parameter (identifier) @variable.parameter) (optional_parameter (identifier) @variable.parameter) -; TypeScript Keywords [ "abstract" "declare" diff --git a/src/extensions/languages/language-packager.ts b/src/extensions/languages/language-packager.ts index 05e739ce..7d9a8a2c 100644 --- a/src/extensions/languages/language-packager.ts +++ b/src/extensions/languages/language-packager.ts @@ -4,10 +4,11 @@ */ import type { ExtensionManifest } from "../types/extension-manifest"; +import { getQueryCdnUrl, getWasmCdnUrl } from "./parser-cdn"; -// CDN base URL for downloading WASM parsers and highlight queries +// Fallback CDN base URL for languages not in parser-cdn.ts // Can be configured via environment variable -const CDN_BASE_URL = import.meta.env.VITE_PARSER_CDN_URL || "https://athas.dev/extensions"; +const FALLBACK_CDN_BASE_URL = import.meta.env.VITE_PARSER_CDN_URL || "https://athas.dev/extensions"; // Old manifest format from JSON files interface LanguageManifestFile { @@ -40,10 +41,6 @@ function convertLanguageManifest(manifest: LanguageManifestFile): ExtensionManif ext.startsWith(".") ? 
ext : `.${ext}`, ); - // Extract parser name from wasmPath - // "/tree-sitter/parsers/tree-sitter-javascript.wasm" -> "tree-sitter-javascript.wasm" - const wasmFileName = languageProvider.wasmPath.split("/").pop() || ""; - return { id: manifest.id, name: manifest.name, @@ -62,7 +59,7 @@ function convertLanguageManifest(manifest: LanguageManifestFile): ExtensionManif activationEvents: [`onLanguage:${languageProvider.id}`], // Extension is downloadable from CDN installation: { - downloadUrl: `${CDN_BASE_URL}/parsers/${wasmFileName}`, + downloadUrl: getWasmUrlForLanguage(languageProvider.id), size: 0, // Will be determined during download checksum: "", // Will be calculated after download minEditorVersion: "0.1.0", @@ -116,14 +113,28 @@ export function getLanguageExtensionByFileExt(fileExt: string): ExtensionManifes /** * Get WASM download URL for a language + * Uses GitHub/jsdelivr CDN if configured, otherwise falls back to athas.dev */ export function getWasmUrlForLanguage(languageId: string): string { - return `${CDN_BASE_URL}/parsers/tree-sitter-${languageId}.wasm`; + // Check if this language has a version-pinned CDN config + const cdnUrl = getWasmCdnUrl(languageId); + if (cdnUrl) { + return cdnUrl; + } + // Fall back to default CDN + return `${FALLBACK_CDN_BASE_URL}/parsers/tree-sitter-${languageId}.wasm`; } /** * Get highlight query URL for a language + * Uses GitHub CDN if configured, otherwise falls back to athas.dev */ export function getHighlightQueryUrl(languageId: string): string { - return `${CDN_BASE_URL}/queries/${languageId}/highlights.scm`; + // Check if this language has a version-pinned CDN config + const cdnUrl = getQueryCdnUrl(languageId); + if (cdnUrl) { + return cdnUrl; + } + // Fall back to default CDN + return `${FALLBACK_CDN_BASE_URL}/queries/${languageId}/highlights.scm`; } diff --git a/src/extensions/languages/parser-cdn.ts b/src/extensions/languages/parser-cdn.ts new file mode 100644 index 00000000..cc71fdfe --- /dev/null +++ 
b/src/extensions/languages/parser-cdn.ts @@ -0,0 +1,123 @@ +/** + * Parser CDN Configuration + * Maps languages to their CDN URLs with version pinning + * + * WASM files are served from jsdelivr (npm CDN) + * Highlight queries are fetched from GitHub tree-sitter repos + */ + +interface ParserCdnConfig { + /** Version of tree-sitter-wasms package */ + wasmsVersion: string; + /** Version tag of the tree-sitter grammar repo */ + grammarVersion: string; + /** Language ID used in tree-sitter (may differ from our languageId) */ + treeSitterLangId?: string; + /** GitHub organization (defaults to "tree-sitter") */ + githubOrg?: string; + /** Whether to omit "v" prefix in version tag (defaults to false) */ + noVersionPrefix?: boolean; +} + +/** Default version for tree-sitter-wasms package */ +const WASMS_VERSION = "0.1.13"; + +/** + * Version-pinned parser configurations + * These versions must match - the WASM in tree-sitter-wasms is built from the grammar version + */ +const PARSER_VERSIONS: Record<string, ParserCdnConfig> = { + // Core languages + json: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.2" }, + python: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.21.0" }, + go: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.0" }, + rust: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.4" }, + java: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.2" }, + + // C family + c: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.8" }, + cpp: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.5" }, + c_sharp: { + wasmsVersion: WASMS_VERSION, + grammarVersion: "0.20.0", + treeSitterLangId: "c-sharp", + }, + + // Web + html: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.4" }, + css: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.0" }, + php: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.22.8" }, + + // Scripting + bash: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.5" }, + ruby: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.1" }, + + // Config 
files + toml: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.5.1" }, + + // Other languages with different GitHub orgs + kotlin: { + wasmsVersion: WASMS_VERSION, + grammarVersion: "0.3.8", + githubOrg: "fwcd", + noVersionPrefix: true, + }, + swift: { + wasmsVersion: WASMS_VERSION, + grammarVersion: "0.4.3", + githubOrg: "alex-pinkus", + noVersionPrefix: true, + }, + elixir: { + wasmsVersion: WASMS_VERSION, + grammarVersion: "0.1.1", + githubOrg: "elixir-lang", + }, + zig: { + wasmsVersion: WASMS_VERSION, + grammarVersion: "1.1.2", + githubOrg: "tree-sitter-grammars", + }, + ocaml: { wasmsVersion: WASMS_VERSION, grammarVersion: "0.20.4" }, +}; + +/** + * Get CDN URL for a language's WASM parser + * Uses jsdelivr which serves npm packages + */ +export function getWasmCdnUrl(languageId: string): string | null { + const config = PARSER_VERSIONS[languageId]; + if (!config) return null; + + const treeSitterLangId = config.treeSitterLangId || languageId; + return `https://cdn.jsdelivr.net/npm/tree-sitter-wasms@${config.wasmsVersion}/out/tree-sitter-${treeSitterLangId}.wasm`; +} + +/** + * Get CDN URL for a language's highlight query + * Uses raw GitHub URLs from tree-sitter repos + */ +export function getQueryCdnUrl(languageId: string): string | null { + const config = PARSER_VERSIONS[languageId]; + if (!config) return null; + + const treeSitterLangId = config.treeSitterLangId || languageId; + const githubOrg = config.githubOrg || "tree-sitter"; + const versionTag = config.noVersionPrefix ? 
config.grammarVersion : `v${config.grammarVersion}`; + + return `https://raw.githubusercontent.com/${githubOrg}/tree-sitter-${treeSitterLangId}/${versionTag}/queries/highlights.scm`; +} + +/** + * Check if a language has CDN configuration + */ +export function hasCdnConfig(languageId: string): boolean { + return languageId in PARSER_VERSIONS; +} + +/** + * Get all languages with CDN support + */ +export function getCdnSupportedLanguages(): string[] { + return Object.keys(PARSER_VERSIONS); +} diff --git a/src/features/editor/hooks/use-tokenizer.ts b/src/features/editor/hooks/use-tokenizer.ts index 6f630d37..1c30e135 100644 --- a/src/features/editor/hooks/use-tokenizer.ts +++ b/src/features/editor/hooks/use-tokenizer.ts @@ -4,6 +4,7 @@ */ import { useCallback, useRef, useState } from "react"; +import { getQueryCdnUrl, getWasmCdnUrl, hasCdnConfig } from "@/extensions/languages/parser-cdn"; import { EDITOR_CONSTANTS } from "@/features/editor/config/constants"; import { logger } from "@/features/editor/utils/logger"; import { indexedDBParserCache } from "../lib/wasm-parser/cache-indexeddb"; @@ -89,10 +90,16 @@ export function getLanguageId(filePath: string): string | null { } /** - * Map language IDs to their local WASM paths - * TypeScript and JavaScript use the tsx parser + * Get WASM path for a language - uses CDN if configured, otherwise local */ -function getLocalWasmPath(languageId: string): string { +function getWasmPath(languageId: string): string { + // Check if this language has CDN configuration + const cdnUrl = getWasmCdnUrl(languageId); + if (cdnUrl) { + return cdnUrl; + } + + // Fall back to local paths // TypeScript and JavaScript both use tsx parser if (languageId === "typescript" || languageId === "javascript") { return "/tree-sitter/parsers/tree-sitter-tsx.wasm"; @@ -101,13 +108,36 @@ function getLocalWasmPath(languageId: string): string { } /** - * Get the query folder for a language ID + * Get highlight query URL for a language - uses CDN if 
configured, otherwise local */ -function getQueryFolder(languageId: string): string { - if (languageId === "typescript" || languageId === "javascript") { - return "tsx"; +function getQueryUrl(languageId: string): string { + // Check if this language has CDN configuration + const cdnUrl = getQueryCdnUrl(languageId); + if (cdnUrl) { + return cdnUrl; } - return languageId; + + // Fall back to local paths + const queryFolder = + languageId === "typescript" || languageId === "javascript" ? "tsx" : languageId; + return `/tree-sitter/queries/${queryFolder}/highlights.scm`; +} + +/** + * Validate that a string is a valid tree-sitter query (not HTML or other error response) + */ +function isValidHighlightQuery(text: string | undefined): boolean { + if (!text || text.trim().length === 0) return false; + // Reject HTML responses (common 404 error pages) + if (text.trimStart().startsWith(" { - // TypeScript and JavaScript both use tsx queries + // First try CDN for languages with CDN config + const cdnUrl = getQueryCdnUrl(languageId); + if (cdnUrl) { + try { + const response = await fetch(cdnUrl); + if (response.ok) { + const text = await response.text(); + if (this.isValidHighlightQuery(text)) { + logger.debug("WasmParser", `Loaded highlight query from CDN for ${languageId}`); + return text; + } + logger.warn("WasmParser", `Invalid highlight query from CDN ${cdnUrl}`); + } + } catch { + logger.debug("WasmParser", `Failed to fetch highlight query from CDN for ${languageId}`); + } + } + + // Fall back to local path for languages without CDN config const queryFolder = languageId === "typescript" || languageId === "javascript" ? 
"tsx" : languageId; const localPath = `/tree-sitter/queries/${queryFolder}/highlights.scm`; try { const response = await fetch(localPath); if (response.ok) { - return await response.text(); + const text = await response.text(); + if (this.isValidHighlightQuery(text)) { + return text; + } + logger.warn( + "WasmParser", + `Invalid highlight query from ${localPath} (appears to be HTML or empty)`, + ); } } catch { logger.debug("WasmParser", `No local highlight query found at ${localPath}`); @@ -171,7 +219,12 @@ class WasmParserLoader { } private async _loadParserInternal(config: ParserConfig): Promise { - const { languageId, wasmPath, highlightQuery } = config; + const { languageId, wasmPath, highlightQuery: rawHighlightQuery } = config; + // Validate passed highlight query - reject if it looks like HTML + const highlightQuery = + rawHighlightQuery && this.isValidHighlightQuery(rawHighlightQuery) + ? rawHighlightQuery + : undefined; try { // Ensure Tree-sitter is initialized @@ -212,11 +265,43 @@ class WasmParserLoader { throw new Error(`Cache entry for ${languageId} has no WASM data`); } - // Use cached highlight query if available and not empty - // Prefer cached query over passed parameter if cached is non-empty - if (cached.highlightQuery && cached.highlightQuery.trim().length > 0) { + // Validate cached WASM has correct magic number + if ( + wasmBytes.length < 4 || + wasmBytes[0] !== 0x00 || + wasmBytes[1] !== 0x61 || + wasmBytes[2] !== 0x73 || + wasmBytes[3] !== 0x6d + ) { + logger.error( + "WasmParser", + `Cached WASM for ${languageId} is invalid (size: ${wasmBytes.length}), clearing cache`, + ); + await indexedDBParserCache.delete(languageId); + // Recursively call to re-download + return this._loadParserInternal(config); + } + + // Use cached highlight query if available, valid, and not empty + // Prefer cached query over passed parameter if cached is valid + if (cached.highlightQuery && this.isValidHighlightQuery(cached.highlightQuery)) { queryText = 
cached.highlightQuery; logger.debug("WasmParser", `Using cached highlight query for ${languageId}`); + } else if (cached.highlightQuery && !this.isValidHighlightQuery(cached.highlightQuery)) { + // Cached query is invalid (e.g., HTML error page), fetch fresh + logger.warn( + "WasmParser", + `Cached highlight query for ${languageId} is invalid, fetching fresh`, + ); + const freshQuery = await this.fetchLocalHighlightQuery(languageId); + if (freshQuery) { + queryText = freshQuery; + // Update cache with valid query + await indexedDBParserCache.set({ + ...cached, + highlightQuery: freshQuery, + }); + } } else if (!queryText) { logger.warn( "WasmParser", @@ -241,6 +326,24 @@ class WasmParserLoader { const arrayBuffer = await response.arrayBuffer(); wasmBytes = new Uint8Array(arrayBuffer); + // Validate WASM magic number (first 4 bytes should be \0asm) + if ( + wasmBytes.length < 4 || + wasmBytes[0] !== 0x00 || + wasmBytes[1] !== 0x61 || + wasmBytes[2] !== 0x73 || + wasmBytes[3] !== 0x6d + ) { + throw new Error( + `Downloaded file is not valid WASM (size: ${wasmBytes.length}, magic: ${wasmBytes.slice(0, 4)})`, + ); + } + + logger.info( + "WasmParser", + `Downloaded valid WASM for ${languageId} (${wasmBytes.length} bytes)`, + ); + // Cache for future use try { await indexedDBParserCache.set({ @@ -272,6 +375,24 @@ class WasmParserLoader { const arrayBuffer = await response.arrayBuffer(); wasmBytes = new Uint8Array(arrayBuffer); + // Validate WASM magic number + if ( + wasmBytes.length < 4 || + wasmBytes[0] !== 0x00 || + wasmBytes[1] !== 0x61 || + wasmBytes[2] !== 0x73 || + wasmBytes[3] !== 0x6d + ) { + throw new Error( + `Local file is not valid WASM (size: ${wasmBytes.length}, magic: ${wasmBytes.slice(0, 4)})`, + ); + } + + logger.info( + "WasmParser", + `Loaded valid WASM for ${languageId} (${wasmBytes.length} bytes)`, + ); + // Also fetch highlight query from local path if not provided if (!queryText) { const localQuery = await 
this.fetchLocalHighlightQuery(languageId); @@ -335,11 +456,15 @@ class WasmParserLoader { }) .catch(() => {}); } catch (localError) { + // Both queries failed - likely grammar mismatch (e.g., cached WASM lacks JSX support) + // Delete cached WASM and let it re-download on next load logger.error( "WasmParser", - `Local highlight query also failed for ${languageId}:`, + `Local highlight query also failed for ${languageId} - possible grammar mismatch, clearing cache`, localError, ); + await indexedDBParserCache.delete(languageId); + this.parsers.delete(languageId); } } }