From c22cf46cd67c8003983cf67a04b8d2e77bc338bb Mon Sep 17 00:00:00 2001 From: ro Date: Sun, 28 Dec 2025 17:02:08 -0800 Subject: [PATCH 01/35] direnv --- .envrc | 1 + .gitignore | 1 + 2 files changed, 2 insertions(+) create mode 100644 .envrc diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore index ea8c4bf..2d5df85 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +.direnv From 9496a364c2e7a969c86d126b1b9e5425dceb41e2 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 13 Dec 2025 12:47:27 +0530 Subject: [PATCH 02/35] feat(code): init new code feature --- .gitignore | 2 + Cargo.lock | 59 +++++- Cargo.toml | 1 + flake.nix | 1 + registry.toml | 394 +++++++++++++++++++++++++++++++++++++++ test_project/src/lib.rs | 1 - test_project/src/main.rs | 1 - 7 files changed, 449 insertions(+), 10 deletions(-) create mode 100644 registry.toml delete mode 100644 test_project/src/lib.rs delete mode 100644 test_project/src/main.rs diff --git a/.gitignore b/.gitignore index 2d5df85..cdbe4ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /target .direnv + +test/ diff --git a/Cargo.lock b/Cargo.lock index 438cb1a..0e9dbb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -472,7 +472,7 @@ dependencies = [ "crossterm_winapi", "mio", "parking_lot", - "rustix", + "rustix 0.38.44", "signal-hook", "signal-hook-mio", "winapi", @@ -687,6 +687,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + [[package]] name = "equivalent" version = "1.0.2" @@ -984,6 +990,7 @@ dependencies = [ "toml", "url", "walkdir", + "which", ] [[package]] @@ -1519,9 +1526,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.169" +version = "0.2.177" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "libgit2-sys" @@ -1585,6 +1592,12 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "litemap" version = "0.7.4" @@ -2478,7 +2491,20 @@ dependencies = [ "bitflags 2.8.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags 2.8.0", + "errno", + "libc", + "linux-raw-sys 0.11.0", "windows-sys 0.59.0", ] @@ -2930,7 +2956,7 @@ dependencies = [ "fastrand", "getrandom 0.3.1", "once_cell", - "rustix", + "rustix 0.38.44", "windows-sys 0.59.0", ] @@ -3430,7 +3456,7 @@ checksum = "b7208998eaa3870dad37ec8836979581506e0c5c64c20c9e79e9d2a10d6f47bf" dependencies = [ "cc", "downcast-rs", - "rustix", + "rustix 0.38.44", "smallvec", "wayland-sys", ] @@ -3442,7 +3468,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2120de3d33638aaef5b9f4472bff75f07c56379cf76ea320bd3a3d65ecaf73f" dependencies = [ "bitflags 2.8.0", - "rustix", + "rustix 0.38.44", "wayland-backend", "wayland-scanner", ] @@ -3527,6 +3553,17 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "which" +version = "8.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d" +dependencies = [ + "env_home", + "rustix 1.1.2", + "winsafe", +] + [[package]] name = "winapi" version = "0.3.9" @@ -3725,6 +3762,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + [[package]] name = "wit-bindgen-rt" version = "0.33.0" @@ -3773,7 +3816,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d91ffca73ee7f68ce055750bf9f6eca0780b8c85eff9bc046a3b0da41755e12" dependencies = [ "gethostname", - "rustix", + "rustix 0.38.44", "x11rb-protocol", ] diff --git a/Cargo.toml b/Cargo.toml index a8c4e86..b388282 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ url = "2.5" git2 = "0.18" mockito = "1.4" num-format = { version = "0.4.4" } +which = "8.0.0" [build-dependencies] serde = { version = "1.0.217", features = ["derive"] } diff --git a/flake.nix b/flake.nix index 32848b4..883033e 100644 --- a/flake.nix +++ b/flake.nix @@ -75,6 +75,7 @@ pkg-config openssl cacert + tree-sitter ] ++ lib.optionals stdenv.isDarwin [ darwin.apple_sdk.frameworks.Security diff --git a/registry.toml b/registry.toml new file mode 100644 index 0000000..29f6e94 --- /dev/null +++ b/registry.toml @@ -0,0 +1,394 @@ +[[language]] +name = "go" +extensions = ["go"] +repo = "https://github.com/tree-sitter/tree-sitter-go" +branch = "master" +symbol = "tree_sitter_go" +definition_query = """ +( + (comment)* @doc + . + (function_declaration + name: (identifier) @name + body: (block) @body) @function.definition +) +( + (comment)* @doc + . 
+ (method_declaration + name: (field_identifier) @name + body: (block) @body) @method.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (parenthesized_expression (identifier) @name) + (selector_expression field: (field_identifier) @name) + (parenthesized_expression (selector_expression field: (field_identifier) @name)) + ]) @reference.call +""" + +[[language]] +name = "zig" +extensions = ["zig"] +repo = "https://github.com/tree-sitter-grammars/tree-sitter-zig" +branch = "master" +symbol = "tree_sitter_zig" +definition_query = """ +( + (comment)* @doc + . + (function_declaration + name: (identifier) @name + body: (block) @body) @function.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (field_expression member: (identifier) @name) + ]) @reference.call +""" + +[[language]] +name = "c" +extensions = ["c", "h"] +repo = "https://github.com/tree-sitter/tree-sitter-c" +branch = "master" +symbol = "tree_sitter_c" +definition_query = """ +( + (comment)* @doc + . + (function_definition + declarator: [ + (function_declarator + declarator: (identifier) @name) + (pointer_declarator + declarator: (function_declarator + declarator: (identifier) @name)) + ] + body: (compound_statement) @body) @function.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (field_expression field: (field_identifier) @name) + (parenthesized_expression (identifier) @name) + ]) @reference.call +""" + +[[language]] +name = "cpp" +extensions = ["cpp", "cc", "cxx", "hpp", "hxx"] +repo = "https://github.com/tree-sitter/tree-sitter-cpp" +branch = "master" +symbol = "tree_sitter_cpp" +definition_query = """ +( + (comment)* @doc + . + (function_definition + declarator: (function_declarator + declarator: (identifier) @name) + body: (compound_statement) @body) @function.definition +) +( + (comment)* @doc + . 
+ (function_definition + declarator: (function_declarator + declarator: (qualified_identifier + name: (identifier) @name)) + body: (compound_statement) @body) @method.definition +) +( + (comment)* @doc + . + (function_definition + declarator: (pointer_declarator + declarator: (function_declarator + declarator: (identifier) @name)) + body: (compound_statement) @body) @function.definition +) +( + (comment)* @doc + . + (function_definition + declarator: (reference_declarator + (function_declarator + declarator: (qualified_identifier + name: [(identifier) (operator_name) (destructor_name)] @name))) + body: (compound_statement) @body) @method.definition +) +( + (comment)* @doc + . + (function_definition + declarator: (function_declarator + declarator: (qualified_identifier + name: (destructor_name) @name)) + body: (compound_statement) @body) @method.definition +) +( + (comment)* @doc + . + (template_declaration + (function_definition + declarator: (function_declarator + declarator: (qualified_identifier + name: (identifier) @name)) + body: (compound_statement) @body)) @function.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (qualified_identifier name: (identifier) @name) + (template_function name: (identifier) @name) + (field_expression field: (field_identifier) @name) + ]) @reference.call +""" + +[[language]] +name = "bash" +extensions = ["sh", "bash"] +repo = "https://github.com/tree-sitter/tree-sitter-bash" +branch = "master" +symbol = "tree_sitter_bash" +definition_query = """ +( + (comment)* @doc + . + (function_definition + name: (word) @name + body: (compound_statement) @body) @function.definition +) +""" +call_query = """ +(command_name (word) @name) @reference.call +""" + +[[language]] +name = "python" +extensions = ["py"] +repo = "https://github.com/tree-sitter/tree-sitter-python" +branch = "master" +symbol = "tree_sitter_python" +definition_query = """ +( + (comment)* @doc + . 
+ (function_definition + name: (identifier) @name + body: (block) @body) @function.definition +) +( + (comment)* @doc + . + (class_definition + name: (identifier) @name + body: (block) @body) @class.definition +) +""" +call_query = """ +(call + function: [ + (identifier) @name + (attribute attribute: (identifier) @name) + ]) @reference.call +""" + +[[language]] +name = "typescript" +extensions = ["ts", "mts", "cts"] +repo = "https://github.com/tree-sitter/tree-sitter-typescript" +branch = "master" +subpath = "typescript" +symbol = "tree_sitter_typescript" +definition_query = """ +( + (comment)* @doc + . + (function_declaration + name: (identifier) @name + body: (statement_block) @body) @function.definition +) +( + (comment)* @doc + . + (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function + body: (_) @body))) @function.definition +) +( + (comment)* @doc + . + (method_definition + name: (property_identifier) @name + body: (statement_block) @body) @method.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (member_expression property: (property_identifier) @name) + ]) @reference.call +""" + +[[language]] +name = "rust" +extensions = ["rs"] +repo = "https://github.com/tree-sitter/tree-sitter-rust" +branch = "master" +symbol = "tree_sitter_rust" +definition_query = """ +( + (line_comment)* @doc + . + (function_item + name: (identifier) @name + body: (block) @body) @function.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (scoped_identifier name: (identifier) @name) + (field_expression field: (field_identifier) @name) + ]) @reference.call +""" + +[[language]] +name = "javascript" +extensions = ["js", "mjs", "cjs"] +repo = "https://github.com/tree-sitter/tree-sitter-javascript" +branch = "master" +symbol = "tree_sitter_javascript" +definition_query = """ +( + (comment)* @doc + . 
+ (function_declaration + name: (identifier) @name + body: (statement_block) @body) @function.definition +) +( + (comment)* @doc + . + (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function + body: (_) @body))) @function.definition +) +( + (comment)* @doc + . + (method_definition + name: (property_identifier) @name + body: (statement_block) @body) @method.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (member_expression property: (property_identifier) @name) + ]) @reference.call +""" + +[[language]] +name = "java" +extensions = ["java"] +repo = "https://github.com/tree-sitter/tree-sitter-java" +branch = "master" +symbol = "tree_sitter_java" +definition_query = """ +(class_body + (block_comment) @doc + . + (method_declaration + name: (identifier) @name + body: (_) @body) @method.definition +) +(class_body + (block_comment) @doc + . + (constructor_declaration + name: (identifier) @name + body: (_) @body) @method.definition +) +(method_declaration + name: (identifier) @name + body: (_) @body) @method.definition +(constructor_declaration + name: (identifier) @name + body: (_) @body) @method.definition +""" +call_query = """ +(method_invocation + name: (identifier) @name) @reference.call +(object_creation_expression + type: (type_identifier) @name) @reference.call +""" + +[[language]] +name = "scala" +extensions = ["scala", "sc"] +repo = "https://github.com/tree-sitter/tree-sitter-scala" +branch = "master" +symbol = "tree_sitter_scala" +definition_query = """ +( + (block_comment)* @doc + . + (function_definition + name: (identifier) @name + body: (_) @body) @function.definition +) +( + (block_comment)* @doc + . + (function_declaration + name: (identifier) @name) @function.definition +) +( + (block_comment)* @doc + . + (class_definition + name: (identifier) @name + body: (template_body) @body) @class.definition +) +( + (block_comment)* @doc + . 
+ (object_definition + name: (identifier) @name + body: (template_body) @body) @object.definition +) +( + (block_comment)* @doc + . + (trait_definition + name: (identifier) @name + body: (template_body) @body) @trait.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (field_expression field: (identifier) @name) + ]) @reference.call +""" \ No newline at end of file diff --git a/test_project/src/lib.rs b/test_project/src/lib.rs deleted file mode 100644 index a61949e..0000000 --- a/test_project/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ -pub fn helper() { println!("Helper function"); } diff --git a/test_project/src/main.rs b/test_project/src/main.rs deleted file mode 100644 index a3e978a..0000000 --- a/test_project/src/main.rs +++ /dev/null @@ -1 +0,0 @@ -fn main() { println!("Hello, world!"); } From 702a612a7b4647a68355bea3f8e62717a3b37d22 Mon Sep 17 00:00:00 2001 From: ro Date: Thu, 25 Dec 2025 07:08:32 +0530 Subject: [PATCH 03/35] refactor: reorganize into workspace with core, fetch, tui, cli crates - split monolithic src/ into focused crates: - glimpse-core: types, config, tokenizer, source detection - glimpse-fetch: git clone and url processing - glimpse-tui: file picker - glimpse (cli): binary, arg parsing, analyzer, output - migrate analyzer tests (16 tests for pattern matching, excludes, includes) - add AGENTS.md with development guidelines - clean dependency graph with core as shared foundation --- AGENTS.md | 166 +++++++++++ Cargo.lock | 274 ++++-------------- Cargo.toml | 64 ++-- crates/cli/Cargo.toml | 29 ++ {src => crates/cli/src}/analyzer.rs | 251 +++++----------- {src => crates/cli/src}/cli.rs | 100 ++++--- {src => crates/cli/src}/main.rs | 41 +-- {src => crates/cli/src}/output.rs | 195 +------------ crates/core/Cargo.toml | 22 ++ build.rs => crates/core/build.rs | 22 +- {src => crates/core/src}/config.rs | 11 +- crates/core/src/lib.rs | 12 + {src => crates/core/src}/source_detection.rs | 20 -- {src => 
crates/core/src}/tokenizer.rs | 14 +- crates/core/src/types.rs | 30 ++ crates/fetch/Cargo.toml | 17 ++ .../fetch/src/git.rs | 27 +- crates/fetch/src/lib.rs | 5 + .../fetch/src/url.rs | 113 +------- crates/tui/Cargo.toml | 17 ++ {src => crates/tui/src}/file_picker.rs | 26 +- crates/tui/src/lib.rs | 3 + 22 files changed, 586 insertions(+), 873 deletions(-) create mode 100644 AGENTS.md create mode 100644 crates/cli/Cargo.toml rename {src => crates/cli/src}/analyzer.rs (73%) rename {src => crates/cli/src}/cli.rs (66%) rename {src => crates/cli/src}/main.rs (87%) rename {src => crates/cli/src}/output.rs (59%) create mode 100644 crates/core/Cargo.toml rename build.rs => crates/core/build.rs (85%) rename {src => crates/core/src}/config.rs (91%) create mode 100644 crates/core/src/lib.rs rename {src => crates/core/src}/source_detection.rs (84%) rename {src => crates/core/src}/tokenizer.rs (89%) create mode 100644 crates/core/src/types.rs create mode 100644 crates/fetch/Cargo.toml rename src/git_processor.rs => crates/fetch/src/git.rs (81%) create mode 100644 crates/fetch/src/lib.rs rename src/url_processor.rs => crates/fetch/src/url.rs (76%) create mode 100644 crates/tui/Cargo.toml rename {src => crates/tui/src}/file_picker.rs (95%) create mode 100644 crates/tui/src/lib.rs diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..d5d1d31 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,166 @@ +# Glimpse Development Guide + +A blazingly fast tool for peeking at codebases. Perfect for loading your codebase into an LLM's context. + +## Task Tracking + +Check `.todo.md` for current tasks and next steps. Keep it updated: +- Mark items `[x]` when completed +- Add new tasks as they're discovered +- Reference it before asking "what's next?" + +## Build Commands + +```bash +cargo build # debug build +cargo build --release # release build +cargo run -- # run with arguments +cargo run -- . 
# analyze current directory +cargo run -- --help # show help +``` + +## Test Commands + +```bash +cargo test # run all tests +cargo test --package glimpse-core # run tests for specific crate +cargo test test_name # run single test by name +cargo test test_name -- --nocapture # run test with stdout +cargo test -- --test-threads=1 # run tests sequentially +``` + +## Lint & Format + +```bash +cargo fmt # format all code +cargo fmt -- --check # check formatting (CI) +cargo clippy # run linter +cargo clippy -- -D warnings # fail on warnings (CI) +``` + +## Project Structure + +``` +glimpse/ +├── crates/ +│ ├── core/ # shared types, config, tokenizer, source detection +│ ├── fetch/ # git clone + url/html processing +│ ├── tui/ # file picker, output formatting, pdf generation +│ └── cli/ # binary crate, arg parsing, directory analyzer +└── languages.yml # language definitions for source detection +``` + +## Code Style + +### No Comments + +Code should be self-documenting. The only acceptable documentation is: +- Brief `///` docstrings on public API functions that aren't obvious +- `//!` module-level docs when necessary + +```rust +// BAD: explaining what code does +// Check if the file is a source file +if is_source_file(path) { ... } + +// BAD: inline comments +let name = path.file_name(); // get the filename + +// GOOD: self-documenting code, no comments needed +if is_source_file(path) { ... } + +// GOOD: docstring for non-obvious public function +/// Extract interpreter from shebang line and exec pattern +fn extract_interpreter(data: &str) -> Option { ... } +``` + +### Import Order + +Group imports in this order, separated by blank lines: +1. `std` library +2. External crates (alphabetical) +3. 
Internal crates - prefer `super::` over `crate::` when possible + +```rust +use std::fs; +use std::path::{Path, PathBuf}; + +use anyhow::Result; +use serde::{Deserialize, Serialize}; + +use super::types::FileEntry; // preferred for sibling modules +use crate::config::Config; // only when super:: won't reach +``` + +### Error Handling + +- Use `anyhow::Result` for fallible functions +- Propagate errors with `?` operator +- Use `.expect("message")` only when failure is a bug +- Never use `.unwrap()` outside of tests +- Use `anyhow::bail!` for early returns with errors + +### Naming Conventions + +- `snake_case` for functions, methods, variables, modules +- `PascalCase` for types, traits, enums +- `SCREAMING_SNAKE_CASE` for constants +- Prefer descriptive names over abbreviations +- Boolean functions: `is_`, `has_`, `can_`, `should_` + +### Type Definitions + +- Derive common traits: `Debug`, `Clone`, `Serialize`, `Deserialize` +- Put derives in consistent order +- Use `pub` sparingly - only what's needed + +```rust +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileEntry { + pub path: PathBuf, + pub content: String, + pub size: u64, +} +``` + +### Function Style + +- Keep functions focused and small +- Use early returns for guard clauses +- Prefer iterators and combinators over loops when clearer +- Use `impl Trait` for return types when appropriate + +### Testing + +- Tests live in `#[cfg(test)] mod tests` at bottom of file +- Use descriptive test names: `test__` +- Use `tempfile` for filesystem tests +- Group related assertions + +### Workspace Dependencies + +Always use workspace dependencies in crate Cargo.toml: + +```toml +[dependencies] +anyhow.workspace = true +serde.workspace = true +glimpse-core.workspace = true +``` + +### Patterns to Follow + +- Use `Option` combinators: `.map()`, `.and_then()`, `.unwrap_or()` +- Use `Result` combinators: `.map_err()`, `.context()` +- Prefer `&str` over `String` in function parameters +- Use `impl AsRef` for 
path parameters when flexible +- Use builders for complex configuration + +### Patterns to Avoid + +- Comments explaining what code does (code should be obvious) +- Deeply nested code (use early returns) +- Magic numbers (use named constants) +- `clone()` when borrowing works +- `Box` (use `anyhow::Error`) +- Panicking in library code diff --git a/Cargo.lock b/Cargo.lock index 0e9dbb3..54d661c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -126,22 +126,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" -[[package]] -name = "assert-json-diff" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "atomic-waker" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" - [[package]] name = "autocfg" version = "1.4.0" @@ -351,16 +335,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" -[[package]] -name = "colored" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" -dependencies = [ - "lazy_static", - "windows-sys 0.59.0", -] - [[package]] name = "compact_str" version = "0.8.1" @@ -472,7 +446,7 @@ dependencies = [ "crossterm_winapi", "mio", "parking_lot", - "rustix 0.38.44", + "rustix", "signal-hook", "signal-hook-mio", "winapi", @@ -687,12 +661,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "env_home" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" - [[package]] name = "equivalent" version = "1.0.2" @@ -967,30 +935,61 @@ dependencies = [ "arboard", "base64 0.22.1", "clap", - "colored", - "crossterm", - "dirs", - "git2", - "globset", + "glimpse-core", + "glimpse-fetch", + "glimpse-tui", "ignore", "indicatif", - "mockito", "num-format", - "once_cell", "printpdf", - "ratatui", "rayon", - "reqwest", - "scraper", + "serde", + "tempfile", +] + +[[package]] +name = "glimpse-core" +version = "0.7.8" +dependencies = [ + "anyhow", + "dirs", + "once_cell", "serde", "serde_yaml", "tempfile", "tiktoken-rs", "tokenizers", "toml", +] + +[[package]] +name = "glimpse-fetch" +version = "0.7.8" +dependencies = [ + "anyhow", + "arboard", + "git2", + "glimpse-core", + "indicatif", + "reqwest", + "scraper", + "tempfile", "url", - "walkdir", - "which", +] + +[[package]] +name = "glimpse-tui" +version = "0.7.8" +dependencies = [ + "anyhow", + "arboard", + "base64 0.22.1", + "crossterm", + "glimpse-core", + "ignore", + "num-format", + "printpdf", + "ratatui", ] [[package]] @@ -1017,26 +1016,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http 0.2.12", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "h2" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" -dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http 1.2.0", + "http", "indexmap", "slab", "tokio", @@ -1103,17 +1083,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - [[package]] name = "http-body" version = "0.4.6" @@ -1121,30 +1090,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http 0.2.12", - "pin-project-lite", -] - -[[package]] -name = "http-body" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" -dependencies = [ - "bytes", - "http 1.2.0", -] - -[[package]] -name = "http-body-util" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" -dependencies = [ - "bytes", - "futures-util", - "http 1.2.0", - "http-body 1.0.1", + "http", "pin-project-lite", ] @@ -1170,9 +1116,9 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.3.26", - "http 0.2.12", - "http-body 0.4.6", + "h2", + "http", + "http-body", "httparse", "httpdate", "itoa", @@ -1184,26 +1130,6 @@ dependencies = [ "want", ] -[[package]] -name = "hyper" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" -dependencies = [ - "bytes", - "futures-channel", - "futures-util", - "h2 0.4.7", - "http 1.2.0", - "http-body 1.0.1", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "smallvec", - "tokio", -] - [[package]] name = "hyper-tls" version = "0.5.0" @@ -1211,27 +1137,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper 0.14.32", + "hyper", "native-tls", "tokio", "tokio-native-tls", ] -[[package]] -name = "hyper-util" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" -dependencies = [ - "bytes", - "futures-util", - "http 
1.2.0", - "http-body 1.0.1", - "hyper 1.6.0", - "pin-project-lite", - "tokio", -] - [[package]] name = "icu_collections" version = "1.5.0" @@ -1592,12 +1503,6 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" -[[package]] -name = "linux-raw-sys" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" - [[package]] name = "litemap" version = "0.7.4" @@ -1728,30 +1633,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "mockito" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "652cd6d169a36eaf9d1e6bce1a221130439a966d7f27858af66a33a66e9c4ee2" -dependencies = [ - "assert-json-diff", - "bytes", - "colored", - "futures-util", - "http 1.2.0", - "http-body 1.0.1", - "http-body-util", - "hyper 1.6.0", - "hyper-util", - "log", - "rand", - "regex", - "serde_json", - "serde_urlencoded", - "similar", - "tokio", -] - [[package]] name = "monostate" version = "0.1.13" @@ -2427,10 +2308,10 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2 0.3.26", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.32", + "h2", + "http", + "http-body", + "hyper", "hyper-tls", "ipnet", "js-sys", @@ -2491,20 +2372,7 @@ dependencies = [ "bitflags 2.8.0", "errno", "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - -[[package]] -name = "rustix" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" -dependencies = [ - "bitflags 2.8.0", - "errno", - "libc", - "linux-raw-sys 0.11.0", + "linux-raw-sys", "windows-sys 0.59.0", ] @@ -2760,12 +2628,6 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" -[[package]] -name = "similar" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" - [[package]] name = "siphasher" version = "0.3.11" @@ -2956,7 +2818,7 @@ dependencies = [ "fastrand", "getrandom 0.3.1", "once_cell", - "rustix 0.38.44", + "rustix", "windows-sys 0.59.0", ] @@ -3102,7 +2964,6 @@ dependencies = [ "bytes", "libc", "mio", - "parking_lot", "pin-project-lite", "socket2", "windows-sys 0.52.0", @@ -3456,7 +3317,7 @@ checksum = "b7208998eaa3870dad37ec8836979581506e0c5c64c20c9e79e9d2a10d6f47bf" dependencies = [ "cc", "downcast-rs", - "rustix 0.38.44", + "rustix", "smallvec", "wayland-sys", ] @@ -3468,7 +3329,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2120de3d33638aaef5b9f4472bff75f07c56379cf76ea320bd3a3d65ecaf73f" dependencies = [ "bitflags 2.8.0", - "rustix 0.38.44", + "rustix", "wayland-backend", "wayland-scanner", ] @@ -3553,17 +3414,6 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" -[[package]] -name = "which" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d" -dependencies = [ - "env_home", - "rustix 1.1.2", - "winsafe", -] - [[package]] name = "winapi" version = "0.3.9" @@ -3762,12 +3612,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "winsafe" -version = "0.0.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" - [[package]] name = "wit-bindgen-rt" version = "0.33.0" @@ -3816,7 +3660,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5d91ffca73ee7f68ce055750bf9f6eca0780b8c85eff9bc046a3b0da41755e12" dependencies = [ "gethostname", - "rustix 0.38.44", + "rustix", "x11rb-protocol", ] diff --git a/Cargo.toml b/Cargo.toml index b388282..39b4cfb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,40 +1,56 @@ -[package] -name = "glimpse" +[workspace] +resolver = "2" +members = [ + "crates/cli", + "crates/core", + "crates/fetch", + "crates/tui" +] + +[workspace.package] version = "0.7.8" edition = "2021" -description = "A blazingly fast tool for peeking at codebases. Perfect for loading your codebase into an LLM's context." license = "MIT" -build = "build.rs" -[dependencies] +[workspace.dependencies] +# Internal crates +glimpse-core = { path = "crates/core" } +glimpse-fetch = { path = "crates/fetch" } +glimpse-tui = { path = "crates/tui" } + +# Common dependencies anyhow = "1.0.95" -arboard = { version = "3.4.1", features = ["wayland-data-control"] } -base64 = "0.22.1" -clap = { version = "4.5.23", features = ["derive"] } -colored = "2.2.0" -crossterm = "0.28.1" +serde = { version = "1.0.217", features = ["derive"] } +rayon = "1.10.0" + +# Core dependencies dirs = "5.0.1" -globset = "0.4.15" -ignore = "0.4.23" -indicatif = "0.17.9" once_cell = "1.20.2" -printpdf = "0.7.0" -ratatui = "0.29.0" -rayon = "1.10.0" -serde = { version = "1.0.217", features = ["derive"] } tempfile = "3.14.0" tiktoken-rs = "0.6.0" tokenizers = { version = "0.21.0", features = ["http"] } toml = "0.8.19" -walkdir = "2.5.0" + +# Fetch dependencies +arboard = { version = "3.4.1", features = ["wayland-data-control"] } +git2 = "0.18" +indicatif = "0.17.9" reqwest = { version = "0.11", features = ["blocking"] } scraper = "0.18" url = "2.5" -git2 = "0.18" -mockito = "1.4" + +# TUI dependencies +base64 = "0.22.1" +crossterm = "0.28.1" +ignore = "0.4.23" num-format = { version = "0.4.4" } -which = "8.0.0" +printpdf = "0.7.0" +ratatui = "0.29.0" -[build-dependencies] -serde = { version = "1.0.217", features = ["derive"] } -serde_yaml = 
"0.9.34" +# CLI dependencies +clap = { version = "4.5.23", features = ["derive"] } + +# Test dependencies +mockito = "1.4" +walkdir = "2.5.0" +which = "8.0.0" diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml new file mode 100644 index 0000000..6d58158 --- /dev/null +++ b/crates/cli/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "glimpse" +version.workspace = true +edition.workspace = true +license.workspace = true +description = "A blazingly fast tool for peeking at codebases" + +[[bin]] +name = "glimpse" +path = "src/main.rs" + +[dependencies] +glimpse-core.workspace = true +glimpse-fetch.workspace = true +glimpse-tui.workspace = true + +anyhow.workspace = true +arboard.workspace = true +base64.workspace = true +clap.workspace = true +ignore.workspace = true +indicatif.workspace = true +num-format.workspace = true +printpdf.workspace = true +rayon.workspace = true +serde.workspace = true + +[dev-dependencies] +tempfile.workspace = true diff --git a/src/analyzer.rs b/crates/cli/src/analyzer.rs similarity index 73% rename from src/analyzer.rs rename to crates/cli/src/analyzer.rs index 2d350e9..150afde 100644 --- a/src/analyzer.rs +++ b/crates/cli/src/analyzer.rs @@ -1,26 +1,24 @@ -use crate::cli::{Cli, Exclude, OutputFormat, TokenizerType}; -use crate::file_picker::FilePicker; -use crate::output::{ - display_token_counts, generate_output, generate_pdf, handle_output, FileEntry, -}; -use crate::source_detection; -use crate::tokenizer::TokenCounter; +use std::fs; +use std::path::{Path, PathBuf}; + use anyhow::Result; use ignore::{overrides::OverrideBuilder, WalkBuilder}; use indicatif::{ProgressBar, ProgressStyle}; use rayon::prelude::*; -use std::fs; -use std::path::{Path, PathBuf}; + +use glimpse_core::{is_source_file, Exclude, FileEntry, OutputFormat, TokenCounter, TokenizerType}; +use glimpse_tui::FilePicker; + +use crate::cli::Cli; +use crate::output::{display_token_counts, generate_output, generate_pdf, handle_output}; pub fn process_directory(args: 
&Cli) -> Result<()> { - // Configure thread pool if specified if let Some(threads) = args.threads { rayon::ThreadPoolBuilder::new() .num_threads(threads) .build_global()?; } - // Set up progress bar let pb = ProgressBar::new_spinner(); pb.set_style( ProgressStyle::default_spinner() @@ -30,25 +28,25 @@ pub fn process_directory(args: &Cli) -> Result<()> { pb.set_message("Scanning files..."); let output_format = args - .output - .clone() + .get_output_format() .expect("output format should be set from config"); let entries = process_entries(args)?; pb.finish(); if let Some(pdf_path) = &args.pdf { - let pdf_data = generate_pdf(&entries, args.output.clone().unwrap_or(OutputFormat::Both))?; + let pdf_data = generate_pdf( + &entries, + args.get_output_format().unwrap_or(OutputFormat::Both), + )?; fs::write(pdf_path, pdf_data)?; println!("PDF output written to: {}", pdf_path.display()); } else { - // Determine project name for XML output let project_name = if args.xml { Some(determine_project_name(&args.paths)) } else { None }; - // Handle output (print/copy/save) let output = generate_output(&entries, output_format, args.xml, project_name)?; handle_output(output, args)?; } @@ -65,14 +63,12 @@ fn determine_project_name(paths: &[String]) -> String { if let Some(first_path) = paths.first() { let path = std::path::Path::new(first_path); - // If it's a directory, use its name if path.is_dir() { if let Some(name) = path.file_name() { return name.to_string_lossy().to_string(); } } - // If it's a file, use the parent directory name if path.is_file() { if let Some(parent) = path.parent() { if let Some(name) = parent.file_name() { @@ -81,7 +77,6 @@ fn determine_project_name(paths: &[String]) -> String { } } - // Fallback to just the path itself first_path.clone() } else { "project".to_string() @@ -89,7 +84,6 @@ fn determine_project_name(paths: &[String]) -> String { } fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) -> bool { - // Basic file checks if 
!entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) { return false; } @@ -97,7 +91,6 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - let path = entry.path(); let max_size = args.max_size.expect("max_size should be set from config"); - // Size check if !entry .metadata() .map(|m| m.len() <= max_size) @@ -106,7 +99,6 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - return false; } - // Handle replacement mode with --only-include if let Some(ref only_includes) = args.only_include { let matches_only_include = matches_include_patterns(path, only_includes, base_path); @@ -114,7 +106,6 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - return false; } - // Apply excludes if any if let Some(ref excludes) = args.exclude { return !matches_exclude_patterns(path, excludes, base_path); } @@ -122,25 +113,20 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - return true; } - // Handle additive mode - // Check if it's a source file - let is_source = source_detection::is_source_file(path); + let is_source = is_source_file(path); - // Check if it matches additional include patterns let matches_include = if let Some(ref includes) = args.include { matches_include_patterns(path, includes, base_path) } else { false }; - // Include if EITHER source file OR matches include patterns let should_include = is_source || matches_include; if !should_include { return false; } - // Apply excludes to the union if let Some(ref excludes) = args.exclude { return !matches_exclude_patterns(path, excludes, base_path); } @@ -151,27 +137,23 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - fn matches_include_patterns(path: &Path, includes: &[String], base_path: &Path) -> bool { let mut override_builder = OverrideBuilder::new(base_path); - // Add include patterns (positive) for pattern in includes { if let Err(e) = 
override_builder.add(pattern) { eprintln!("Warning: Invalid include pattern '{pattern}': {e}"); } } - let overrides = override_builder.build().unwrap_or_else(|_| { - // Return a default override that matches nothing if build fails - OverrideBuilder::new(base_path).build().unwrap() - }); + let overrides = override_builder + .build() + .unwrap_or_else(|_| OverrideBuilder::new(base_path).build().unwrap()); let match_result = overrides.matched(path, false); - // Must be whitelisted and not ignored match_result.is_whitelist() && !match_result.is_ignore() } fn matches_exclude_patterns(path: &Path, excludes: &[Exclude], base_path: &Path) -> bool { let mut override_builder = OverrideBuilder::new(base_path); - // Add exclude patterns (negative) for exclude in excludes { match exclude { Exclude::Pattern(pattern) => { @@ -185,7 +167,6 @@ fn matches_exclude_patterns(path: &Path, excludes: &[Exclude], base_path: &Path) } } Exclude::File(file_path) => { - // Handle file exclusions if file_path.is_absolute() { if file_path.exists() { if let Ok(relative_path) = file_path.strip_prefix(base_path) { @@ -209,10 +190,9 @@ fn matches_exclude_patterns(path: &Path, excludes: &[Exclude], base_path: &Path) } } - let overrides = override_builder.build().unwrap_or_else(|_| { - // Return a default override that matches nothing if build fails - OverrideBuilder::new(base_path).build().unwrap() - }); + let overrides = override_builder + .build() + .unwrap_or_else(|_| OverrideBuilder::new(base_path).build().unwrap()); let match_result = overrides.matched(path, false); match_result.is_ignore() @@ -231,7 +211,6 @@ pub fn process_entries(args: &Cli) -> Result> { ); let selected_paths = picker.run()?; - // Process selected files selected_paths .into_iter() .filter_map(|path| { @@ -270,7 +249,6 @@ pub fn process_entries(args: &Cli) -> Result> { all_entries.extend(dir_entries); } else if path.is_file() { - // Process single file let entry = ignore::WalkBuilder::new(path) .build() .next() @@ -290,10 
+268,10 @@ pub fn process_entries(args: &Cli) -> Result> { Ok(entries) } -// Removed the is_excluded function as it's now handled by WalkBuilder overrides - pub fn create_token_counter(args: &Cli) -> Result { - match args.tokenizer.as_ref().unwrap_or(&TokenizerType::Tiktoken) { + let tokenizer_type = args.get_tokenizer_type().unwrap_or(TokenizerType::Tiktoken); + + match tokenizer_type { TokenizerType::Tiktoken => { if let Some(model) = &args.model { TokenCounter::new(model) @@ -315,10 +293,8 @@ pub fn create_token_counter(args: &Cli) -> Result { fn process_file(entry: &ignore::DirEntry, base_path: &Path) -> Result { let relative_path = if base_path.is_file() { - // If base_path is a file, use the file name as the relative path base_path.file_name().map(PathBuf::from).unwrap_or_default() } else { - // Otherwise, strip the base path as usual entry.path().strip_prefix(base_path)?.to_path_buf() }; let content = fs::read_to_string(entry.path())?; @@ -337,11 +313,12 @@ mod tests { use std::io::Write; use tempfile::{tempdir, TempDir}; + use crate::cli::CliOutputFormat; + fn setup_test_directory() -> Result<(TempDir, Vec)> { let dir = tempdir()?; let mut created_files = Vec::new(); - // Create a nested directory structure with various file types let test_files = vec![ ("src/main.rs", "fn main() {}"), ("src/lib.rs", "pub fn lib() {}"), @@ -376,9 +353,9 @@ mod tests { include: None, only_include: None, exclude: None, - max_size: Some(10 * 1024 * 1024), // 10MB + max_size: Some(10 * 1024 * 1024), max_depth: Some(10), - output: Some(OutputFormat::Both), + output: Some(CliOutputFormat::Both), file: None, print: true, threads: None, @@ -403,11 +380,9 @@ mod tests { let main_rs_path = dir.path().join("src/main.rs"); let test_cases = vec![ - // Pattern exclusions (Exclude::Pattern("**/*.rs".to_string()), true), (Exclude::Pattern("**/*.js".to_string()), false), (Exclude::Pattern("test/**".to_string()), false), - // File exclusions (Exclude::File(main_rs_path.clone()), true), 
(Exclude::File(PathBuf::from("nonexistent.rs")), false), ]; @@ -417,8 +392,6 @@ mod tests { match &exclude { Exclude::Pattern(pattern) => { - // For patterns that should exclude, we need to add a "!" prefix - // to make them negative patterns (exclusions) let exclude_pattern = if !pattern.starts_with('!') { format!("!{pattern}") } else { @@ -428,13 +401,11 @@ mod tests { } Exclude::File(file_path) => { if file_path.exists() { - // Get the file path relative to the test directory let rel_path = if file_path.is_absolute() { file_path.strip_prefix(dir.path()).unwrap_or(file_path) } else { file_path }; - // Add as a negative pattern let pattern = format!("!{}", rel_path.display()); override_builder.add(&pattern).unwrap(); } @@ -458,11 +429,9 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test excluding all Rust files cli.exclude = Some(vec![Exclude::Pattern("**/*.rs".to_string())]); let entries = process_entries(&cli)?; - // Verify no .rs files were processed for entry in &entries { assert_ne!( entry.path.extension().and_then(|ext| ext.to_str()), @@ -472,7 +441,6 @@ mod tests { ); } - // Test excluding specific directories cli.exclude = Some(vec![ Exclude::Pattern("**/node_modules/**".to_string()), Exclude::Pattern("**/target/**".to_string()), @@ -480,7 +448,6 @@ mod tests { ]); let entries = process_entries(&cli)?; - // Verify excluded directories were not processed for entry in &entries { let path_str = entry.path.to_string_lossy(); assert!( @@ -500,14 +467,11 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test including additional Rust files (should get all source files) cli.include = Some(vec!["**/*.rs".to_string()]); let entries = process_entries(&cli)?; - // Should include all source files (since .rs is already a source extension) assert!(!entries.is_empty(), "Should have found files"); - // Should include source files: .rs, .py, .md let 
extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) @@ -516,23 +480,19 @@ mod tests { assert!(extensions.contains(&"py")); assert!(extensions.contains(&"md")); - // Test including a non-source extension as additional cli.include = Some(vec!["**/*.xyz".to_string()]); - - // Create a .xyz file fs::write(dir.path().join("test.xyz"), "data")?; let entries = process_entries(&cli)?; - // Should include BOTH .xyz files AND normal source files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"xyz")); // Additional pattern - assert!(extensions.contains(&"rs")); // Normal source file - assert!(extensions.contains(&"py")); // Normal source file - assert!(extensions.contains(&"md")); // Normal source file + assert!(extensions.contains(&"xyz")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); Ok(()) } @@ -542,16 +502,13 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test additional includes with excludes - should get all source files plus additional, minus excludes cli.include = Some(vec!["**/*.xyz".to_string()]); cli.exclude = Some(vec![Exclude::Pattern("**/test.rs".to_string())]); - // Create a .xyz file fs::write(dir.path().join("test.xyz"), "data")?; let entries = process_entries(&cli)?; - // Should include all source files + .xyz files, but exclude test.rs assert!(!entries.is_empty(), "Should have found files"); let extensions: Vec<_> = entries @@ -559,13 +516,11 @@ mod tests { .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - // Should have .xyz (additional) plus source files (.rs, .py, .md) - assert!(extensions.contains(&"xyz")); // Additional pattern - assert!(extensions.contains(&"rs")); // Source files (but not test.rs) - assert!(extensions.contains(&"py")); // 
Source files - assert!(extensions.contains(&"md")); // Source files + assert!(extensions.contains(&"xyz")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); - // Verify test.rs was excluded for entry in &entries { assert!( !entry.path.to_string_lossy().contains("test.rs"), @@ -574,12 +529,10 @@ mod tests { ); } - // Test excluding a directory cli.include = Some(vec!["**/*.xyz".to_string()]); cli.exclude = Some(vec![Exclude::Pattern("**/nested/**".to_string())]); let entries = process_entries(&cli)?; - // Should include source files + .xyz, but exclude nested directory for entry in &entries { assert!( !entry.path.to_string_lossy().contains("nested"), @@ -596,15 +549,11 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test with depth limit of 1 cli.max_depth = Some(1); process_directory(&cli)?; - // Verify only top-level files were processed - // Test with depth limit of 2 cli.max_depth = Some(2); process_directory(&cli)?; - // Verify files up to depth 2 were processed Ok(()) } @@ -614,15 +563,11 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test without hidden files cli.hidden = false; process_directory(&cli)?; - // Verify hidden files were not processed - // Test with hidden files cli.hidden = true; process_directory(&cli)?; - // Verify hidden files were processed Ok(()) } @@ -634,7 +579,6 @@ mod tests { let cli = create_test_cli(rust_file); process_directory(&cli)?; - // Verify single file was processed correctly Ok(()) } @@ -643,19 +587,16 @@ mod tests { fn test_include_patterns_extend_source_detection() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create a .peb file (not recognized by source detection) let peb_path = dir.path().join("template.peb"); let mut peb_file = File::create(&peb_path)?; writeln!(peb_file, "template content")?; - // Create a .xyz 
file (also not recognized) let xyz_path = dir.path().join("data.xyz"); let mut xyz_file = File::create(&xyz_path)?; writeln!(xyz_file, "data content")?; let mut cli = create_test_cli(dir.path()); - // Test 1: Without include patterns, non-source files should be excluded cli.include = None; let entries = process_entries(&cli)?; assert!(!entries @@ -665,21 +606,18 @@ mod tests { .iter() .any(|e| e.path.extension().and_then(|ext| ext.to_str()) == Some("xyz"))); - // Test 2: With include patterns, should ADD to source detection cli.include = Some(vec!["*.peb".to_string()]); let entries = process_entries(&cli)?; - // Should include .peb files PLUS all normal source files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"peb")); // Additional pattern - assert!(extensions.contains(&"rs")); // Normal source file - assert!(extensions.contains(&"py")); // Normal source file - assert!(extensions.contains(&"md")); // Normal source file + assert!(extensions.contains(&"peb")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); - // Test 3: Multiple include patterns (additive) cli.include = Some(vec!["*.peb".to_string(), "*.xyz".to_string()]); let entries = process_entries(&cli)?; @@ -687,13 +625,12 @@ mod tests { .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"peb")); // Additional pattern - assert!(extensions.contains(&"xyz")); // Additional pattern - assert!(extensions.contains(&"rs")); // Normal source file - assert!(extensions.contains(&"py")); // Normal source file - assert!(extensions.contains(&"md")); // Normal source file + assert!(extensions.contains(&"peb")); + assert!(extensions.contains(&"xyz")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); - // Test 4: Include + 
exclude patterns (union then subtract) cli.include = Some(vec!["*.peb".to_string(), "*.xyz".to_string()]); cli.exclude = Some(vec![Exclude::Pattern("*.xyz".to_string())]); let entries = process_entries(&cli)?; @@ -702,11 +639,11 @@ mod tests { .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"peb")); // Additional pattern, not excluded - assert!(!extensions.contains(&"xyz")); // Additional pattern, but excluded - assert!(extensions.contains(&"rs")); // Normal source file, not excluded - assert!(extensions.contains(&"py")); // Normal source file, not excluded - assert!(extensions.contains(&"md")); // Normal source file, not excluded + assert!(extensions.contains(&"peb")); + assert!(!extensions.contains(&"xyz")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); Ok(()) } @@ -715,7 +652,6 @@ mod tests { fn test_backward_compatibility_no_patterns() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Add some non-source files that should be ignored by default let binary_path = dir.path().join("binary.bin"); fs::write(&binary_path, b"\x00\x01\x02\x03")?; @@ -724,12 +660,10 @@ mod tests { let mut cli = create_test_cli(dir.path()); - // Test 1: No patterns specified - should only get source files cli.include = None; cli.exclude = None; let entries = process_entries(&cli)?; - // Should find source files (.rs, .py, .md) but not .bin or .conf let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) @@ -741,19 +675,17 @@ mod tests { assert!(!extensions.contains(&"bin")); assert!(!extensions.contains(&"conf")); - // Test 2: Only exclude patterns - should work as before cli.exclude = Some(vec![Exclude::Pattern("**/*.rs".to_string())]); let entries = process_entries(&cli)?; - // Should still apply source detection, but exclude .rs files let extensions: Vec<_> = entries .iter() 
.filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(!extensions.contains(&"rs")); // Excluded - assert!(extensions.contains(&"py")); // Source file, not excluded - assert!(!extensions.contains(&"bin")); // Not source file + assert!(!extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(!extensions.contains(&"bin")); Ok(()) } @@ -762,23 +694,19 @@ mod tests { fn test_single_file_processing_with_patterns() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create a .peb file let peb_path = dir.path().join("template.peb"); fs::write(&peb_path, "template content")?; - // Test 1: Single .peb file without include patterns - should be rejected let mut cli = create_test_cli(&peb_path); cli.paths = vec![peb_path.to_string_lossy().to_string()]; cli.include = None; let entries = process_entries(&cli)?; assert_eq!(entries.len(), 0); - // Test 2: Single .peb file WITH include patterns - should be accepted cli.include = Some(vec!["*.peb".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 1); - // Test 3: Single .rs file with exclude pattern - should be rejected let rs_path = dir.path().join("src/main.rs"); cli.paths = vec![rs_path.to_string_lossy().to_string()]; cli.include = None; @@ -793,45 +721,38 @@ mod tests { fn test_pattern_edge_cases() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create various test files fs::write(dir.path().join("test.peb"), "content")?; fs::write(dir.path().join("test.xyz"), "content")?; fs::write(dir.path().join("script.py"), "print('test')")?; let mut cli = create_test_cli(dir.path()); - // Test 1: Empty include patterns (edge case) - should still get source files cli.include = Some(vec![]); let entries = process_entries(&cli)?; - // With empty include patterns, should still get source files assert!(!entries.is_empty()); let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) 
.collect(); - assert!(extensions.contains(&"rs")); // Source files should still be included + assert!(extensions.contains(&"rs")); assert!(extensions.contains(&"py")); assert!(extensions.contains(&"md")); - // Test 2: Include pattern that matches source files (additive) cli.include = Some(vec!["**/*.py".to_string()]); let entries = process_entries(&cli)?; - // Should include source files + additional .py matches let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"py")); // Both existing and additional - assert!(extensions.contains(&"rs")); // Source files - assert!(extensions.contains(&"md")); // Source files + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"md")); - // Test 3: Include everything, then exclude cli.include = Some(vec!["**/*".to_string()]); cli.exclude = Some(vec![Exclude::Pattern("**/*.rs".to_string())]); let entries = process_entries(&cli)?; - // Should include everything (.peb, .xyz, .py, .md from both source detection and include pattern) but not .rs files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) @@ -851,19 +772,15 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test 1: Invalid glob pattern (this should not panic) cli.include = Some(vec!["[invalid".to_string()]); let _entries = process_entries(&cli)?; - // Should handle gracefully, possibly matching nothing - // Test 2: Mix of valid and invalid patterns cli.include = Some(vec![ "**/*.rs".to_string(), "[invalid".to_string(), "**/*.py".to_string(), ]); let _entries = process_entries(&cli)?; - // Should process valid patterns, ignore invalid ones Ok(()) } @@ -872,7 +789,6 @@ mod tests { fn test_include_patterns_are_additional() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create a .peb file 
fs::write(dir.path().join("template.peb"), "template content")?; let mut cli = create_test_cli(dir.path()); @@ -880,18 +796,16 @@ mod tests { let entries = process_entries(&cli)?; - // Should include BOTH .peb files AND normal source files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"peb")); // Additional pattern - assert!(extensions.contains(&"rs")); // Normal source file - assert!(extensions.contains(&"py")); // Normal source file - assert!(extensions.contains(&"md")); // Normal source file + assert!(extensions.contains(&"peb")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); - // Should be more than just the .peb file assert!(entries.len() > 1); Ok(()) @@ -901,21 +815,18 @@ mod tests { fn test_only_include_replacement_behavior() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create various test files including non-source files fs::write(dir.path().join("config.conf"), "key=value")?; fs::write(dir.path().join("data.toml"), "[section]\nkey = 'value'")?; fs::write(dir.path().join("template.peb"), "template content")?; let mut cli = create_test_cli(dir.path()); - // Test 1: --only-include should ONLY include specified patterns, no other files cli.only_include = Some(vec!["*.conf".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 1); assert!(entries[0].path.extension().and_then(|ext| ext.to_str()) == Some("conf")); - // Verify no other files are included let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) @@ -925,7 +836,6 @@ mod tests { assert!(!extensions.contains(&"md")); assert!(!extensions.contains(&"toml")); - // Test 2: Multiple patterns in --only-include cli.only_include = Some(vec!["*.conf".to_string(), "*.toml".to_string()]); let entries = process_entries(&cli)?; @@ -936,10 +846,9 @@ 
mod tests { .collect(); assert!(extensions.contains(&"conf")); assert!(extensions.contains(&"toml")); - assert!(!extensions.contains(&"rs")); // No other files + assert!(!extensions.contains(&"rs")); assert!(!extensions.contains(&"py")); - // Test 3: --only-include with exclude patterns cli.only_include = Some(vec![ "*.conf".to_string(), "*.toml".to_string(), @@ -948,22 +857,21 @@ mod tests { cli.exclude = Some(vec![Exclude::Pattern("*.toml".to_string())]); let entries = process_entries(&cli)?; - assert_eq!(entries.len(), 2); // conf and peb, but not toml (excluded) + assert_eq!(entries.len(), 2); let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); assert!(extensions.contains(&"conf")); assert!(extensions.contains(&"peb")); - assert!(!extensions.contains(&"toml")); // Excluded - assert!(!extensions.contains(&"rs")); // No other files + assert!(!extensions.contains(&"toml")); + assert!(!extensions.contains(&"rs")); - // Test 4: --only-include with pattern that matches nothing cli.only_include = Some(vec!["*.nonexistent".to_string()]); cli.exclude = None; let entries = process_entries(&cli)?; - assert_eq!(entries.len(), 0); // Should match nothing + assert_eq!(entries.len(), 0); Ok(()) } @@ -972,32 +880,27 @@ mod tests { fn test_only_include_vs_include_behavior_difference() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create a non-source file fs::write(dir.path().join("config.conf"), "key=value")?; let mut cli = create_test_cli(dir.path()); - // Test additive behavior with --include cli.include = Some(vec!["*.conf".to_string()]); cli.only_include = None; let additive_entries = process_entries(&cli)?; - // Should include conf + source files let additive_extensions: Vec<_> = additive_entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(additive_extensions.contains(&"conf")); // Additional pattern - 
assert!(additive_extensions.contains(&"rs")); // Source files - assert!(additive_extensions.contains(&"py")); // Source files - assert!(additive_extensions.contains(&"md")); // Source files + assert!(additive_extensions.contains(&"conf")); + assert!(additive_extensions.contains(&"rs")); + assert!(additive_extensions.contains(&"py")); + assert!(additive_extensions.contains(&"md")); - // Test replacement behavior with --only-include cli.include = None; cli.only_include = Some(vec!["*.conf".to_string()]); let replacement_entries = process_entries(&cli)?; - // Should include ONLY conf files assert_eq!(replacement_entries.len(), 1); assert!( replacement_entries[0] @@ -1011,12 +914,11 @@ mod tests { .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(replacement_extensions.contains(&"conf")); // Only pattern - assert!(!replacement_extensions.contains(&"rs")); // No source files - assert!(!replacement_extensions.contains(&"py")); // No source files - assert!(!replacement_extensions.contains(&"md")); // No source files + assert!(replacement_extensions.contains(&"conf")); + assert!(!replacement_extensions.contains(&"rs")); + assert!(!replacement_extensions.contains(&"py")); + assert!(!replacement_extensions.contains(&"md")); - // Verify the counts are different assert!(additive_entries.len() > replacement_entries.len()); Ok(()) @@ -1026,32 +928,27 @@ mod tests { fn test_only_include_single_file_processing() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create and test single file processing with a truly non-source file let config_path = dir.path().join("config.conf"); fs::write(&config_path, "key=value")?; let mut cli = create_test_cli(&config_path); cli.paths = vec![config_path.to_string_lossy().to_string()]; - // Test 1: Single non-source file without --only-include should be rejected cli.only_include = None; let entries = process_entries(&cli)?; assert_eq!(entries.len(), 0); - // Test 2: Single non-source 
file WITH --only-include should be accepted cli.only_include = Some(vec!["*.conf".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 1); assert!(entries[0].path.extension().and_then(|ext| ext.to_str()) == Some("conf")); - // Test 3: Single source file WITH --only-include that doesn't match should be rejected let rs_path = dir.path().join("src/main.rs"); cli.paths = vec![rs_path.to_string_lossy().to_string()]; cli.only_include = Some(vec!["*.conf".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 0); - // Test 4: Single source file WITH --only-include that matches should be accepted cli.only_include = Some(vec!["*.rs".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 1); diff --git a/src/cli.rs b/crates/cli/src/cli.rs similarity index 66% rename from src/cli.rs rename to crates/cli/src/cli.rs index d8147e4..f1ed9ab 100644 --- a/src/cli.rs +++ b/crates/cli/src/cli.rs @@ -1,27 +1,58 @@ -use crate::config::Config; +use std::path::PathBuf; + use clap::{Parser, ValueEnum}; use serde::{Deserialize, Serialize}; -use std::path::PathBuf; + +use glimpse_core::{BackwardsCompatOutputFormat, Config, Exclude, OutputFormat, TokenizerType}; #[derive(Debug, Clone, ValueEnum, Serialize, Deserialize)] -pub enum OutputFormat { +pub enum CliOutputFormat { Tree, Files, Both, } +impl From for OutputFormat { + fn from(format: CliOutputFormat) -> Self { + match format { + CliOutputFormat::Tree => OutputFormat::Tree, + CliOutputFormat::Files => OutputFormat::Files, + CliOutputFormat::Both => OutputFormat::Both, + } + } +} + +impl From for CliOutputFormat { + fn from(format: OutputFormat) -> Self { + match format { + OutputFormat::Tree => CliOutputFormat::Tree, + OutputFormat::Files => CliOutputFormat::Files, + OutputFormat::Both => CliOutputFormat::Both, + } + } +} + +impl From for CliOutputFormat { + fn from(format: BackwardsCompatOutputFormat) -> Self { + let output_format: OutputFormat = format.into(); + 
output_format.into() + } +} + #[derive(Debug, Clone, ValueEnum)] -pub enum TokenizerType { +pub enum CliTokenizerType { Tiktoken, #[clap(name = "huggingface")] HuggingFace, } -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(untagged)] -pub enum Exclude { - File(PathBuf), - Pattern(String), +impl From for TokenizerType { + fn from(t: CliTokenizerType) -> Self { + match t { + CliTokenizerType::Tiktoken => TokenizerType::Tiktoken, + CliTokenizerType::HuggingFace => TokenizerType::HuggingFace, + } + } } #[derive(Parser, Debug, Clone)] @@ -31,95 +62,72 @@ pub enum Exclude { version )] pub struct Cli { - /// Files or directories to analyze (multiple allowed), or a single URL/git repository #[arg(default_value = ".")] pub paths: Vec, - /// Print the config file path and exit #[arg(long)] pub config_path: bool, - /// Additional patterns to include (e.g. "*.rs,*.go") - adds to source file detection #[arg(short, long, value_delimiter = ',')] pub include: Option>, - /// Only include files matching these patterns (e.g. 
"*.yml,*.toml") - replaces source file detection #[arg(long, value_delimiter = ',')] pub only_include: Option>, - /// Additional patterns to exclude #[arg(short, long, value_parser = parse_exclude, value_delimiter = ',')] pub exclude: Option>, - /// Maximum file size in bytes #[arg(short, long)] pub max_size: Option, - /// Maximum directory depth #[arg(long)] pub max_depth: Option, - /// Output format (tree, files, or both) #[arg(short, long, value_enum)] - pub output: Option, + pub output: Option, - /// Output file path (optional) #[arg(short = 'f', long, num_args = 0..=1, default_missing_value = "GLIMPSE.md")] pub file: Option, - /// Init glimpse config file #[arg(long, default_value_t = false)] pub config: bool, - /// Print to stdout instead #[arg(short, long)] pub print: bool, - /// Number of threads for parallel processing #[arg(short, long)] pub threads: Option, - /// Show hidden files and directories #[arg(short = 'H', long)] pub hidden: bool, - /// Don't respect .gitignore files #[arg(long)] pub no_ignore: bool, - /// Ignore Token Count #[arg(long)] pub no_tokens: bool, - /// Tokenizer to use (tiktoken or huggingface) #[arg(long, value_enum)] - pub tokenizer: Option, + pub tokenizer: Option, - /// Model to use for HuggingFace tokenizer #[arg(long)] pub model: Option, - /// Path to local tokenizer file #[arg(long)] pub tokenizer_file: Option, - /// Interactive mode #[arg(long)] pub interactive: bool, - /// Output as Pdf #[arg(long)] pub pdf: Option, - /// Traverse sublinks when processing URLs #[arg(long)] pub traverse_links: bool, - /// Maximum depth to traverse sublinks (default: 1) #[arg(long)] pub link_depth: Option, - /// Output in XML format for better LLM compatibility #[arg(short = 'x', long)] pub xml: bool, } @@ -128,14 +136,11 @@ impl Cli { pub fn parse_with_config(config: &Config) -> anyhow::Result { let mut cli = Self::parse(); - // Apply config defaults if CLI args aren't specified cli.max_size = cli.max_size.or(Some(config.max_size)); 
cli.max_depth = cli.max_depth.or(Some(config.max_depth)); - cli.output = cli.output.or(Some(OutputFormat::from( - config.default_output_format.clone(), - ))); + let output_format: OutputFormat = config.default_output_format.clone().into(); + cli.output = cli.output.or(Some(CliOutputFormat::from(output_format))); - // Merge excludes from config and CLI if let Some(mut excludes) = cli.exclude.take() { excludes.extend(config.default_excludes.clone()); cli.exclude = Some(excludes); @@ -143,19 +148,17 @@ impl Cli { cli.exclude = Some(config.default_excludes.clone()); } - // Set default tokenizer if none specified but token counting is enabled if !cli.no_tokens && cli.tokenizer.is_none() { cli.tokenizer = Some(match config.default_tokenizer.as_str() { - "huggingface" => TokenizerType::HuggingFace, - _ => TokenizerType::Tiktoken, + "huggingface" => CliTokenizerType::HuggingFace, + _ => CliTokenizerType::Tiktoken, }); } - // Set default model for HuggingFace if none specified if cli .tokenizer .as_ref() - .is_some_and(|t| matches!(t, TokenizerType::HuggingFace)) + .is_some_and(|t| matches!(t, CliTokenizerType::HuggingFace)) && cli.model.is_none() && cli.tokenizer_file.is_none() { @@ -172,7 +175,6 @@ impl Cli { } pub fn validate_args(&self, is_url: bool) -> anyhow::Result<()> { - // Validate that both include and only_include are not used together if self.include.is_some() && self.only_include.is_some() { return Err(anyhow::anyhow!( "Cannot use both --include and --only-include flags together. Use --include for additive behavior (add to source files) or --only-include for replacement behavior (only specified patterns)." 
@@ -195,6 +197,14 @@ impl Cli { } Ok(()) } + + pub fn get_output_format(&self) -> Option { + self.output.clone().map(|f| f.into()) + } + + pub fn get_tokenizer_type(&self) -> Option { + self.tokenizer.clone().map(|t| t.into()) + } } fn parse_exclude(value: &str) -> Result { diff --git a/src/main.rs b/crates/cli/src/main.rs similarity index 87% rename from src/main.rs rename to crates/cli/src/main.rs index adfc044..bb5c3f5 100644 --- a/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,24 +1,20 @@ mod analyzer; mod cli; -mod config; -mod file_picker; -mod git_processor; mod output; -mod source_detection; -mod tokenizer; -mod url_processor; -use crate::analyzer::process_directory; -use crate::cli::Cli; -use crate::config::{ - get_config_path, load_config, load_repo_config, save_config, save_repo_config, RepoConfig, -}; -use crate::git_processor::GitProcessor; -use crate::url_processor::UrlProcessor; use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; +use glimpse_core::{ + get_config_path, load_config, load_repo_config, save_config, save_repo_config, + BackwardsCompatOutputFormat, RepoConfig, +}; +use glimpse_fetch::{GitProcessor, UrlProcessor}; + +use crate::analyzer::process_directory; +use crate::cli::Cli; + fn is_url_or_git(path: &str) -> bool { GitProcessor::is_git_url(path) || path.starts_with("http://") || path.starts_with("https://") } @@ -62,7 +58,6 @@ fn main() -> anyhow::Result<()> { save_repo_config(&glimpse_file, &repo_config)?; println!("Configuration saved to {}", glimpse_file.display()); - // If the user explicitly saved a config, remove this directory from the skipped list if let Ok(canonical_root) = std::fs::canonicalize(&root_dir) { let root_str = canonical_root.to_string_lossy().to_string(); if let Some(pos) = config @@ -79,7 +74,6 @@ fn main() -> anyhow::Result<()> { let repo_config = load_repo_config(&glimpse_file)?; apply_repo_config(&mut args, &repo_config); } else if has_custom_options(&args) { - // Determine canonical root 
directory path for consistent tracking let canonical_root = std::fs::canonicalize(&root_dir).unwrap_or(root_dir.clone()); let root_str = canonical_root.to_string_lossy().to_string(); @@ -96,7 +90,6 @@ fn main() -> anyhow::Result<()> { save_repo_config(&glimpse_file, &repo_config)?; println!("Configuration saved to {}", glimpse_file.display()); - // In case it was previously skipped, remove from skipped list if let Some(pos) = config .skipped_prompt_repos .iter() @@ -106,7 +99,6 @@ fn main() -> anyhow::Result<()> { save_config(&config)?; } } else { - // Record that user declined for this project config.skipped_prompt_repos.push(root_str); save_config(&config)?; } @@ -139,15 +131,12 @@ fn main() -> anyhow::Result<()> { } let process_args = if subpaths.is_empty() { - // No subpaths specified, process the whole repo args.with_path(repo_path.to_str().unwrap()) } else { - // Process only the specified subpaths inside the repo let mut new_args = args.clone(); new_args.paths = subpaths .iter() .map(|sub| { - // Join with repo_path let mut joined = std::path::PathBuf::from(&repo_path); joined.push(sub); joined.to_string_lossy().to_string() @@ -170,7 +159,6 @@ fn main() -> anyhow::Result<()> { } else if args.print { println!("{content}"); } else { - // Default behavior for URLs if no -f or --print: copy to clipboard match arboard::Clipboard::new() .and_then(|mut clipboard| clipboard.set_text(content)) { @@ -196,14 +184,12 @@ fn find_containing_dir_with_glimpse(path: &Path) -> anyhow::Result { path.to_path_buf() }; - // Try to find a .glimpse file or go up until we reach the root loop { if current.join(".glimpse").exists() { return Ok(current); } if !current.pop() { - // If we can't go up anymore, just use the original path return Ok(if path.is_file() { path.parent().unwrap_or(Path::new(".")).to_path_buf() } else { @@ -214,14 +200,14 @@ fn find_containing_dir_with_glimpse(path: &Path) -> anyhow::Result { } fn create_repo_config_from_args(args: &Cli) -> RepoConfig { - use 
crate::config::BackwardsCompatOutputFormat; - RepoConfig { include: args.include.clone(), exclude: args.exclude.clone(), max_size: args.max_size, max_depth: args.max_depth, - output: args.output.clone().map(BackwardsCompatOutputFormat::from), + output: args + .get_output_format() + .map(BackwardsCompatOutputFormat::from), file: args.file.clone(), hidden: Some(args.hidden), no_ignore: Some(args.no_ignore), @@ -246,7 +232,8 @@ fn apply_repo_config(args: &mut Cli, repo_config: &RepoConfig) { } if let Some(ref output) = repo_config.output { - args.output = Some((*output).clone().into()); + let output_format: glimpse_core::OutputFormat = (*output).clone().into(); + args.output = Some(output_format.into()); } if let Some(ref file) = repo_config.file { diff --git a/src/output.rs b/crates/cli/src/output.rs similarity index 59% rename from src/output.rs rename to crates/cli/src/output.rs index ee305e4..5b0ddce 100644 --- a/src/output.rs +++ b/crates/cli/src/output.rs @@ -1,19 +1,14 @@ -use crate::{ - cli::{Cli, OutputFormat}, - tokenizer::TokenCounter, -}; +use std::fs; +use std::io::BufWriter; + use anyhow::Result; use base64::Engine; use num_format::{Buffer, Locale}; use printpdf::*; -use std::{fs, io::BufWriter, path::PathBuf}; -#[derive(Debug, Clone)] -pub struct FileEntry { - pub path: PathBuf, - pub content: String, - pub size: u64, -} +use glimpse_core::{FileEntry, OutputFormat, TokenCounter}; + +use crate::cli::Cli; pub fn generate_output( entries: &[FileEntry], @@ -73,7 +68,6 @@ pub fn generate_output( } } - // Add summary if xml_format { output.push_str("\n"); output.push_str(&format!("Total files: {}\n", entries.len())); @@ -117,7 +111,6 @@ pub fn display_token_counts(token_counter: TokenCounter, entries: &[FileEntry]) println!("Total tokens: {}", buf.as_str()); println!("\nBreakdown by file:"); - // Sorting breakdown let mut breakdown = token_count.breakdown; breakdown.sort_by(|(_, a), (_, b)| b.cmp(a)); let top_files = breakdown.iter().take(15); @@ -134,7 
+127,6 @@ fn generate_tree(entries: &[FileEntry]) -> Result { let mut output = String::new(); let mut current_path = vec![]; - // Sort entries by path to ensure consistent output let mut sorted_entries = entries.to_vec(); sorted_entries.sort_by(|a, b| a.path.cmp(&b.path)); @@ -144,7 +136,6 @@ fn generate_tree(entries: &[FileEntry]) -> Result { for (i, component) in components.iter().enumerate() { if i >= current_path.len() || component != ¤t_path[i] { let prefix = " ".repeat(i); - // Always use └── for the last component of a file path if i == components.len() - 1 { output.push_str(&format!( "{}└── {}\n", @@ -152,7 +143,6 @@ fn generate_tree(entries: &[FileEntry]) -> Result { component.as_os_str().to_string_lossy() )); } else { - // For directories, check if it's the last one at this level let is_last_dir = sorted_entries .iter() .filter_map(|e| e.path.components().nth(i)) @@ -204,7 +194,6 @@ fn generate_files(entries: &[FileEntry], xml_format: bool) -> Result { } fn try_copy_with_osc52(content: &str) -> Result<(), Box> { - // OSC 52 sequence to set clipboard for special cases (like SSH) print!( "\x1B]52;c;{}\x07", base64::engine::general_purpose::STANDARD.encode(content) @@ -213,12 +202,10 @@ fn try_copy_with_osc52(content: &str) -> Result<(), Box> } pub fn handle_output(content: String, args: &Cli) -> Result<()> { - // Print to stdout if no other output method is specified if args.print { println!("{content}"); } - // Copy to clipboard if requested if !args.print { match arboard::Clipboard::new().and_then(|mut clipboard| clipboard.set_text(content.clone())) { Ok(_) => println!("Context prepared! 
Paste into your LLM of choice + Profit."), @@ -231,7 +218,6 @@ pub fn handle_output(content: String, args: &Cli) -> Result<()> { } } - // Write to file if path provided if let Some(file_path) = &args.file { fs::write(file_path, content)?; println!("Output written to: {}", file_path.display()); @@ -247,7 +233,6 @@ pub fn generate_pdf(entries: &[FileEntry], format: OutputFormat) -> Result { current_layer.use_text( @@ -270,7 +255,6 @@ pub fn generate_pdf(entries: &[FileEntry], format: OutputFormat) -> Result Result Result Result Vec { - vec![ - FileEntry { - path: PathBuf::from("src/main.rs"), - content: "fn main() {}\n".to_string(), - size: 12, - }, - FileEntry { - path: PathBuf::from("src/lib/utils.rs"), - content: "pub fn helper() {}\n".to_string(), - size: 18, - }, - ] - } - - #[test] - fn test_tree_output() { - let entries = create_test_entries(); - let tree = generate_tree(&entries).unwrap(); - let expected = "└── src/\n ├── lib/\n └── utils.rs\n └── main.rs\n"; - assert_eq!( - tree, expected, - "Tree output doesn't match expected structure" - ); - } - - #[test] - fn test_files_output() { - let entries = create_test_entries(); - let files = generate_files(&entries, false).unwrap(); - let expected = format!( - "\nFile: {}\n{}\n{}\n\nFile: {}\n{}\n{}\n", - "src/main.rs", - "=".repeat(48), - "fn main() {}\n", - "src/lib/utils.rs", - "=".repeat(48), - "pub fn helper() {}\n" - ); - assert_eq!(files, expected); - } - - #[test] - fn test_generate_output() { - let entries = create_test_entries(); - - // Test tree format - let tree_output = generate_output(&entries, OutputFormat::Tree, false, None).unwrap(); - assert!(tree_output.contains("Directory Structure:")); - assert!(tree_output.contains("src/")); - assert!(tree_output.contains("main.rs")); - - // Test files format - let files_output = generate_output(&entries, OutputFormat::Files, false, None).unwrap(); - assert!(files_output.contains("File Contents:")); - assert!(files_output.contains("fn main()")); - 
assert!(files_output.contains("pub fn helper()")); - - // Test both format - let both_output = generate_output(&entries, OutputFormat::Both, false, None).unwrap(); - assert!(both_output.contains("Directory Structure:")); - assert!(both_output.contains("File Contents:")); - } - - #[test] - fn test_xml_output() { - let entries = create_test_entries(); - - // Test XML tree format - let xml_tree_output = generate_output( - &entries, - OutputFormat::Tree, - true, - Some("test_project".to_string()), - ) - .unwrap(); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - - // Test XML files format - let xml_files_output = generate_output( - &entries, - OutputFormat::Files, - true, - Some("test_project".to_string()), - ) - .unwrap(); - assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - - // Test XML both format - let xml_both_output = generate_output( - &entries, - OutputFormat::Both, - true, - Some("test_project".to_string()), - ) - .unwrap(); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - } - - #[test] - fn test_handle_output() { - use tempfile::tempdir; - - let temp_dir = tempdir().unwrap(); - let temp_file = temp_dir.path().join("test_output.txt"); - - let content = "Test content".to_string(); - let args = Cli { - config: false, - paths: vec![".".to_string()], - include: None, - only_include: None, - exclude: None, - max_size: Some(1000), - max_depth: Some(10), - output: 
Some(OutputFormat::Both), - file: Some(temp_file.clone()), - print: false, - threads: None, - hidden: false, - no_ignore: false, - no_tokens: true, - model: None, - tokenizer: Some(crate::cli::TokenizerType::Tiktoken), - tokenizer_file: None, - interactive: false, - pdf: None, - traverse_links: false, - link_depth: None, - config_path: false, - xml: false, - }; - - handle_output(content.clone(), &args).unwrap(); - - // Verify file content - let file_content = std::fs::read_to_string(temp_file).unwrap(); - assert_eq!(file_content, content); - } -} diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml new file mode 100644 index 0000000..a1e75aa --- /dev/null +++ b/crates/core/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "glimpse-core" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +anyhow.workspace = true +dirs.workspace = true +once_cell.workspace = true +serde.workspace = true +tempfile.workspace = true +tiktoken-rs.workspace = true +tokenizers.workspace = true +toml.workspace = true + +[build-dependencies] +serde = { version = "1.0.217", features = ["derive"] } +serde_yaml = "0.9" + +[dev-dependencies] +tempfile.workspace = true diff --git a/build.rs b/crates/core/build.rs similarity index 85% rename from build.rs rename to crates/core/build.rs index b8c4423..a37c73c 100644 --- a/build.rs +++ b/crates/core/build.rs @@ -19,22 +19,29 @@ struct Language { } fn main() { - println!("cargo:rerun-if-changed=languages.yml"); + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let languages_path = Path::new(&manifest_dir) + .parent() + .unwrap() + .parent() + .unwrap() + .join("languages.yml"); + + println!( + "cargo:rerun-if-changed={}", + languages_path.to_string_lossy() + ); - // Read and parse languages.yml let yaml_content = - std::fs::read_to_string("languages.yml").expect("Failed to read languages.yml"); + std::fs::read_to_string(&languages_path).expect("Failed to read languages.yml"); let 
languages: HashMap = serde_yaml::from_str(&yaml_content).expect("Failed to parse languages.yml"); - // Generate the rust code let mut code = String::new(); - // Add the use statements code.push_str("use once_cell::sync::Lazy;\n"); code.push_str("use std::collections::HashSet;\n\n"); - // Generate source extensions set code.push_str("pub static SOURCE_EXTENSIONS: Lazy> = Lazy::new(|| {\n"); code.push_str(" let mut set = HashSet::new();\n\n"); @@ -48,7 +55,6 @@ fn main() { code.push_str(" set\n"); code.push_str("});\n\n"); - // Generate filename mappings code.push_str("pub static KNOWN_FILENAMES: Lazy> = Lazy::new(|| {\n"); code.push_str(" let mut set = HashSet::new();\n\n"); @@ -61,7 +67,6 @@ fn main() { code.push_str(" set\n"); code.push_str("});\n\n"); - // Generate interpreter mappings code.push_str("pub static INTERPRETER_NAMES: Lazy> = Lazy::new(|| {\n"); code.push_str(" let mut set = HashSet::new();\n\n"); @@ -74,7 +79,6 @@ fn main() { code.push_str(" set\n"); code.push_str("});\n"); - // Write the generated code to file let out_dir = std::env::var_os("OUT_DIR").unwrap(); let dest_path = Path::new(&out_dir).join("languages.rs"); let mut f = File::create(dest_path).unwrap(); diff --git a/src/config.rs b/crates/core/src/config.rs similarity index 91% rename from src/config.rs rename to crates/core/src/config.rs index e9817e2..facd3ab 100644 --- a/src/config.rs +++ b/crates/core/src/config.rs @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf}; use serde::{Deserialize, Serialize}; -use crate::cli::{Exclude, OutputFormat}; +use crate::types::{Exclude, OutputFormat}; #[derive(Debug, Serialize, Clone)] #[serde(into = "String")] @@ -37,7 +37,7 @@ impl<'de> Deserialize<'de> for BackwardsCompatOutputFormat { "tree" => OutputFormat::Tree, "files" => OutputFormat::Files, "both" => OutputFormat::Both, - _ => OutputFormat::Both, // Default to Both for unknown values + _ => OutputFormat::Both, }; Ok(BackwardsCompatOutputFormat(format)) } @@ -83,9 +83,6 @@ pub struct Config { 
#[serde(default)] pub traverse_links: bool, - /// List of canonical project directories for which the user has already declined to - /// save a local `.glimpse` configuration file. When a directory is present in this - /// list Glimpse will not prompt the user again. #[serde(default)] pub skipped_prompt_repos: Vec, } @@ -128,11 +125,9 @@ fn default_output_format() -> BackwardsCompatOutputFormat { fn default_excludes() -> Vec { vec![ - // Version control Exclude::Pattern("**/.git/**".to_string()), Exclude::Pattern("**/.svn/**".to_string()), Exclude::Pattern("**/.hg/**".to_string()), - // Build artifacts and dependencies Exclude::Pattern("**/target/**".to_string()), Exclude::Pattern("**/node_modules/**".to_string()), Exclude::Pattern("**/dist/**".to_string()), @@ -175,7 +170,6 @@ pub fn get_config_path() -> anyhow::Result { } } - // Fall back to platform-specific directory let config_dir = dirs::config_dir() .ok_or_else(|| anyhow::anyhow!("Could not determine config directory"))? .join("glimpse"); @@ -210,7 +204,6 @@ pub fn load_repo_config(path: &Path) -> anyhow::Result { } } -/// Persist the provided global configuration to disk, overriding any existing config file. 
pub fn save_config(config: &Config) -> anyhow::Result<()> { let config_path = get_config_path()?; diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs new file mode 100644 index 0000000..9b01bdd --- /dev/null +++ b/crates/core/src/lib.rs @@ -0,0 +1,12 @@ +pub mod config; +pub mod source_detection; +pub mod tokenizer; +pub mod types; + +pub use config::{ + get_config_path, load_config, load_repo_config, save_config, save_repo_config, + BackwardsCompatOutputFormat, Config, RepoConfig, +}; +pub use source_detection::is_source_file; +pub use tokenizer::{TokenCount, TokenCounter, TokenizerBackend}; +pub use types::{Exclude, FileEntry, OutputFormat, TokenizerType}; diff --git a/src/source_detection.rs b/crates/core/src/source_detection.rs similarity index 84% rename from src/source_detection.rs rename to crates/core/src/source_detection.rs index c65bd24..d9218f3 100644 --- a/src/source_detection.rs +++ b/crates/core/src/source_detection.rs @@ -1,10 +1,8 @@ use std::fs; use std::path::Path; -// Include the generated code include!(concat!(env!("OUT_DIR"), "/languages.rs")); -/// Extract interpreter from shebang line and exec pattern fn extract_interpreter(data: &str) -> Option { let lines: Vec<&str> = data.lines().take(2).collect(); @@ -35,7 +33,6 @@ fn extract_interpreter(data: &str) -> Option { let mut script = first_part.split('/').next_back()?.to_string(); - // Handle /usr/bin/env if script == "env" { for part in path.split_whitespace().skip(1) { if !part.starts_with('-') && !part.contains('=') { @@ -43,13 +40,11 @@ fn extract_interpreter(data: &str) -> Option { break; } } - // If we only found env with no valid interpreter, return None if script == "env" { return None; } } - // Strip version numbers (python2.7 -> python2) if let Some(idx) = script.find(|c: char| c.is_ascii_digit()) { if let Some(dot_idx) = script[idx..].find('.') { script.truncate(idx + dot_idx); @@ -59,30 +54,25 @@ fn extract_interpreter(data: &str) -> Option { Some(script) } -/// Detect 
language from shebang fn detect_by_shebang(data: &str) -> bool { extract_interpreter(data) .map(|script| INTERPRETER_NAMES.contains(script.as_str())) .unwrap_or(false) } -/// Checks if a given path is a source file pub fn is_source_file(path: &Path) -> bool { - // Check known filenames first if let Some(name) = path.file_name().and_then(|n| n.to_str()) { if KNOWN_FILENAMES.contains(name) { return true; } } - // Then check extension if let Some(ext) = path.extension().and_then(|e| e.to_str()) { if SOURCE_EXTENSIONS.contains(ext.to_lowercase().as_str()) { return true; } } - // Finally check shebang match fs::read_to_string(path) { Ok(content) => detect_by_shebang(&content), Err(_) => false, @@ -99,19 +89,14 @@ mod tests { #[test] fn test_interpreter_extraction() { let cases = vec![ - // Basic cases ("#!/usr/bin/python", Some("python")), ("#!/bin/bash", Some("bash")), - // env cases with flags and vars ("#!/usr/bin/env python", Some("python")), ("#!/usr/bin/env -S python3 -u", Some("python3")), ("#!/usr/bin/env FOO=bar python", Some("python")), - // Version stripping ("#!/usr/bin/python2.7", Some("python2")), ("#!/usr/bin/ruby1.9.3", Some("ruby1")), - // exec patterns ("#!/bin/sh\nexec python \"$0\" \"$@\"", Some("python")), - // Invalid cases ("no shebang", None), ("#!/", None), ("", None), @@ -130,19 +115,14 @@ mod tests { fn test_source_detection() { let dir = tempdir().unwrap(); - // Test cases: (filename, content, expected) let test_cases = vec![ - // Extensions ("test.rs", "", true), ("test.py", "", true), ("test.js", "", true), - // Known filenames ("Makefile", "", true), ("Dockerfile", "", true), - // Shebangs ("script", "#!/usr/bin/env python\nprint('hi')", true), ("run", "#!/usr/bin/node\nconsole.log()", true), - // Non-source files ("random.xyz", "", false), ("not-script", "just some text", false), ]; diff --git a/src/tokenizer.rs b/crates/core/src/tokenizer.rs similarity index 89% rename from src/tokenizer.rs rename to crates/core/src/tokenizer.rs index 
b68df15..8fee8f7 100644 --- a/src/tokenizer.rs +++ b/crates/core/src/tokenizer.rs @@ -1,8 +1,11 @@ -use anyhow::{anyhow, Result}; use std::path::PathBuf; + +use anyhow::{anyhow, Result}; use tiktoken_rs::get_bpe_from_model; use tokenizers::Tokenizer as HfTokenizer; +use crate::types::FileEntry; + pub enum TokenizerBackend { Tiktoken(tiktoken_rs::CoreBPE), HuggingFace(Box), @@ -10,7 +13,7 @@ pub enum TokenizerBackend { pub struct TokenCount { pub total_tokens: usize, - pub breakdown: Vec<(PathBuf, usize)>, // (file_path, token_count) + pub breakdown: Vec<(PathBuf, usize)>, } pub struct TokenCounter { @@ -57,10 +60,7 @@ impl TokenCounter { pub fn count_tokens(&self, text: &str) -> Result { match &self.backend { - TokenizerBackend::Tiktoken(bpe) => { - // tiktoken's encode_with_special_tokens is infallible - Ok(bpe.encode_with_special_tokens(text).len()) - } + TokenizerBackend::Tiktoken(bpe) => Ok(bpe.encode_with_special_tokens(text).len()), TokenizerBackend::HuggingFace(tokenizer) => tokenizer .encode(text, false) .map_err(|e| anyhow!("Failed to encode text with HuggingFace tokenizer: {}", e)) @@ -68,7 +68,7 @@ impl TokenCounter { } } - pub fn count_files(&self, entries: &[super::output::FileEntry]) -> Result { + pub fn count_files(&self, entries: &[FileEntry]) -> Result { let mut total_tokens = 0; let mut breakdown = Vec::new(); diff --git a/crates/core/src/types.rs b/crates/core/src/types.rs new file mode 100644 index 0000000..d0584cf --- /dev/null +++ b/crates/core/src/types.rs @@ -0,0 +1,30 @@ +use std::path::PathBuf; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum OutputFormat { + Tree, + Files, + Both, +} + +#[derive(Debug, Clone)] +pub enum TokenizerType { + Tiktoken, + HuggingFace, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum Exclude { + File(PathBuf), + Pattern(String), +} + +#[derive(Debug, Clone)] +pub struct FileEntry { + pub path: PathBuf, + pub content: String, + 
pub size: u64, +} diff --git a/crates/fetch/Cargo.toml b/crates/fetch/Cargo.toml new file mode 100644 index 0000000..e6968e0 --- /dev/null +++ b/crates/fetch/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "glimpse-fetch" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +glimpse-core.workspace = true + +anyhow.workspace = true +arboard.workspace = true +git2.workspace = true +indicatif.workspace = true +reqwest.workspace = true +scraper.workspace = true +tempfile.workspace = true +url.workspace = true diff --git a/src/git_processor.rs b/crates/fetch/src/git.rs similarity index 81% rename from src/git_processor.rs rename to crates/fetch/src/git.rs index 4fe34f7..45faf6f 100644 --- a/src/git_processor.rs +++ b/crates/fetch/src/git.rs @@ -1,6 +1,7 @@ +use std::path::PathBuf; + use anyhow::Result; use git2::Repository; -use std::path::PathBuf; use tempfile::TempDir; use url::Url; @@ -49,12 +50,6 @@ impl GitProcessor { } } -impl Drop for GitProcessor { - fn drop(&mut self) { - // Temp directory will be automatically cleaned up when dropped - } -} - #[cfg(test)] mod tests { use super::*; @@ -109,7 +104,7 @@ mod tests { temp_path.exists(), "Temp directory should exist during processor lifetime" ); - } // processor is dropped here + } assert!( !temp_path.exists(), "Temp directory should be cleaned up after drop" @@ -138,20 +133,4 @@ mod tests { assert_eq!(repo_name, expected_name, "Failed for URL: {url}"); } } - - #[test] - fn test_process_repo_creates_directory() { - let test_repo = "https://github.com/rust-lang/rust-analyzer.git"; - if let Ok(processor) = GitProcessor::new() { - match processor.process_repo(test_repo) { - Ok(path) => { - assert!(path.exists(), "Repository directory should exist"); - assert!(path.join(".git").exists(), "Git directory should exist"); - // Check for some common files that should be present - assert!(path.join("Cargo.toml").exists(), "Cargo.toml should exist"); - } - Err(e) => println!("Skipping 
clone test due to error: {e}"), - } - } - } } diff --git a/crates/fetch/src/lib.rs b/crates/fetch/src/lib.rs new file mode 100644 index 0000000..eea2448 --- /dev/null +++ b/crates/fetch/src/lib.rs @@ -0,0 +1,5 @@ +pub mod git; +pub mod url; + +pub use git::GitProcessor; +pub use url::UrlProcessor; diff --git a/src/url_processor.rs b/crates/fetch/src/url.rs similarity index 76% rename from src/url_processor.rs rename to crates/fetch/src/url.rs index 707f39a..a09e1da 100644 --- a/src/url_processor.rs +++ b/crates/fetch/src/url.rs @@ -1,9 +1,10 @@ +use std::collections::HashSet; + use anyhow::Result; use arboard::Clipboard; use indicatif::{ProgressBar, ProgressStyle}; use reqwest::blocking::Client; use scraper::{ElementRef, Html, Node, Selector}; -use std::collections::HashSet; use url::Url; pub struct UrlProcessor { @@ -229,7 +230,6 @@ impl UrlProcessor { #[cfg(test)] mod tests { use super::*; - use mockito::Server; #[test] fn test_new_processor() { @@ -293,116 +293,9 @@ mod tests { "#; let links = processor.extract_links(html, &base_url).unwrap(); - assert_eq!(links.len(), 3); // Only http(s) links should be included + assert_eq!(links.len(), 3); assert!(links.contains(&"https://example.com/page1".to_string())); assert!(links.contains(&"https://other.com/page2".to_string())); assert!(links.contains(&"https://example.com/#section".to_string())); } - - #[test] - fn test_process_url_with_mocked_server() { - let mut server = Server::new(); - let url = server.url(); - let _m = server - .mock("GET", "/") - .with_status(200) - .with_header("content-type", "text/html") - .with_body( - r#" - - -

Test Page

-

This is a test.

- Subpage - - - "#, - ) - .create(); - - let mut processor = UrlProcessor::new(1); - let result = processor.process_url(&url, false); - - assert!(result.is_ok()); - let content = result.unwrap(); - assert!(content.contains("# Test Page")); - assert!(content.contains("This is a test")); - } - - #[test] - fn test_process_url_with_link_traversal() { - let mut server = Server::new(); - let url = server.url(); - let _m1 = server - .mock("GET", "/") - .with_status(200) - .with_header("content-type", "text/html") - .with_body( - r#" - - -

Main Page

- Subpage - - - "#, - ) - .create(); - - let _m2 = server - .mock("GET", "/subpage") - .with_status(200) - .with_header("content-type", "text/html") - .with_body( - r#" - - -

Subpage

-

Subpage content.

- - - "#, - ) - .create(); - - let mut processor = UrlProcessor::new(1); - let result = processor.process_url(&url, true); - - assert!(result.is_ok()); - let content = result.unwrap(); - assert!(content.contains("# Main Page")); - assert!(content.contains("# Subpage")); - assert!(content.contains("Subpage content")); - } - - #[test] - fn test_process_node_formatting() { - let processor = UrlProcessor::new(1); - let base_url = Url::parse("https://example.com").unwrap(); - - let html = r#" - -

Heading 1

-

Heading 2

-

Normal paragraph

-
Quote text
- Code block -
    -
  • List item 1
  • -
  • List item 2
  • -
- - "#; - - let markdown = processor.html_to_markdown(html, &base_url); - - assert!(markdown.contains("# Heading 1")); - assert!(markdown.contains("# Heading 1")); - assert!(markdown.contains("## Heading 2")); - assert!(markdown.contains("Normal paragraph")); - assert!(markdown.contains("> Quote text")); - assert!(markdown.contains("Code block")); - assert!(markdown.contains("- List item 1")); - assert!(markdown.contains("- List item 2")); - } } diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml new file mode 100644 index 0000000..eb67d11 --- /dev/null +++ b/crates/tui/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "glimpse-tui" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +glimpse-core.workspace = true + +anyhow.workspace = true +arboard.workspace = true +base64.workspace = true +crossterm.workspace = true +ignore.workspace = true +num-format.workspace = true +printpdf.workspace = true +ratatui.workspace = true diff --git a/src/file_picker.rs b/crates/tui/src/file_picker.rs similarity index 95% rename from src/file_picker.rs rename to crates/tui/src/file_picker.rs index 1314b1c..2592cae 100644 --- a/src/file_picker.rs +++ b/crates/tui/src/file_picker.rs @@ -1,3 +1,9 @@ +use std::{ + io::{self, stdout}, + path::{Path, PathBuf}, + time::Duration, +}; + use anyhow::Result; use crossterm::{ event::{self, Event, KeyCode}, @@ -12,11 +18,6 @@ use ratatui::{ widgets::{Block, Borders, List, ListItem, ListState, Paragraph}, Terminal, }; -use std::{ - io::{self, stdout}, - path::{Path, PathBuf}, - time::Duration, -}; struct TerminalGuard; @@ -121,15 +122,14 @@ impl FilePicker { .direction(Direction::Vertical) .constraints( [ - Constraint::Length(1), // Current folder - Constraint::Percentage(80), // File list - Constraint::Percentage(20), // Selected files + Constraint::Length(1), + Constraint::Percentage(80), + Constraint::Percentage(20), ] .as_ref(), ) .split(f.area()); - // Current folder let current_path = 
self.get_relative_path(&self.current_dir); let folder = Paragraph::new(format!("📁 {}", current_path.display())).block( Block::default() @@ -139,7 +139,6 @@ impl FilePicker { f.render_widget(folder, chunks[0]); - // File list let items: Vec = self .files .iter() @@ -164,7 +163,6 @@ impl FilePicker { f.render_stateful_widget(list, chunks[1], &mut self.list_state.clone()); - // Selected files let selected_items: Vec = self .selected_files .iter() @@ -205,7 +203,6 @@ impl FilePicker { let entry = entry?; let path = entry.path(); - // skip hidden files if not showing them if !self.show_hidden && path .file_name() @@ -214,7 +211,6 @@ impl FilePicker { continue; } - // skip ignored files if respecting ignore if self.respect_ignore { let path_clone = path.clone(); let mut builder = ignore::gitignore::GitignoreBuilder::new(path_clone); @@ -227,7 +223,6 @@ impl FilePicker { } } - // skip files larger than max size if path.is_file() && entry.metadata()?.len() > self.max_size { continue; } @@ -235,7 +230,6 @@ impl FilePicker { files.push(path); } - // sort directories first, then files files.sort_by(|a, b| { if a.is_dir() && !b.is_dir() { std::cmp::Ordering::Less @@ -309,7 +303,6 @@ impl FilePicker { if let Some(selected) = self.selected_list_state.selected() { if selected < self.selected_files.len() { self.selected_files.remove(selected); - // Adjust the selection after removal if self.selected_files.is_empty() { self.selected_list_state.select(None); } else { @@ -342,7 +335,6 @@ impl FilePicker { } } - // If this was previously empty, move the buffer cursor to the first item if before == 0 && !self.selected_files.is_empty() { self.selected_list_state.select(Some(0)); } diff --git a/crates/tui/src/lib.rs b/crates/tui/src/lib.rs new file mode 100644 index 0000000..9dce8c9 --- /dev/null +++ b/crates/tui/src/lib.rs @@ -0,0 +1,3 @@ +pub mod file_picker; + +pub use file_picker::FilePicker; From 89716e7cd1d34004bb6c6d72a768e94e2d54e25a Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 
27 Dec 2025 19:24:38 +0530 Subject: [PATCH 04/35] add import_query to registry.toml for all 11 languages --- .gitignore | 1 + registry.toml | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) diff --git a/.gitignore b/.gitignore index cdbe4ee..b34c1bd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .direnv test/ +.todo.md diff --git a/registry.toml b/registry.toml index 29f6e94..3fe04dd 100644 --- a/registry.toml +++ b/registry.toml @@ -29,6 +29,18 @@ call_query = """ (parenthesized_expression (selector_expression field: (field_identifier) @name)) ]) @reference.call """ +import_query = """ +(import_spec + name: [ + (package_identifier) @alias + (dot) @dot_import + (blank_identifier) @side_effect + ]? + path: [ + (interpreted_string_literal) @path + (raw_string_literal) @path + ]) +""" [[language]] name = "zig" @@ -52,6 +64,15 @@ call_query = """ (field_expression member: (identifier) @name) ]) @reference.call """ +import_query = """ +(variable_declaration + (identifier) @name + (builtin_function + (builtin_identifier) @_builtin + (arguments + (string (string_content) @path))) + (#eq? 
@_builtin "@import")) @import +""" [[language]] name = "c" @@ -82,6 +103,13 @@ call_query = """ (parenthesized_expression (identifier) @name) ]) @reference.call """ +import_query = """ +(preproc_include + path: [ + (system_lib_string) @system_path + (string_literal) @local_path + ]) @include +""" [[language]] name = "cpp" @@ -155,6 +183,13 @@ call_query = """ (field_expression field: (field_identifier) @name) ]) @reference.call """ +import_query = """ +(preproc_include + path: [ + (system_lib_string) @system_path + (string_literal) @local_path + ]) @include +""" [[language]] name = "bash" @@ -174,6 +209,17 @@ definition_query = """ call_query = """ (command_name (word) @name) @reference.call """ +import_query = """ +(command + name: (command_name) @_cmd + argument: [ + (word) @path + (string) @path + (raw_string) @path + (concatenation) @path + ] + (#any-of? @_cmd "source" ".")) @import +""" [[language]] name = "python" @@ -204,6 +250,28 @@ call_query = """ (attribute attribute: (identifier) @name) ]) @reference.call """ +import_query = """ +(import_statement + name: [ + (dotted_name) @path + (aliased_import + name: (dotted_name) @path + alias: (identifier) @alias) + ]) @import + +(import_from_statement + module_name: [ + (dotted_name) @module + (relative_import) @module + ] + name: [ + (dotted_name) @name + (aliased_import + name: (dotted_name) @name + alias: (identifier) @alias) + ]? + (wildcard_import)? @wildcard) @import +""" [[language]] name = "typescript" @@ -244,6 +312,19 @@ call_query = """ (member_expression property: (property_identifier) @name) ]) @reference.call """ +import_query = """ +(import_statement + (import_clause + [ + (identifier) @default_import + (named_imports + (import_specifier + name: (identifier) @name + alias: (identifier)? 
@alias)) + (namespace_import (identifier) @namespace) + ]) + source: (string (string_fragment) @source)) @import +""" [[language]] name = "rust" @@ -268,6 +349,29 @@ call_query = """ (field_expression field: (field_identifier) @name) ]) @reference.call """ +import_query = """ +(use_declaration + argument: [ + (identifier) @path + (scoped_identifier) @path + (use_as_clause + path: [ + (identifier) @path + (scoped_identifier) @path + ] + alias: (identifier) @alias) + (use_list) @list + (scoped_use_list + path: [ + (identifier) @path + (scoped_identifier) @path + ]) + (use_wildcard) @wildcard + ]) @import + +(mod_item + name: (identifier) @mod_name) @mod_decl +""" [[language]] name = "javascript" @@ -307,6 +411,24 @@ call_query = """ (member_expression property: (property_identifier) @name) ]) @reference.call """ +import_query = """ +(import_statement + (import_clause + [ + (identifier) @default_import + (named_imports + (import_specifier + name: (identifier) @name + alias: (identifier)? @alias)) + (namespace_import (identifier) @namespace) + ]) + source: (string (string_fragment) @source)) @import + +(call_expression + function: (identifier) @_require + arguments: (arguments (string (string_fragment) @source)) + (#eq? @_require "require")) @require +""" [[language]] name = "java" @@ -342,6 +464,14 @@ call_query = """ (object_creation_expression type: (type_identifier) @name) @reference.call """ +import_query = """ +(import_declaration + [ + (scoped_identifier) @path + (identifier) @path + ] + (asterisk)? @wildcard) @import +""" [[language]] name = "scala" @@ -391,4 +521,13 @@ call_query = """ (identifier) @name (field_expression field: (identifier) @name) ]) @reference.call +""" +import_query = """ +(import_declaration + path: (_)+ @path + [ + (namespace_wildcard) @wildcard + (namespace_selectors) @selectors + (as_renamed_identifier) @alias + ]?) 
@import """ \ No newline at end of file From 8ec39fe769ab79db206ab042486242d1ba6832c9 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 19:31:52 +0530 Subject: [PATCH 05/35] add glimpse-code crate scaffolding with module stubs --- Cargo.lock | 68 +++++++++++++++++++++++++++++++++++ Cargo.toml | 8 +++++ crates/code/Cargo.toml | 22 ++++++++++++ crates/code/src/compile.rs | 11 ++++++ crates/code/src/extract.rs | 28 +++++++++++++++ crates/code/src/graph.rs | 39 ++++++++++++++++++++ crates/code/src/heuristics.rs | 25 +++++++++++++ crates/code/src/imports.rs | 14 ++++++++ crates/code/src/index.rs | 30 ++++++++++++++++ crates/code/src/lib.rs | 13 +++++++ crates/code/src/loader.rs | 15 ++++++++ crates/code/src/queries.rs | 14 ++++++++ crates/code/src/registry.rs | 22 ++++++++++++ crates/code/src/resolve.rs | 23 ++++++++++++ crates/code/src/schema.rs | 58 ++++++++++++++++++++++++++++++ crates/code/src/storage.rs | 15 ++++++++ crates/code/src/workspace.rs | 19 ++++++++++ 17 files changed, 424 insertions(+) create mode 100644 crates/code/Cargo.toml create mode 100644 crates/code/src/compile.rs create mode 100644 crates/code/src/extract.rs create mode 100644 crates/code/src/graph.rs create mode 100644 crates/code/src/heuristics.rs create mode 100644 crates/code/src/imports.rs create mode 100644 crates/code/src/index.rs create mode 100644 crates/code/src/lib.rs create mode 100644 crates/code/src/loader.rs create mode 100644 crates/code/src/queries.rs create mode 100644 crates/code/src/registry.rs create mode 100644 crates/code/src/resolve.rs create mode 100644 crates/code/src/schema.rs create mode 100644 crates/code/src/storage.rs create mode 100644 crates/code/src/workspace.rs diff --git a/Cargo.lock b/Cargo.lock index 54d661c..5ca44e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -165,6 +165,15 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] 
+name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -947,6 +956,24 @@ dependencies = [ "tempfile", ] +[[package]] +name = "glimpse-code" +version = "0.7.8" +dependencies = [ + "anyhow", + "bincode", + "cc", + "dirs", + "glimpse-core", + "glimpse-fetch", + "libloading", + "serde", + "tempfile", + "toml", + "tree-sitter", + "walkdir", +] + [[package]] name = "glimpse-core" version = "0.7.8" @@ -1455,6 +1482,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libredox" version = "0.1.3" @@ -2689,6 +2726,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "string_cache" version = "0.8.8" @@ -3051,6 +3094,25 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tree-sitter" +version = "0.24.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" + [[package]] name = 
"tree_magic_mini" version = "3.1.6" @@ -3445,6 +3507,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 39b4cfb..4aeadb5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ resolver = "2" members = [ "crates/cli", + "crates/code", "crates/core", "crates/fetch", "crates/tui" @@ -14,6 +15,7 @@ license = "MIT" [workspace.dependencies] # Internal crates +glimpse-code = { path = "crates/code" } glimpse-core = { path = "crates/core" } glimpse-fetch = { path = "crates/fetch" } glimpse-tui = { path = "crates/tui" } @@ -23,6 +25,12 @@ anyhow = "1.0.95" serde = { version = "1.0.217", features = ["derive"] } rayon = "1.10.0" +# Code analysis dependencies +bincode = "1.3" +cc = "1.2" +libloading = "0.8" +tree-sitter = "0.24" + # Core dependencies dirs = "5.0.1" once_cell = "1.20.2" diff --git a/crates/code/Cargo.toml b/crates/code/Cargo.toml new file mode 100644 index 0000000..9fe9e2f --- /dev/null +++ b/crates/code/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "glimpse-code" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +anyhow.workspace = true +bincode.workspace = true +cc.workspace = true +dirs.workspace = true +libloading.workspace = true +serde.workspace = true +toml.workspace = true +tree-sitter.workspace = true +walkdir.workspace = true + +glimpse-core.workspace = true +glimpse-fetch.workspace = true + +[dev-dependencies] +tempfile.workspace = true diff --git a/crates/code/src/compile.rs b/crates/code/src/compile.rs new file mode 100644 index 0000000..c052407 --- /dev/null +++ b/crates/code/src/compile.rs @@ 
-0,0 +1,11 @@ +use std::path::PathBuf; + +use anyhow::Result; + +pub fn fetch_grammar(_repo_url: &str, _dest: &PathBuf) -> Result<()> { + todo!("clone grammar repository") +} + +pub fn compile_grammar(_grammar_dir: &PathBuf, _output: &PathBuf) -> Result<()> { + todo!("compile grammar with cc") +} diff --git a/crates/code/src/extract.rs b/crates/code/src/extract.rs new file mode 100644 index 0000000..1e59fcb --- /dev/null +++ b/crates/code/src/extract.rs @@ -0,0 +1,28 @@ +use std::path::Path; + +use anyhow::Result; +use tree_sitter::Tree; + +use super::schema::{Call, Definition, Import}; + +pub struct Extractor { + _language: String, +} + +impl Extractor { + pub fn new(_language: &str) -> Result { + todo!("initialize extractor with language queries") + } + + pub fn extract_definitions(&self, _tree: &Tree, _source: &[u8], _path: &Path) -> Vec { + todo!("extract definitions from parsed tree") + } + + pub fn extract_calls(&self, _tree: &Tree, _source: &[u8], _path: &Path) -> Vec { + todo!("extract call sites from parsed tree") + } + + pub fn extract_imports(&self, _tree: &Tree, _source: &[u8], _path: &Path) -> Vec { + todo!("extract imports from parsed tree") + } +} diff --git a/crates/code/src/graph.rs b/crates/code/src/graph.rs new file mode 100644 index 0000000..194c962 --- /dev/null +++ b/crates/code/src/graph.rs @@ -0,0 +1,39 @@ +use std::collections::{HashMap, HashSet}; + +use anyhow::Result; + +use super::index::Index; +use super::schema::Definition; + +pub type NodeId = usize; + +#[derive(Debug, Clone)] +pub struct CallGraphNode { + pub definition: Definition, + pub callees: HashSet, + pub callers: HashSet, +} + +#[derive(Debug, Default)] +pub struct CallGraph { + pub nodes: HashMap, + name_to_id: HashMap, +} + +impl CallGraph { + pub fn build(_index: &Index) -> Result { + todo!("build call graph from index") + } + + pub fn get_callees(&self, _node_id: NodeId) -> Vec<&CallGraphNode> { + todo!("get direct callees") + } + + pub fn get_transitive_callees(&self, 
_node_id: NodeId) -> Vec<&CallGraphNode> { + todo!("get all callees recursively") + } + + pub fn post_order(&self, _node_id: NodeId) -> Vec { + todo!("return nodes in post-order traversal") + } +} diff --git a/crates/code/src/heuristics.rs b/crates/code/src/heuristics.rs new file mode 100644 index 0000000..7647cbd --- /dev/null +++ b/crates/code/src/heuristics.rs @@ -0,0 +1,25 @@ +use std::path::PathBuf; + +use anyhow::Result; + +use super::index::Index; +use super::schema::Definition; + +pub fn resolve_same_file( + _callee: &str, + _file: &PathBuf, + _index: &Index, +) -> Option { + todo!("look for definition in same file") +} + +pub fn resolve_by_index( + _callee: &str, + _index: &Index, +) -> Option { + todo!("search index for definition by name") +} + +pub fn resolve_by_search(_callee: &str, _root: &PathBuf) -> Result> { + todo!("fallback to ripgrep search") +} diff --git a/crates/code/src/imports.rs b/crates/code/src/imports.rs new file mode 100644 index 0000000..2e9806a --- /dev/null +++ b/crates/code/src/imports.rs @@ -0,0 +1,14 @@ +use std::path::PathBuf; + +use anyhow::Result; + +use super::index::Index; +use super::schema::Definition; + +pub fn resolve_import( + _import_path: &str, + _from_file: &PathBuf, + _index: &Index, +) -> Result> { + todo!("resolve use statement to definition") +} diff --git a/crates/code/src/index.rs b/crates/code/src/index.rs new file mode 100644 index 0000000..e3095b3 --- /dev/null +++ b/crates/code/src/index.rs @@ -0,0 +1,30 @@ +use std::collections::HashMap; +use std::path::PathBuf; + +use anyhow::Result; +use serde::{Deserialize, Serialize}; + +use super::schema::FileRecord; + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct Index { + pub files: HashMap, + pub version: u32, +} + +impl Index { + pub fn new() -> Self { + Self { + files: HashMap::new(), + version: 1, + } + } + + pub fn is_stale(&self, _path: &PathBuf, _mtime: u64, _size: u64) -> bool { + todo!("check if file needs re-indexing") + } + + pub fn 
update(&mut self, _record: FileRecord) -> Result<()> { + todo!("add or update file record") + } +} diff --git a/crates/code/src/lib.rs b/crates/code/src/lib.rs new file mode 100644 index 0000000..77d690c --- /dev/null +++ b/crates/code/src/lib.rs @@ -0,0 +1,13 @@ +pub mod compile; +pub mod extract; +pub mod graph; +pub mod heuristics; +pub mod imports; +pub mod index; +pub mod loader; +pub mod queries; +pub mod registry; +pub mod resolve; +pub mod schema; +pub mod storage; +pub mod workspace; diff --git a/crates/code/src/loader.rs b/crates/code/src/loader.rs new file mode 100644 index 0000000..c5a3499 --- /dev/null +++ b/crates/code/src/loader.rs @@ -0,0 +1,15 @@ +use std::path::PathBuf; + +use anyhow::Result; +use tree_sitter::Language; + +pub fn cache_dir() -> PathBuf { + dirs::data_local_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join("glimpse") + .join("grammars") +} + +pub fn load_language(_name: &str) -> Result { + todo!("load compiled grammar via libloading") +} diff --git a/crates/code/src/queries.rs b/crates/code/src/queries.rs new file mode 100644 index 0000000..39319d7 --- /dev/null +++ b/crates/code/src/queries.rs @@ -0,0 +1,14 @@ +use anyhow::Result; +use tree_sitter::{Language, Query}; + +pub struct QuerySet { + pub definitions: Query, + pub calls: Query, + pub imports: Option, +} + +impl QuerySet { + pub fn load(_language: Language, _lang_name: &str) -> Result { + todo!("load and compile queries for language") + } +} diff --git a/crates/code/src/registry.rs b/crates/code/src/registry.rs new file mode 100644 index 0000000..025ed54 --- /dev/null +++ b/crates/code/src/registry.rs @@ -0,0 +1,22 @@ +use anyhow::Result; + +#[derive(Debug, Clone)] +pub struct LanguageEntry { + pub name: String, + pub grammar_repo: String, + pub import_query: Option, +} + +pub struct Registry { + languages: Vec, +} + +impl Registry { + pub fn load() -> Result { + todo!("parse registry.toml") + } + + pub fn get_language(&self, _name: &str) -> Option<&LanguageEntry> { + 
todo!("lookup language by name") + } +} diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs new file mode 100644 index 0000000..91907fb --- /dev/null +++ b/crates/code/src/resolve.rs @@ -0,0 +1,23 @@ +use std::path::PathBuf; + +use anyhow::Result; + +use super::index::Index; +use super::schema::{Call, Definition}; +use super::workspace::Workspace; + +pub struct Resolver { + _index: Index, + _workspace: Option, + _root: PathBuf, +} + +impl Resolver { + pub fn new(_index: Index, _workspace: Option, _root: PathBuf) -> Self { + todo!("initialize resolver") + } + + pub fn resolve(&self, _call: &Call) -> Result> { + todo!("resolve call to definition using all strategies") + } +} diff --git a/crates/code/src/schema.rs b/crates/code/src/schema.rs new file mode 100644 index 0000000..4a5bd9c --- /dev/null +++ b/crates/code/src/schema.rs @@ -0,0 +1,58 @@ +use std::path::PathBuf; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Span { + pub start_byte: usize, + pub end_byte: usize, + pub start_line: usize, + pub end_line: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Definition { + pub name: String, + pub kind: DefinitionKind, + pub span: Span, + pub file: PathBuf, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum DefinitionKind { + Function, + Method, + Class, + Struct, + Enum, + Trait, + Interface, + Module, + Other(String), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Call { + pub callee: String, + pub span: Span, + pub file: PathBuf, + pub caller: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Import { + pub module_path: String, + pub alias: Option, + pub span: Span, + pub file: PathBuf, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileRecord { + pub path: PathBuf, + pub mtime: u64, + pub size: u64, + pub definitions: Vec, + pub calls: Vec, + pub imports: Vec, +} diff --git 
a/crates/code/src/storage.rs b/crates/code/src/storage.rs new file mode 100644 index 0000000..803f128 --- /dev/null +++ b/crates/code/src/storage.rs @@ -0,0 +1,15 @@ +use std::path::Path; + +use anyhow::Result; + +use super::index::Index; + +pub const INDEX_DIR: &str = ".glimpse-index"; + +pub fn save_index(_index: &Index, _root: &Path) -> Result<()> { + todo!("serialize index with bincode") +} + +pub fn load_index(_root: &Path) -> Result> { + todo!("deserialize index from .glimpse-index/") +} diff --git a/crates/code/src/workspace.rs b/crates/code/src/workspace.rs new file mode 100644 index 0000000..3b378f6 --- /dev/null +++ b/crates/code/src/workspace.rs @@ -0,0 +1,19 @@ +use std::path::{Path, PathBuf}; + +use anyhow::Result; + +#[derive(Debug, Clone)] +pub struct Workspace { + pub root: PathBuf, + pub members: Vec, +} + +impl Workspace { + pub fn discover(_root: &Path) -> Result { + todo!("parse Cargo.toml and discover workspace members") + } + + pub fn resolve_crate(&self, _crate_name: &str) -> Option { + todo!("resolve crate name to path within workspace") + } +} From 80a38b478b196ddb3869a63674e70ab3d131eff9 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 19:46:54 +0530 Subject: [PATCH 06/35] feat: implement grammar loading for glimpse-code --- AGENTS.md | 16 +++ Cargo.lock | 8 +- Cargo.toml | 2 +- crates/code/Cargo.toml | 3 + crates/code/src/compile.rs | 190 ++++++++++++++++++++++++++++++++++-- crates/code/src/loader.rs | 184 +++++++++++++++++++++++++++++++++- crates/code/src/registry.rs | 125 ++++++++++++++++++++++-- 7 files changed, 509 insertions(+), 19 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d5d1d31..3395b9a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,6 +9,22 @@ Check `.todo.md` for current tasks and next steps. Keep it updated: - Add new tasks as they're discovered - Reference it before asking "what's next?" +## Commits + +Use `jj` for version control. 
Always commit after completing a phase: + +```bash +jj commit -m "feat: add glimpse-code crate scaffolding" +``` + +Use conventional commit prefixes: +- `feat` - new feature +- `fix` - bug fix +- `refactor` - restructure without behavior change +- `chore` - maintenance, dependencies, config +- `docs` - documentation only +- `test` - adding or updating tests + ## Build Commands ```bash diff --git a/Cargo.lock b/Cargo.lock index 5ca44e7..b7653c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -964,9 +964,11 @@ dependencies = [ "bincode", "cc", "dirs", + "git2", "glimpse-core", "glimpse-fetch", "libloading", + "once_cell", "serde", "tempfile", "toml", @@ -2574,6 +2576,7 @@ version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" dependencies = [ + "indexmap", "itoa", "memchr", "ryu", @@ -3096,13 +3099,14 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.24.7" +version = "0.25.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" dependencies = [ "cc", "regex", "regex-syntax", + "serde_json", "streaming-iterator", "tree-sitter-language", ] diff --git a/Cargo.toml b/Cargo.toml index 4aeadb5..031ac9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ rayon = "1.10.0" bincode = "1.3" cc = "1.2" libloading = "0.8" -tree-sitter = "0.24" +tree-sitter = "0.25" # Core dependencies dirs = "5.0.1" diff --git a/crates/code/Cargo.toml b/crates/code/Cargo.toml index 9fe9e2f..26613a7 100644 --- a/crates/code/Cargo.toml +++ b/crates/code/Cargo.toml @@ -9,8 +9,11 @@ anyhow.workspace = true bincode.workspace = true cc.workspace = true dirs.workspace = true +git2.workspace = true libloading.workspace = true +once_cell.workspace = true serde.workspace = true +tempfile.workspace = true 
toml.workspace = true tree-sitter.workspace = true walkdir.workspace = true diff --git a/crates/code/src/compile.rs b/crates/code/src/compile.rs index c052407..9c28e58 100644 --- a/crates/code/src/compile.rs +++ b/crates/code/src/compile.rs @@ -1,11 +1,189 @@ -use std::path::PathBuf; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; -use anyhow::Result; +use anyhow::{bail, Context, Result}; +use git2::Repository; -pub fn fetch_grammar(_repo_url: &str, _dest: &PathBuf) -> Result<()> { - todo!("clone grammar repository") +use super::loader::cache_dir; +use super::registry::LanguageEntry; + +fn sources_dir() -> PathBuf { + cache_dir().join("sources") +} + +pub fn fetch_grammar(lang: &LanguageEntry) -> Result { + let sources = sources_dir(); + fs::create_dir_all(&sources)?; + + let dest = sources.join(&lang.name); + + if dest.exists() { + return Ok(dest); + } + + Repository::clone(&lang.repo, &dest) + .with_context(|| format!("failed to clone grammar repo: {}", lang.repo))?; + + let repo = Repository::open(&dest)?; + let (object, reference) = repo.revparse_ext(&lang.branch)?; + repo.checkout_tree(&object, None)?; + match reference { + Some(r) => repo.set_head(r.name().unwrap())?, + None => repo.set_head_detached(object.id())?, + } + + Ok(dest) +} + +pub fn compile_grammar(lang: &LanguageEntry, grammar_dir: &Path) -> Result { + let output_dir = cache_dir(); + fs::create_dir_all(&output_dir)?; + + let lib_name = format!("tree-sitter-{}", lang.name); + let output_path = output_dir.join(lib_filename(&lib_name)); + + if output_path.exists() { + return Ok(output_path); + } + + let src_dir = match &lang.subpath { + Some(subpath) => grammar_dir.join(subpath).join("src"), + None => grammar_dir.join("src"), + }; + + let parser_c = src_dir.join("parser.c"); + if !parser_c.exists() { + bail!("parser.c not found at: {}", parser_c.display()); + } + + let temp_dir = tempfile::tempdir()?; + let mut objects = Vec::new(); + + 
objects.push(compile_c_file(&parser_c, &src_dir, temp_dir.path())?); + + let scanner_c = src_dir.join("scanner.c"); + if scanner_c.exists() { + objects.push(compile_c_file(&scanner_c, &src_dir, temp_dir.path())?); + } + + let scanner_cc = src_dir.join("scanner.cc"); + if scanner_cc.exists() { + objects.push(compile_cpp_file(&scanner_cc, &src_dir, temp_dir.path())?); + } + + link_shared_library(&objects, &output_path)?; + + Ok(output_path) +} + +fn compile_c_file(source: &Path, include_dir: &Path, out_dir: &Path) -> Result { + let obj_name = source.file_stem().unwrap().to_string_lossy(); + let obj_path = out_dir.join(format!("{}.o", obj_name)); + + let status = Command::new("cc") + .args(["-c", "-O3", "-fPIC", "-w"]) + .arg("-I") + .arg(include_dir) + .arg("-o") + .arg(&obj_path) + .arg(source) + .status() + .context("failed to run cc")?; + + if !status.success() { + bail!("failed to compile: {}", source.display()); + } + + Ok(obj_path) } -pub fn compile_grammar(_grammar_dir: &PathBuf, _output: &PathBuf) -> Result<()> { - todo!("compile grammar with cc") +fn compile_cpp_file(source: &Path, include_dir: &Path, out_dir: &Path) -> Result { + let obj_name = source.file_stem().unwrap().to_string_lossy(); + let obj_path = out_dir.join(format!("{}.o", obj_name)); + + let status = Command::new("c++") + .args(["-c", "-O3", "-fPIC", "-w"]) + .arg("-I") + .arg(include_dir) + .arg("-o") + .arg(&obj_path) + .arg(source) + .status() + .context("failed to run c++")?; + + if !status.success() { + bail!("failed to compile: {}", source.display()); + } + + Ok(obj_path) +} + +fn link_shared_library(objects: &[PathBuf], output: &Path) -> Result<()> { + let mut cmd = if cfg!(target_os = "macos") { + let mut c = Command::new("cc"); + c.args(["-dynamiclib", "-undefined", "dynamic_lookup"]); + c + } else if cfg!(target_os = "windows") { + let mut c = Command::new("link"); + c.arg("/DLL"); + c + } else { + let mut c = Command::new("cc"); + c.arg("-shared"); + c + }; + + for obj in objects { 
+ cmd.arg(obj); + } + + if cfg!(target_os = "windows") { + cmd.arg(format!("/OUT:{}", output.display())); + } else { + cmd.arg("-o").arg(output); + } + + let status = cmd.status().context("failed to link shared library")?; + + if !status.success() { + bail!("failed to link shared library: {}", output.display()); + } + + Ok(()) +} + +fn lib_filename(name: &str) -> String { + if cfg!(target_os = "macos") { + format!("lib{}.dylib", name) + } else if cfg!(target_os = "windows") { + format!("{}.dll", name) + } else { + format!("lib{}.so", name) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_lib_filename() { + let name = "tree-sitter-rust"; + let filename = lib_filename(name); + + if cfg!(target_os = "macos") { + assert_eq!(filename, "libtree-sitter-rust.dylib"); + } else if cfg!(target_os = "windows") { + assert_eq!(filename, "tree-sitter-rust.dll"); + } else { + assert_eq!(filename, "libtree-sitter-rust.so"); + } + } + + #[test] + fn test_sources_dir() { + let dir = sources_dir(); + assert!(dir.ends_with("grammars/sources")); + } } diff --git a/crates/code/src/loader.rs b/crates/code/src/loader.rs index c5a3499..c5fb75e 100644 --- a/crates/code/src/loader.rs +++ b/crates/code/src/loader.rs @@ -1,8 +1,23 @@ +use std::collections::HashMap; use std::path::PathBuf; +use std::sync::Mutex; -use anyhow::Result; +use anyhow::{bail, Context, Result}; +use libloading::{Library, Symbol}; +use once_cell::sync::Lazy; +use tree_sitter::ffi::TSLanguage; use tree_sitter::Language; +use super::compile::{compile_grammar, fetch_grammar}; +use super::registry::{LanguageEntry, Registry}; + +type LanguageFn = unsafe extern "C" fn() -> *const TSLanguage; + +static LOADED_LANGUAGES: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::new())); + +static LOADED_LIBRARIES: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); + pub fn cache_dir() -> PathBuf { dirs::data_local_dir() .unwrap_or_else(|| PathBuf::from(".")) @@ -10,6 +25,169 @@ pub fn cache_dir() -> PathBuf { 
.join("grammars") } -pub fn load_language(_name: &str) -> Result { - todo!("load compiled grammar via libloading") +pub fn load_language(name: &str) -> Result { + { + let cache = LOADED_LANGUAGES.lock().unwrap(); + if let Some(lang) = cache.get(name) { + return Ok(lang.clone()); + } + } + + let registry = Registry::global(); + let entry = registry + .get(name) + .with_context(|| format!("unknown language: {}", name))?; + + load_language_entry(entry) +} + +pub fn load_language_by_extension(ext: &str) -> Result { + let registry = Registry::global(); + let entry = registry + .get_by_extension(ext) + .with_context(|| format!("no language for extension: {}", ext))?; + + { + let cache = LOADED_LANGUAGES.lock().unwrap(); + if let Some(lang) = cache.get(&entry.name) { + return Ok(lang.clone()); + } + } + + load_language_entry(entry) +} + +fn load_language_entry(entry: &LanguageEntry) -> Result { + let lib_path = compiled_lib_path(entry); + + if !lib_path.exists() { + let grammar_dir = fetch_grammar(entry)?; + compile_grammar(entry, &grammar_dir)?; + } + + if !lib_path.exists() { + bail!("compiled grammar not found: {}", lib_path.display()); + } + + let language = unsafe { load_language_from_lib(&lib_path, &entry.symbol) }?; + + { + let mut cache = LOADED_LANGUAGES.lock().unwrap(); + cache.insert(entry.name.clone(), language.clone()); + } + + Ok(language) +} + +fn compiled_lib_path(entry: &LanguageEntry) -> PathBuf { + let lib_name = format!("tree-sitter-{}", entry.name); + cache_dir().join(lib_filename(&lib_name)) +} + +fn lib_filename(name: &str) -> String { + if cfg!(target_os = "macos") { + format!("lib{}.dylib", name) + } else if cfg!(target_os = "windows") { + format!("{}.dll", name) + } else { + format!("lib{}.so", name) + } +} + +unsafe fn load_language_from_lib(lib_path: &PathBuf, symbol: &str) -> Result { + let lib = Library::new(lib_path) + .with_context(|| format!("failed to load library: {}", lib_path.display()))?; + + let func: Symbol = lib + 
.get(symbol.as_bytes()) + .with_context(|| format!("symbol not found: {}", symbol))?; + + let lang_ptr = func(); + let language = Language::from_raw(lang_ptr); + + LOADED_LIBRARIES.lock().unwrap().push(lib); + + Ok(language) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_dir() { + let dir = cache_dir(); + assert!(dir.to_string_lossy().contains("glimpse")); + assert!(dir.ends_with("grammars")); + } + + #[test] + fn test_lib_filename() { + let name = "tree-sitter-rust"; + let filename = lib_filename(name); + + if cfg!(target_os = "macos") { + assert_eq!(filename, "libtree-sitter-rust.dylib"); + } else if cfg!(target_os = "windows") { + assert_eq!(filename, "tree-sitter-rust.dll"); + } else { + assert_eq!(filename, "libtree-sitter-rust.so"); + } + } + + #[test] + fn test_compiled_lib_path() { + let registry = Registry::global(); + let rust = registry.get("rust").unwrap(); + let path = compiled_lib_path(rust); + assert!(path.to_string_lossy().contains("tree-sitter-rust")); + } + + #[test] + #[ignore] + fn test_load_rust_grammar() { + let language = load_language("rust").expect("failed to load rust grammar"); + + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&language) + .expect("failed to set language"); + + let source = r#" +fn main() { + println!("Hello, world!"); +} +"#; + + let tree = parser.parse(source, None).expect("failed to parse"); + let root = tree.root_node(); + + assert_eq!(root.kind(), "source_file"); + assert!(root.child_count() > 0); + + let func = root.child(0).expect("expected function"); + assert_eq!(func.kind(), "function_item"); + } + + #[test] + #[ignore] + fn test_load_by_extension() { + let language = load_language_by_extension("rs").expect("failed to load by extension"); + + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&language) + .expect("failed to set language"); + + let tree = parser.parse("fn foo() {}", None).expect("failed to parse"); + 
assert_eq!(tree.root_node().kind(), "source_file"); + } + + #[test] + #[ignore] + fn test_language_caching() { + let lang1 = load_language("rust").expect("first load failed"); + let lang2 = load_language("rust").expect("second load failed"); + assert_eq!(lang1.node_kind_count(), lang2.node_kind_count()); + } } diff --git a/crates/code/src/registry.rs b/crates/code/src/registry.rs index 025ed54..70dbc9f 100644 --- a/crates/code/src/registry.rs +++ b/crates/code/src/registry.rs @@ -1,22 +1,133 @@ -use anyhow::Result; +use std::collections::HashMap; +use std::sync::OnceLock; -#[derive(Debug, Clone)] +use anyhow::{Context, Result}; +use serde::Deserialize; + +static REGISTRY: OnceLock = OnceLock::new(); + +#[derive(Debug, Clone, Deserialize)] pub struct LanguageEntry { pub name: String, - pub grammar_repo: String, - pub import_query: Option, + pub extensions: Vec, + pub repo: String, + pub branch: String, + pub symbol: String, + pub subpath: Option, + pub definition_query: String, + pub call_query: String, + pub import_query: String, +} + +#[derive(Debug, Deserialize)] +struct RegistryFile { + language: Vec, } pub struct Registry { languages: Vec, + by_name: HashMap, + by_extension: HashMap, } impl Registry { pub fn load() -> Result { - todo!("parse registry.toml") + let registry_toml = include_str!("../../../registry.toml"); + Self::from_str(registry_toml) + } + + fn from_str(content: &str) -> Result { + let file: RegistryFile = + toml::from_str(content).context("failed to parse registry.toml")?; + + let mut by_name = HashMap::new(); + let mut by_extension = HashMap::new(); + + for (idx, lang) in file.language.iter().enumerate() { + by_name.insert(lang.name.clone(), idx); + for ext in &lang.extensions { + by_extension.insert(ext.clone(), idx); + } + } + + Ok(Self { + languages: file.language, + by_name, + by_extension, + }) + } + + pub fn global() -> &'static Registry { + REGISTRY.get_or_init(|| Self::load().expect("failed to load registry")) + } + + pub fn get(&self, 
name: &str) -> Option<&LanguageEntry> { + self.by_name.get(name).map(|&idx| &self.languages[idx]) + } + + pub fn get_by_extension(&self, ext: &str) -> Option<&LanguageEntry> { + self.by_extension.get(ext).map(|&idx| &self.languages[idx]) + } + + pub fn languages(&self) -> &[LanguageEntry] { + &self.languages + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_load_registry() { + let registry = Registry::load().expect("failed to load registry"); + assert!(!registry.languages.is_empty()); + } + + #[test] + fn test_get_rust() { + let registry = Registry::load().unwrap(); + let rust = registry.get("rust").expect("rust language not found"); + assert_eq!(rust.name, "rust"); + assert!(rust.extensions.contains(&"rs".to_string())); + assert_eq!(rust.symbol, "tree_sitter_rust"); + } + + #[test] + fn test_get_by_extension() { + let registry = Registry::load().unwrap(); + let rust = registry + .get_by_extension("rs") + .expect("rs extension not found"); + assert_eq!(rust.name, "rust"); + } + + #[test] + fn test_typescript_subpath() { + let registry = Registry::load().unwrap(); + let ts = registry.get("typescript").expect("typescript not found"); + assert_eq!(ts.subpath, Some("typescript".to_string())); } - pub fn get_language(&self, _name: &str) -> Option<&LanguageEntry> { - todo!("lookup language by name") + #[test] + fn test_all_languages_have_queries() { + let registry = Registry::load().unwrap(); + for lang in registry.languages() { + assert!( + !lang.definition_query.is_empty(), + "{} missing definition_query", + lang.name + ); + assert!( + !lang.call_query.is_empty(), + "{} missing call_query", + lang.name + ); + assert!( + !lang.import_query.is_empty(), + "{} missing import_query", + lang.name + ); + } } } From 725387f81a762b8c7494fa9e0ac4af54b32fa46b Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 20:01:15 +0530 Subject: [PATCH 07/35] refactor: consolidate code modules into 5 files --- crates/code/src/compile.rs | 189 --------------- 
crates/code/src/extract.rs | 16 +- crates/code/src/grammar.rs | 439 ++++++++++++++++++++++++++++++++++ crates/code/src/graph.rs | 3 +- crates/code/src/heuristics.rs | 25 -- crates/code/src/imports.rs | 14 -- crates/code/src/index.rs | 67 +++++- crates/code/src/lib.rs | 10 +- crates/code/src/loader.rs | 193 --------------- crates/code/src/queries.rs | 14 -- crates/code/src/registry.rs | 133 ---------- crates/code/src/resolve.rs | 42 +++- crates/code/src/schema.rs | 58 ----- crates/code/src/storage.rs | 15 -- crates/code/src/workspace.rs | 19 -- 15 files changed, 558 insertions(+), 679 deletions(-) delete mode 100644 crates/code/src/compile.rs create mode 100644 crates/code/src/grammar.rs delete mode 100644 crates/code/src/heuristics.rs delete mode 100644 crates/code/src/imports.rs delete mode 100644 crates/code/src/loader.rs delete mode 100644 crates/code/src/queries.rs delete mode 100644 crates/code/src/registry.rs delete mode 100644 crates/code/src/schema.rs delete mode 100644 crates/code/src/storage.rs delete mode 100644 crates/code/src/workspace.rs diff --git a/crates/code/src/compile.rs b/crates/code/src/compile.rs deleted file mode 100644 index 9c28e58..0000000 --- a/crates/code/src/compile.rs +++ /dev/null @@ -1,189 +0,0 @@ -use std::fs; -use std::path::{Path, PathBuf}; -use std::process::Command; - -use anyhow::{bail, Context, Result}; -use git2::Repository; - -use super::loader::cache_dir; -use super::registry::LanguageEntry; - -fn sources_dir() -> PathBuf { - cache_dir().join("sources") -} - -pub fn fetch_grammar(lang: &LanguageEntry) -> Result { - let sources = sources_dir(); - fs::create_dir_all(&sources)?; - - let dest = sources.join(&lang.name); - - if dest.exists() { - return Ok(dest); - } - - Repository::clone(&lang.repo, &dest) - .with_context(|| format!("failed to clone grammar repo: {}", lang.repo))?; - - let repo = Repository::open(&dest)?; - let (object, reference) = repo.revparse_ext(&lang.branch)?; - repo.checkout_tree(&object, None)?; - match 
reference { - Some(r) => repo.set_head(r.name().unwrap())?, - None => repo.set_head_detached(object.id())?, - } - - Ok(dest) -} - -pub fn compile_grammar(lang: &LanguageEntry, grammar_dir: &Path) -> Result { - let output_dir = cache_dir(); - fs::create_dir_all(&output_dir)?; - - let lib_name = format!("tree-sitter-{}", lang.name); - let output_path = output_dir.join(lib_filename(&lib_name)); - - if output_path.exists() { - return Ok(output_path); - } - - let src_dir = match &lang.subpath { - Some(subpath) => grammar_dir.join(subpath).join("src"), - None => grammar_dir.join("src"), - }; - - let parser_c = src_dir.join("parser.c"); - if !parser_c.exists() { - bail!("parser.c not found at: {}", parser_c.display()); - } - - let temp_dir = tempfile::tempdir()?; - let mut objects = Vec::new(); - - objects.push(compile_c_file(&parser_c, &src_dir, temp_dir.path())?); - - let scanner_c = src_dir.join("scanner.c"); - if scanner_c.exists() { - objects.push(compile_c_file(&scanner_c, &src_dir, temp_dir.path())?); - } - - let scanner_cc = src_dir.join("scanner.cc"); - if scanner_cc.exists() { - objects.push(compile_cpp_file(&scanner_cc, &src_dir, temp_dir.path())?); - } - - link_shared_library(&objects, &output_path)?; - - Ok(output_path) -} - -fn compile_c_file(source: &Path, include_dir: &Path, out_dir: &Path) -> Result { - let obj_name = source.file_stem().unwrap().to_string_lossy(); - let obj_path = out_dir.join(format!("{}.o", obj_name)); - - let status = Command::new("cc") - .args(["-c", "-O3", "-fPIC", "-w"]) - .arg("-I") - .arg(include_dir) - .arg("-o") - .arg(&obj_path) - .arg(source) - .status() - .context("failed to run cc")?; - - if !status.success() { - bail!("failed to compile: {}", source.display()); - } - - Ok(obj_path) -} - -fn compile_cpp_file(source: &Path, include_dir: &Path, out_dir: &Path) -> Result { - let obj_name = source.file_stem().unwrap().to_string_lossy(); - let obj_path = out_dir.join(format!("{}.o", obj_name)); - - let status = 
Command::new("c++") - .args(["-c", "-O3", "-fPIC", "-w"]) - .arg("-I") - .arg(include_dir) - .arg("-o") - .arg(&obj_path) - .arg(source) - .status() - .context("failed to run c++")?; - - if !status.success() { - bail!("failed to compile: {}", source.display()); - } - - Ok(obj_path) -} - -fn link_shared_library(objects: &[PathBuf], output: &Path) -> Result<()> { - let mut cmd = if cfg!(target_os = "macos") { - let mut c = Command::new("cc"); - c.args(["-dynamiclib", "-undefined", "dynamic_lookup"]); - c - } else if cfg!(target_os = "windows") { - let mut c = Command::new("link"); - c.arg("/DLL"); - c - } else { - let mut c = Command::new("cc"); - c.arg("-shared"); - c - }; - - for obj in objects { - cmd.arg(obj); - } - - if cfg!(target_os = "windows") { - cmd.arg(format!("/OUT:{}", output.display())); - } else { - cmd.arg("-o").arg(output); - } - - let status = cmd.status().context("failed to link shared library")?; - - if !status.success() { - bail!("failed to link shared library: {}", output.display()); - } - - Ok(()) -} - -fn lib_filename(name: &str) -> String { - if cfg!(target_os = "macos") { - format!("lib{}.dylib", name) - } else if cfg!(target_os = "windows") { - format!("{}.dll", name) - } else { - format!("lib{}.so", name) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_lib_filename() { - let name = "tree-sitter-rust"; - let filename = lib_filename(name); - - if cfg!(target_os = "macos") { - assert_eq!(filename, "libtree-sitter-rust.dylib"); - } else if cfg!(target_os = "windows") { - assert_eq!(filename, "tree-sitter-rust.dll"); - } else { - assert_eq!(filename, "libtree-sitter-rust.so"); - } - } - - #[test] - fn test_sources_dir() { - let dir = sources_dir(); - assert!(dir.ends_with("grammars/sources")); - } -} diff --git a/crates/code/src/extract.rs b/crates/code/src/extract.rs index 1e59fcb..6cc28fb 100644 --- a/crates/code/src/extract.rs +++ b/crates/code/src/extract.rs @@ -1,9 +1,21 @@ use std::path::Path; use 
anyhow::Result; -use tree_sitter::Tree; +use tree_sitter::{Language, Query, Tree}; -use super::schema::{Call, Definition, Import}; +use super::index::{Call, Definition, Import}; + +pub struct QuerySet { + pub definitions: Query, + pub calls: Query, + pub imports: Option, +} + +impl QuerySet { + pub fn load(_language: Language, _lang_name: &str) -> Result { + todo!("load and compile queries for language") + } +} pub struct Extractor { _language: String, diff --git a/crates/code/src/grammar.rs b/crates/code/src/grammar.rs new file mode 100644 index 0000000..35229fa --- /dev/null +++ b/crates/code/src/grammar.rs @@ -0,0 +1,439 @@ +use std::collections::HashMap; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::{Mutex, OnceLock}; + +use anyhow::{bail, Context, Result}; +use git2::Repository; +use libloading::{Library, Symbol}; +use once_cell::sync::Lazy; +use serde::Deserialize; +use tree_sitter::ffi::TSLanguage; +use tree_sitter::Language; + +type LanguageFn = unsafe extern "C" fn() -> *const TSLanguage; + +static REGISTRY: OnceLock = OnceLock::new(); +static LOADED_LANGUAGES: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::new())); +static LOADED_LIBRARIES: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); + +#[derive(Debug, Clone, Deserialize)] +pub struct LanguageEntry { + pub name: String, + pub extensions: Vec, + pub repo: String, + pub branch: String, + pub symbol: String, + pub subpath: Option, + pub definition_query: String, + pub call_query: String, + pub import_query: String, +} + +#[derive(Debug, Deserialize)] +struct RegistryFile { + language: Vec, +} + +pub struct Registry { + languages: Vec, + by_name: HashMap, + by_extension: HashMap, +} + +impl Registry { + pub fn load() -> Result { + let registry_toml = include_str!("../../../registry.toml"); + Self::from_str(registry_toml) + } + + fn from_str(content: &str) -> Result { + let file: RegistryFile = + toml::from_str(content).context("failed to parse registry.toml")?; + + 
let mut by_name = HashMap::new(); + let mut by_extension = HashMap::new(); + + for (idx, lang) in file.language.iter().enumerate() { + by_name.insert(lang.name.clone(), idx); + for ext in &lang.extensions { + by_extension.insert(ext.clone(), idx); + } + } + + Ok(Self { + languages: file.language, + by_name, + by_extension, + }) + } + + pub fn global() -> &'static Registry { + REGISTRY.get_or_init(|| Self::load().expect("failed to load registry")) + } + + pub fn get(&self, name: &str) -> Option<&LanguageEntry> { + self.by_name.get(name).map(|&idx| &self.languages[idx]) + } + + pub fn get_by_extension(&self, ext: &str) -> Option<&LanguageEntry> { + self.by_extension.get(ext).map(|&idx| &self.languages[idx]) + } + + pub fn languages(&self) -> &[LanguageEntry] { + &self.languages + } +} + +fn sources_dir() -> PathBuf { + cache_dir().join("sources") +} + +pub fn fetch_grammar(lang: &LanguageEntry) -> Result { + let sources = sources_dir(); + fs::create_dir_all(&sources)?; + + let dest = sources.join(&lang.name); + + if dest.exists() { + return Ok(dest); + } + + Repository::clone(&lang.repo, &dest) + .with_context(|| format!("failed to clone grammar repo: {}", lang.repo))?; + + let repo = Repository::open(&dest)?; + let (object, reference) = repo.revparse_ext(&lang.branch)?; + repo.checkout_tree(&object, None)?; + match reference { + Some(r) => repo.set_head(r.name().unwrap())?, + None => repo.set_head_detached(object.id())?, + } + + Ok(dest) +} + +pub fn compile_grammar(lang: &LanguageEntry, grammar_dir: &Path) -> Result { + let output_dir = cache_dir(); + fs::create_dir_all(&output_dir)?; + + let lib_name = format!("tree-sitter-{}", lang.name); + let output_path = output_dir.join(lib_filename(&lib_name)); + + if output_path.exists() { + return Ok(output_path); + } + + let src_dir = match &lang.subpath { + Some(subpath) => grammar_dir.join(subpath).join("src"), + None => grammar_dir.join("src"), + }; + + let parser_c = src_dir.join("parser.c"); + if !parser_c.exists() { 
+ bail!("parser.c not found at: {}", parser_c.display()); + } + + let temp_dir = tempfile::tempdir()?; + let mut objects = Vec::new(); + + objects.push(compile_c_file(&parser_c, &src_dir, temp_dir.path())?); + + let scanner_c = src_dir.join("scanner.c"); + if scanner_c.exists() { + objects.push(compile_c_file(&scanner_c, &src_dir, temp_dir.path())?); + } + + let scanner_cc = src_dir.join("scanner.cc"); + if scanner_cc.exists() { + objects.push(compile_cpp_file(&scanner_cc, &src_dir, temp_dir.path())?); + } + + link_shared_library(&objects, &output_path)?; + + Ok(output_path) +} + +fn compile_c_file(source: &Path, include_dir: &Path, out_dir: &Path) -> Result { + let obj_name = source.file_stem().unwrap().to_string_lossy(); + let obj_path = out_dir.join(format!("{}.o", obj_name)); + + let status = Command::new("cc") + .args(["-c", "-O3", "-fPIC", "-w"]) + .arg("-I") + .arg(include_dir) + .arg("-o") + .arg(&obj_path) + .arg(source) + .status() + .context("failed to run cc")?; + + if !status.success() { + bail!("failed to compile: {}", source.display()); + } + + Ok(obj_path) +} + +fn compile_cpp_file(source: &Path, include_dir: &Path, out_dir: &Path) -> Result { + let obj_name = source.file_stem().unwrap().to_string_lossy(); + let obj_path = out_dir.join(format!("{}.o", obj_name)); + + let status = Command::new("c++") + .args(["-c", "-O3", "-fPIC", "-w"]) + .arg("-I") + .arg(include_dir) + .arg("-o") + .arg(&obj_path) + .arg(source) + .status() + .context("failed to run c++")?; + + if !status.success() { + bail!("failed to compile: {}", source.display()); + } + + Ok(obj_path) +} + +fn link_shared_library(objects: &[PathBuf], output: &Path) -> Result<()> { + let mut cmd = if cfg!(target_os = "macos") { + let mut c = Command::new("cc"); + c.args(["-dynamiclib", "-undefined", "dynamic_lookup"]); + c + } else if cfg!(target_os = "windows") { + let mut c = Command::new("link"); + c.arg("/DLL"); + c + } else { + let mut c = Command::new("cc"); + c.arg("-shared"); + c + }; + 
+ for obj in objects { + cmd.arg(obj); + } + + if cfg!(target_os = "windows") { + cmd.arg(format!("/OUT:{}", output.display())); + } else { + cmd.arg("-o").arg(output); + } + + let status = cmd.status().context("failed to link shared library")?; + + if !status.success() { + bail!("failed to link shared library: {}", output.display()); + } + + Ok(()) +} + +fn lib_filename(name: &str) -> String { + if cfg!(target_os = "macos") { + format!("lib{}.dylib", name) + } else if cfg!(target_os = "windows") { + format!("{}.dll", name) + } else { + format!("lib{}.so", name) + } +} + +pub fn cache_dir() -> PathBuf { + dirs::data_local_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join("glimpse") + .join("grammars") +} + +pub fn load_language(name: &str) -> Result { + { + let cache = LOADED_LANGUAGES.lock().unwrap(); + if let Some(lang) = cache.get(name) { + return Ok(lang.clone()); + } + } + + let registry = Registry::global(); + let entry = registry + .get(name) + .with_context(|| format!("unknown language: {}", name))?; + + load_language_entry(entry) +} + +pub fn load_language_by_extension(ext: &str) -> Result { + let registry = Registry::global(); + let entry = registry + .get_by_extension(ext) + .with_context(|| format!("no language for extension: {}", ext))?; + + { + let cache = LOADED_LANGUAGES.lock().unwrap(); + if let Some(lang) = cache.get(&entry.name) { + return Ok(lang.clone()); + } + } + + load_language_entry(entry) +} + +fn load_language_entry(entry: &LanguageEntry) -> Result { + let lib_path = compiled_lib_path(entry); + + if !lib_path.exists() { + let grammar_dir = fetch_grammar(entry)?; + compile_grammar(entry, &grammar_dir)?; + } + + if !lib_path.exists() { + bail!("compiled grammar not found: {}", lib_path.display()); + } + + let language = unsafe { load_language_from_lib(&lib_path, &entry.symbol) }?; + + { + let mut cache = LOADED_LANGUAGES.lock().unwrap(); + cache.insert(entry.name.clone(), language.clone()); + } + + Ok(language) +} + +fn 
compiled_lib_path(entry: &LanguageEntry) -> PathBuf { + let lib_name = format!("tree-sitter-{}", entry.name); + cache_dir().join(lib_filename(&lib_name)) +} + +unsafe fn load_language_from_lib(lib_path: &PathBuf, symbol: &str) -> Result { + let lib = Library::new(lib_path) + .with_context(|| format!("failed to load library: {}", lib_path.display()))?; + + let func: Symbol = lib + .get(symbol.as_bytes()) + .with_context(|| format!("symbol not found: {}", symbol))?; + + let lang_ptr = func(); + let language = Language::from_raw(lang_ptr); + + LOADED_LIBRARIES.lock().unwrap().push(lib); + + Ok(language) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_load_registry() { + let registry = Registry::load().expect("failed to load registry"); + assert!(!registry.languages.is_empty()); + } + + #[test] + fn test_get_rust() { + let registry = Registry::load().unwrap(); + let rust = registry.get("rust").expect("rust language not found"); + assert_eq!(rust.name, "rust"); + assert!(rust.extensions.contains(&"rs".to_string())); + assert_eq!(rust.symbol, "tree_sitter_rust"); + } + + #[test] + fn test_get_by_extension() { + let registry = Registry::load().unwrap(); + let rust = registry + .get_by_extension("rs") + .expect("rs extension not found"); + assert_eq!(rust.name, "rust"); + } + + #[test] + fn test_typescript_subpath() { + let registry = Registry::load().unwrap(); + let ts = registry.get("typescript").expect("typescript not found"); + assert_eq!(ts.subpath, Some("typescript".to_string())); + } + + #[test] + fn test_all_languages_have_queries() { + let registry = Registry::load().unwrap(); + for lang in registry.languages() { + assert!(!lang.definition_query.is_empty(), "{} missing definition_query", lang.name); + assert!(!lang.call_query.is_empty(), "{} missing call_query", lang.name); + assert!(!lang.import_query.is_empty(), "{} missing import_query", lang.name); + } + } + + #[test] + fn test_cache_dir() { + let dir = cache_dir(); + 
assert!(dir.to_string_lossy().contains("glimpse")); + assert!(dir.ends_with("grammars")); + } + + #[test] + fn test_sources_dir() { + let dir = sources_dir(); + assert!(dir.ends_with("grammars/sources")); + } + + #[test] + fn test_lib_filename() { + let name = "tree-sitter-rust"; + let filename = lib_filename(name); + if cfg!(target_os = "macos") { + assert_eq!(filename, "libtree-sitter-rust.dylib"); + } else if cfg!(target_os = "windows") { + assert_eq!(filename, "tree-sitter-rust.dll"); + } else { + assert_eq!(filename, "libtree-sitter-rust.so"); + } + } + + #[test] + fn test_compiled_lib_path() { + let registry = Registry::global(); + let rust = registry.get("rust").unwrap(); + let path = compiled_lib_path(rust); + assert!(path.to_string_lossy().contains("tree-sitter-rust")); + } + + #[test] + #[ignore] + fn test_load_rust_grammar() { + let language = load_language("rust").expect("failed to load rust grammar"); + let mut parser = tree_sitter::Parser::new(); + parser.set_language(&language).expect("failed to set language"); + + let source = "fn main() { println!(\"Hello\"); }"; + let tree = parser.parse(source, None).expect("failed to parse"); + let root = tree.root_node(); + + assert_eq!(root.kind(), "source_file"); + assert!(root.child_count() > 0); + } + + #[test] + #[ignore] + fn test_load_by_extension() { + let language = load_language_by_extension("rs").expect("failed to load by extension"); + let mut parser = tree_sitter::Parser::new(); + parser.set_language(&language).expect("failed to set language"); + + let tree = parser.parse("fn foo() {}", None).expect("failed to parse"); + assert_eq!(tree.root_node().kind(), "source_file"); + } + + #[test] + #[ignore] + fn test_language_caching() { + let lang1 = load_language("rust").expect("first load failed"); + let lang2 = load_language("rust").expect("second load failed"); + assert_eq!(lang1.node_kind_count(), lang2.node_kind_count()); + } +} diff --git a/crates/code/src/graph.rs b/crates/code/src/graph.rs index 
194c962..32881c6 100644 --- a/crates/code/src/graph.rs +++ b/crates/code/src/graph.rs @@ -2,8 +2,7 @@ use std::collections::{HashMap, HashSet}; use anyhow::Result; -use super::index::Index; -use super::schema::Definition; +use super::index::{Definition, Index}; pub type NodeId = usize; diff --git a/crates/code/src/heuristics.rs b/crates/code/src/heuristics.rs deleted file mode 100644 index 7647cbd..0000000 --- a/crates/code/src/heuristics.rs +++ /dev/null @@ -1,25 +0,0 @@ -use std::path::PathBuf; - -use anyhow::Result; - -use super::index::Index; -use super::schema::Definition; - -pub fn resolve_same_file( - _callee: &str, - _file: &PathBuf, - _index: &Index, -) -> Option { - todo!("look for definition in same file") -} - -pub fn resolve_by_index( - _callee: &str, - _index: &Index, -) -> Option { - todo!("search index for definition by name") -} - -pub fn resolve_by_search(_callee: &str, _root: &PathBuf) -> Result> { - todo!("fallback to ripgrep search") -} diff --git a/crates/code/src/imports.rs b/crates/code/src/imports.rs deleted file mode 100644 index 2e9806a..0000000 --- a/crates/code/src/imports.rs +++ /dev/null @@ -1,14 +0,0 @@ -use std::path::PathBuf; - -use anyhow::Result; - -use super::index::Index; -use super::schema::Definition; - -pub fn resolve_import( - _import_path: &str, - _from_file: &PathBuf, - _index: &Index, -) -> Result> { - todo!("resolve use statement to definition") -} diff --git a/crates/code/src/index.rs b/crates/code/src/index.rs index e3095b3..e1e6aa7 100644 --- a/crates/code/src/index.rs +++ b/crates/code/src/index.rs @@ -1,10 +1,65 @@ use std::collections::HashMap; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use anyhow::Result; use serde::{Deserialize, Serialize}; -use super::schema::FileRecord; +pub const INDEX_DIR: &str = ".glimpse-index"; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Span { + pub start_byte: usize, + pub end_byte: usize, + pub start_line: usize, + pub end_line: usize, +} + 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Definition { + pub name: String, + pub kind: DefinitionKind, + pub span: Span, + pub file: PathBuf, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum DefinitionKind { + Function, + Method, + Class, + Struct, + Enum, + Trait, + Interface, + Module, + Other(String), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Call { + pub callee: String, + pub span: Span, + pub file: PathBuf, + pub caller: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Import { + pub module_path: String, + pub alias: Option, + pub span: Span, + pub file: PathBuf, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileRecord { + pub path: PathBuf, + pub mtime: u64, + pub size: u64, + pub definitions: Vec, + pub calls: Vec, + pub imports: Vec, +} #[derive(Debug, Default, Serialize, Deserialize)] pub struct Index { @@ -28,3 +83,11 @@ impl Index { todo!("add or update file record") } } + +pub fn save_index(_index: &Index, _root: &Path) -> Result<()> { + todo!("serialize index with bincode") +} + +pub fn load_index(_root: &Path) -> Result> { + todo!("deserialize index from .glimpse-index/") +} diff --git a/crates/code/src/lib.rs b/crates/code/src/lib.rs index 77d690c..3bc6f18 100644 --- a/crates/code/src/lib.rs +++ b/crates/code/src/lib.rs @@ -1,13 +1,5 @@ -pub mod compile; pub mod extract; +pub mod grammar; pub mod graph; -pub mod heuristics; -pub mod imports; pub mod index; -pub mod loader; -pub mod queries; -pub mod registry; pub mod resolve; -pub mod schema; -pub mod storage; -pub mod workspace; diff --git a/crates/code/src/loader.rs b/crates/code/src/loader.rs deleted file mode 100644 index c5fb75e..0000000 --- a/crates/code/src/loader.rs +++ /dev/null @@ -1,193 +0,0 @@ -use std::collections::HashMap; -use std::path::PathBuf; -use std::sync::Mutex; - -use anyhow::{bail, Context, Result}; -use libloading::{Library, Symbol}; -use once_cell::sync::Lazy; -use 
tree_sitter::ffi::TSLanguage; -use tree_sitter::Language; - -use super::compile::{compile_grammar, fetch_grammar}; -use super::registry::{LanguageEntry, Registry}; - -type LanguageFn = unsafe extern "C" fn() -> *const TSLanguage; - -static LOADED_LANGUAGES: Lazy>> = - Lazy::new(|| Mutex::new(HashMap::new())); - -static LOADED_LIBRARIES: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); - -pub fn cache_dir() -> PathBuf { - dirs::data_local_dir() - .unwrap_or_else(|| PathBuf::from(".")) - .join("glimpse") - .join("grammars") -} - -pub fn load_language(name: &str) -> Result { - { - let cache = LOADED_LANGUAGES.lock().unwrap(); - if let Some(lang) = cache.get(name) { - return Ok(lang.clone()); - } - } - - let registry = Registry::global(); - let entry = registry - .get(name) - .with_context(|| format!("unknown language: {}", name))?; - - load_language_entry(entry) -} - -pub fn load_language_by_extension(ext: &str) -> Result { - let registry = Registry::global(); - let entry = registry - .get_by_extension(ext) - .with_context(|| format!("no language for extension: {}", ext))?; - - { - let cache = LOADED_LANGUAGES.lock().unwrap(); - if let Some(lang) = cache.get(&entry.name) { - return Ok(lang.clone()); - } - } - - load_language_entry(entry) -} - -fn load_language_entry(entry: &LanguageEntry) -> Result { - let lib_path = compiled_lib_path(entry); - - if !lib_path.exists() { - let grammar_dir = fetch_grammar(entry)?; - compile_grammar(entry, &grammar_dir)?; - } - - if !lib_path.exists() { - bail!("compiled grammar not found: {}", lib_path.display()); - } - - let language = unsafe { load_language_from_lib(&lib_path, &entry.symbol) }?; - - { - let mut cache = LOADED_LANGUAGES.lock().unwrap(); - cache.insert(entry.name.clone(), language.clone()); - } - - Ok(language) -} - -fn compiled_lib_path(entry: &LanguageEntry) -> PathBuf { - let lib_name = format!("tree-sitter-{}", entry.name); - cache_dir().join(lib_filename(&lib_name)) -} - -fn lib_filename(name: &str) -> String { - if 
cfg!(target_os = "macos") { - format!("lib{}.dylib", name) - } else if cfg!(target_os = "windows") { - format!("{}.dll", name) - } else { - format!("lib{}.so", name) - } -} - -unsafe fn load_language_from_lib(lib_path: &PathBuf, symbol: &str) -> Result { - let lib = Library::new(lib_path) - .with_context(|| format!("failed to load library: {}", lib_path.display()))?; - - let func: Symbol = lib - .get(symbol.as_bytes()) - .with_context(|| format!("symbol not found: {}", symbol))?; - - let lang_ptr = func(); - let language = Language::from_raw(lang_ptr); - - LOADED_LIBRARIES.lock().unwrap().push(lib); - - Ok(language) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_cache_dir() { - let dir = cache_dir(); - assert!(dir.to_string_lossy().contains("glimpse")); - assert!(dir.ends_with("grammars")); - } - - #[test] - fn test_lib_filename() { - let name = "tree-sitter-rust"; - let filename = lib_filename(name); - - if cfg!(target_os = "macos") { - assert_eq!(filename, "libtree-sitter-rust.dylib"); - } else if cfg!(target_os = "windows") { - assert_eq!(filename, "tree-sitter-rust.dll"); - } else { - assert_eq!(filename, "libtree-sitter-rust.so"); - } - } - - #[test] - fn test_compiled_lib_path() { - let registry = Registry::global(); - let rust = registry.get("rust").unwrap(); - let path = compiled_lib_path(rust); - assert!(path.to_string_lossy().contains("tree-sitter-rust")); - } - - #[test] - #[ignore] - fn test_load_rust_grammar() { - let language = load_language("rust").expect("failed to load rust grammar"); - - let mut parser = tree_sitter::Parser::new(); - parser - .set_language(&language) - .expect("failed to set language"); - - let source = r#" -fn main() { - println!("Hello, world!"); -} -"#; - - let tree = parser.parse(source, None).expect("failed to parse"); - let root = tree.root_node(); - - assert_eq!(root.kind(), "source_file"); - assert!(root.child_count() > 0); - - let func = root.child(0).expect("expected function"); - 
assert_eq!(func.kind(), "function_item"); - } - - #[test] - #[ignore] - fn test_load_by_extension() { - let language = load_language_by_extension("rs").expect("failed to load by extension"); - - let mut parser = tree_sitter::Parser::new(); - parser - .set_language(&language) - .expect("failed to set language"); - - let tree = parser.parse("fn foo() {}", None).expect("failed to parse"); - assert_eq!(tree.root_node().kind(), "source_file"); - } - - #[test] - #[ignore] - fn test_language_caching() { - let lang1 = load_language("rust").expect("first load failed"); - let lang2 = load_language("rust").expect("second load failed"); - assert_eq!(lang1.node_kind_count(), lang2.node_kind_count()); - } -} diff --git a/crates/code/src/queries.rs b/crates/code/src/queries.rs deleted file mode 100644 index 39319d7..0000000 --- a/crates/code/src/queries.rs +++ /dev/null @@ -1,14 +0,0 @@ -use anyhow::Result; -use tree_sitter::{Language, Query}; - -pub struct QuerySet { - pub definitions: Query, - pub calls: Query, - pub imports: Option, -} - -impl QuerySet { - pub fn load(_language: Language, _lang_name: &str) -> Result { - todo!("load and compile queries for language") - } -} diff --git a/crates/code/src/registry.rs b/crates/code/src/registry.rs deleted file mode 100644 index 70dbc9f..0000000 --- a/crates/code/src/registry.rs +++ /dev/null @@ -1,133 +0,0 @@ -use std::collections::HashMap; -use std::sync::OnceLock; - -use anyhow::{Context, Result}; -use serde::Deserialize; - -static REGISTRY: OnceLock = OnceLock::new(); - -#[derive(Debug, Clone, Deserialize)] -pub struct LanguageEntry { - pub name: String, - pub extensions: Vec, - pub repo: String, - pub branch: String, - pub symbol: String, - pub subpath: Option, - pub definition_query: String, - pub call_query: String, - pub import_query: String, -} - -#[derive(Debug, Deserialize)] -struct RegistryFile { - language: Vec, -} - -pub struct Registry { - languages: Vec, - by_name: HashMap, - by_extension: HashMap, -} - -impl 
Registry { - pub fn load() -> Result { - let registry_toml = include_str!("../../../registry.toml"); - Self::from_str(registry_toml) - } - - fn from_str(content: &str) -> Result { - let file: RegistryFile = - toml::from_str(content).context("failed to parse registry.toml")?; - - let mut by_name = HashMap::new(); - let mut by_extension = HashMap::new(); - - for (idx, lang) in file.language.iter().enumerate() { - by_name.insert(lang.name.clone(), idx); - for ext in &lang.extensions { - by_extension.insert(ext.clone(), idx); - } - } - - Ok(Self { - languages: file.language, - by_name, - by_extension, - }) - } - - pub fn global() -> &'static Registry { - REGISTRY.get_or_init(|| Self::load().expect("failed to load registry")) - } - - pub fn get(&self, name: &str) -> Option<&LanguageEntry> { - self.by_name.get(name).map(|&idx| &self.languages[idx]) - } - - pub fn get_by_extension(&self, ext: &str) -> Option<&LanguageEntry> { - self.by_extension.get(ext).map(|&idx| &self.languages[idx]) - } - - pub fn languages(&self) -> &[LanguageEntry] { - &self.languages - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_load_registry() { - let registry = Registry::load().expect("failed to load registry"); - assert!(!registry.languages.is_empty()); - } - - #[test] - fn test_get_rust() { - let registry = Registry::load().unwrap(); - let rust = registry.get("rust").expect("rust language not found"); - assert_eq!(rust.name, "rust"); - assert!(rust.extensions.contains(&"rs".to_string())); - assert_eq!(rust.symbol, "tree_sitter_rust"); - } - - #[test] - fn test_get_by_extension() { - let registry = Registry::load().unwrap(); - let rust = registry - .get_by_extension("rs") - .expect("rs extension not found"); - assert_eq!(rust.name, "rust"); - } - - #[test] - fn test_typescript_subpath() { - let registry = Registry::load().unwrap(); - let ts = registry.get("typescript").expect("typescript not found"); - assert_eq!(ts.subpath, Some("typescript".to_string())); - } - - 
#[test] - fn test_all_languages_have_queries() { - let registry = Registry::load().unwrap(); - for lang in registry.languages() { - assert!( - !lang.definition_query.is_empty(), - "{} missing definition_query", - lang.name - ); - assert!( - !lang.call_query.is_empty(), - "{} missing call_query", - lang.name - ); - assert!( - !lang.import_query.is_empty(), - "{} missing import_query", - lang.name - ); - } - } -} diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index 91907fb..81d4039 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -1,10 +1,44 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use anyhow::Result; -use super::index::Index; -use super::schema::{Call, Definition}; -use super::workspace::Workspace; +use super::index::{Call, Definition, Index}; + +#[derive(Debug, Clone)] +pub struct Workspace { + pub root: PathBuf, + pub members: Vec, +} + +impl Workspace { + pub fn discover(_root: &Path) -> Result { + todo!("parse Cargo.toml and discover workspace members") + } + + pub fn resolve_crate(&self, _crate_name: &str) -> Option { + todo!("resolve crate name to path within workspace") + } +} + +pub fn resolve_import( + _import_path: &str, + _from_file: &PathBuf, + _index: &Index, +) -> Result> { + todo!("resolve use statement to definition") +} + +pub fn resolve_same_file(_callee: &str, _file: &PathBuf, _index: &Index) -> Option { + todo!("look for definition in same file") +} + +pub fn resolve_by_index(_callee: &str, _index: &Index) -> Option { + todo!("search index for definition by name") +} + +pub fn resolve_by_search(_callee: &str, _root: &PathBuf) -> Result> { + todo!("fallback to ripgrep search") +} pub struct Resolver { _index: Index, diff --git a/crates/code/src/schema.rs b/crates/code/src/schema.rs deleted file mode 100644 index 4a5bd9c..0000000 --- a/crates/code/src/schema.rs +++ /dev/null @@ -1,58 +0,0 @@ -use std::path::PathBuf; - -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, 
Serialize, Deserialize)] -pub struct Span { - pub start_byte: usize, - pub end_byte: usize, - pub start_line: usize, - pub end_line: usize, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Definition { - pub name: String, - pub kind: DefinitionKind, - pub span: Span, - pub file: PathBuf, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum DefinitionKind { - Function, - Method, - Class, - Struct, - Enum, - Trait, - Interface, - Module, - Other(String), -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Call { - pub callee: String, - pub span: Span, - pub file: PathBuf, - pub caller: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Import { - pub module_path: String, - pub alias: Option, - pub span: Span, - pub file: PathBuf, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FileRecord { - pub path: PathBuf, - pub mtime: u64, - pub size: u64, - pub definitions: Vec, - pub calls: Vec, - pub imports: Vec, -} diff --git a/crates/code/src/storage.rs b/crates/code/src/storage.rs deleted file mode 100644 index 803f128..0000000 --- a/crates/code/src/storage.rs +++ /dev/null @@ -1,15 +0,0 @@ -use std::path::Path; - -use anyhow::Result; - -use super::index::Index; - -pub const INDEX_DIR: &str = ".glimpse-index"; - -pub fn save_index(_index: &Index, _root: &Path) -> Result<()> { - todo!("serialize index with bincode") -} - -pub fn load_index(_root: &Path) -> Result> { - todo!("deserialize index from .glimpse-index/") -} diff --git a/crates/code/src/workspace.rs b/crates/code/src/workspace.rs deleted file mode 100644 index 3b378f6..0000000 --- a/crates/code/src/workspace.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::path::{Path, PathBuf}; - -use anyhow::Result; - -#[derive(Debug, Clone)] -pub struct Workspace { - pub root: PathBuf, - pub members: Vec, -} - -impl Workspace { - pub fn discover(_root: &Path) -> Result { - todo!("parse Cargo.toml and discover workspace members") - } - - pub fn 
resolve_crate(&self, _crate_name: &str) -> Option { - todo!("resolve crate name to path within workspace") - } -} From 78f2b3eaa416cdac94a4d1e340d71c31127f9a3b Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 20:08:45 +0530 Subject: [PATCH 08/35] feat: implement index storage with bincode serialization --- crates/code/src/index.rs | 204 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 194 insertions(+), 10 deletions(-) diff --git a/crates/code/src/index.rs b/crates/code/src/index.rs index e1e6aa7..0b37ce6 100644 --- a/crates/code/src/index.rs +++ b/crates/code/src/index.rs @@ -1,10 +1,15 @@ use std::collections::HashMap; +use std::fs::{self, File}; +use std::io::{BufReader, BufWriter}; use std::path::{Path, PathBuf}; +use std::time::SystemTime; -use anyhow::Result; +use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; pub const INDEX_DIR: &str = ".glimpse-index"; +pub const INDEX_FILE: &str = "index.bin"; +pub const INDEX_VERSION: u32 = 1; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Span { @@ -71,23 +76,202 @@ impl Index { pub fn new() -> Self { Self { files: HashMap::new(), - version: 1, + version: INDEX_VERSION, } } - pub fn is_stale(&self, _path: &PathBuf, _mtime: u64, _size: u64) -> bool { - todo!("check if file needs re-indexing") + pub fn is_stale(&self, path: &Path, mtime: u64, size: u64) -> bool { + match self.files.get(path) { + Some(record) => record.mtime != mtime || record.size != size, + None => true, + } + } + + pub fn update(&mut self, record: FileRecord) { + self.files.insert(record.path.clone(), record); + } + + pub fn remove(&mut self, path: &Path) { + self.files.remove(path); + } + + pub fn get(&self, path: &Path) -> Option<&FileRecord> { + self.files.get(path) + } + + pub fn definitions(&self) -> impl Iterator { + self.files.values().flat_map(|f| &f.definitions) } - pub fn update(&mut self, _record: FileRecord) -> Result<()> { - todo!("add or update file record") + pub fn calls(&self) -> impl Iterator 
{ + self.files.values().flat_map(|f| &f.calls) } + + pub fn imports(&self) -> impl Iterator { + self.files.values().flat_map(|f| &f.imports) + } +} + +pub fn file_fingerprint(path: &Path) -> Result<(u64, u64)> { + let meta = fs::metadata(path).with_context(|| format!("failed to stat {}", path.display()))?; + let mtime = meta + .modified() + .unwrap_or(SystemTime::UNIX_EPOCH) + .duration_since(SystemTime::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + let size = meta.len(); + Ok((mtime, size)) +} + +pub fn index_path(root: &Path) -> PathBuf { + root.join(INDEX_DIR).join(INDEX_FILE) } -pub fn save_index(_index: &Index, _root: &Path) -> Result<()> { - todo!("serialize index with bincode") +pub fn save_index(index: &Index, root: &Path) -> Result<()> { + let dir = root.join(INDEX_DIR); + fs::create_dir_all(&dir).with_context(|| format!("failed to create {}", dir.display()))?; + + let path = dir.join(INDEX_FILE); + let file = File::create(&path).with_context(|| format!("failed to create {}", path.display()))?; + let writer = BufWriter::new(file); + + bincode::serialize_into(writer, index).context("failed to serialize index")?; + Ok(()) } -pub fn load_index(_root: &Path) -> Result> { - todo!("deserialize index from .glimpse-index/") +pub fn load_index(root: &Path) -> Result> { + let path = index_path(root); + if !path.exists() { + return Ok(None); + } + + let file = File::open(&path).with_context(|| format!("failed to open {}", path.display()))?; + let reader = BufReader::new(file); + + let index: Index = bincode::deserialize_from(reader).context("failed to deserialize index")?; + + if index.version != INDEX_VERSION { + return Ok(None); + } + + Ok(Some(index)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_test_record(name: &str) -> FileRecord { + FileRecord { + path: PathBuf::from(format!("src/{}.rs", name)), + mtime: 1234567890, + size: 1024, + definitions: vec![Definition { + name: format!("{}_fn", name), + kind: DefinitionKind::Function, + span: 
Span { + start_byte: 0, + end_byte: 100, + start_line: 1, + end_line: 10, + }, + file: PathBuf::from(format!("src/{}.rs", name)), + }], + calls: vec![Call { + callee: "other_fn".to_string(), + span: Span { + start_byte: 50, + end_byte: 60, + start_line: 5, + end_line: 5, + }, + file: PathBuf::from(format!("src/{}.rs", name)), + caller: Some(format!("{}_fn", name)), + }], + imports: vec![Import { + module_path: "std::fs".to_string(), + alias: None, + span: Span { + start_byte: 0, + end_byte: 15, + start_line: 1, + end_line: 1, + }, + file: PathBuf::from(format!("src/{}.rs", name)), + }], + } + } + + #[test] + fn test_index_update_and_get() { + let mut index = Index::new(); + let record = make_test_record("main"); + + index.update(record.clone()); + let got = index.get(Path::new("src/main.rs")).unwrap(); + + assert_eq!(got.path, record.path); + assert_eq!(got.definitions.len(), 1); + assert_eq!(got.calls.len(), 1); + assert_eq!(got.imports.len(), 1); + } + + #[test] + fn test_index_is_stale() { + let mut index = Index::new(); + let record = make_test_record("lib"); + index.update(record); + + assert!(!index.is_stale(Path::new("src/lib.rs"), 1234567890, 1024)); + assert!(index.is_stale(Path::new("src/lib.rs"), 1234567891, 1024)); + assert!(index.is_stale(Path::new("src/lib.rs"), 1234567890, 2048)); + assert!(index.is_stale(Path::new("src/other.rs"), 1234567890, 1024)); + } + + #[test] + fn test_index_remove() { + let mut index = Index::new(); + index.update(make_test_record("foo")); + index.update(make_test_record("bar")); + + assert!(index.get(Path::new("src/foo.rs")).is_some()); + index.remove(Path::new("src/foo.rs")); + assert!(index.get(Path::new("src/foo.rs")).is_none()); + assert!(index.get(Path::new("src/bar.rs")).is_some()); + } + + #[test] + fn test_index_iterators() { + let mut index = Index::new(); + index.update(make_test_record("a")); + index.update(make_test_record("b")); + + assert_eq!(index.definitions().count(), 2); + assert_eq!(index.calls().count(), 
2); + assert_eq!(index.imports().count(), 2); + } + + #[test] + fn test_save_and_load_index() { + let dir = tempfile::tempdir().unwrap(); + let mut index = Index::new(); + index.update(make_test_record("main")); + index.update(make_test_record("lib")); + + save_index(&index, dir.path()).unwrap(); + + let loaded = load_index(dir.path()).unwrap().unwrap(); + assert_eq!(loaded.version, INDEX_VERSION); + assert_eq!(loaded.files.len(), 2); + assert!(loaded.get(Path::new("src/main.rs")).is_some()); + assert!(loaded.get(Path::new("src/lib.rs")).is_some()); + } + + #[test] + fn test_load_index_nonexistent() { + let dir = tempfile::tempdir().unwrap(); + let result = load_index(dir.path()).unwrap(); + assert!(result.is_none()); + } } From b84e6d033c26e66102de59ad1b4fd280a2d5c783 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 20:15:27 +0530 Subject: [PATCH 09/35] refactor: store index in local data dir instead of project --- crates/code/src/index.rs | 67 +++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/crates/code/src/index.rs b/crates/code/src/index.rs index 0b37ce6..8d8218b 100644 --- a/crates/code/src/index.rs +++ b/crates/code/src/index.rs @@ -1,5 +1,7 @@ use std::collections::HashMap; +use std::collections::hash_map::DefaultHasher; use std::fs::{self, File}; +use std::hash::{Hash, Hasher}; use std::io::{BufReader, BufWriter}; use std::path::{Path, PathBuf}; use std::time::SystemTime; @@ -7,7 +9,6 @@ use std::time::SystemTime; use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; -pub const INDEX_DIR: &str = ".glimpse-index"; pub const INDEX_FILE: &str = "index.bin"; pub const INDEX_VERSION: u32 = 1; @@ -124,15 +125,29 @@ pub fn file_fingerprint(path: &Path) -> Result<(u64, u64)> { Ok((mtime, size)) } -pub fn index_path(root: &Path) -> PathBuf { - root.join(INDEX_DIR).join(INDEX_FILE) +fn hash_path(path: &Path) -> String { + let mut hasher = DefaultHasher::new(); + path.hash(&mut hasher); + 
format!("{:016x}", hasher.finish()) +} + +fn index_dir() -> Result { + dirs::data_local_dir() + .map(|d| d.join("glimpse").join("indices")) + .context("could not determine local data directory") +} + +pub fn index_path(root: &Path) -> Result { + let canonical = root.canonicalize().unwrap_or_else(|_| root.to_path_buf()); + let hash = hash_path(&canonical); + Ok(index_dir()?.join(hash).join(INDEX_FILE)) } pub fn save_index(index: &Index, root: &Path) -> Result<()> { - let dir = root.join(INDEX_DIR); - fs::create_dir_all(&dir).with_context(|| format!("failed to create {}", dir.display()))?; + let path = index_path(root)?; + let dir = path.parent().unwrap(); + fs::create_dir_all(dir).with_context(|| format!("failed to create {}", dir.display()))?; - let path = dir.join(INDEX_FILE); let file = File::create(&path).with_context(|| format!("failed to create {}", path.display()))?; let writer = BufWriter::new(file); @@ -141,7 +156,7 @@ pub fn save_index(index: &Index, root: &Path) -> Result<()> { } pub fn load_index(root: &Path) -> Result> { - let path = index_path(root); + let path = index_path(root)?; if !path.exists() { return Ok(None); } @@ -149,7 +164,10 @@ pub fn load_index(root: &Path) -> Result> { let file = File::open(&path).with_context(|| format!("failed to open {}", path.display()))?; let reader = BufReader::new(file); - let index: Index = bincode::deserialize_from(reader).context("failed to deserialize index")?; + let index: Index = match bincode::deserialize_from(reader) { + Ok(idx) => idx, + Err(_) => return Ok(None), + }; if index.version != INDEX_VERSION { return Ok(None); @@ -158,6 +176,16 @@ pub fn load_index(root: &Path) -> Result> { Ok(Some(index)) } +pub fn clear_index(root: &Path) -> Result<()> { + let path = index_path(root)?; + if let Some(dir) = path.parent() { + if dir.exists() { + fs::remove_dir_all(dir).with_context(|| format!("failed to remove {}", dir.display()))?; + } + } + Ok(()) +} + #[cfg(test)] mod tests { use super::*; @@ -252,20 +280,37 
@@ mod tests { assert_eq!(index.imports().count(), 2); } + #[test] + fn test_index_path_uses_data_dir() { + let path = index_path(Path::new("/some/project")).unwrap(); + let data_dir = dirs::data_local_dir().unwrap(); + assert!(path.starts_with(data_dir.join("glimpse").join("indices"))); + assert!(path.ends_with(INDEX_FILE)); + } + + #[test] + fn test_index_path_different_projects() { + let path1 = index_path(Path::new("/project/a")).unwrap(); + let path2 = index_path(Path::new("/project/b")).unwrap(); + assert_ne!(path1, path2); + } + #[test] fn test_save_and_load_index() { - let dir = tempfile::tempdir().unwrap(); + let project_dir = tempfile::tempdir().unwrap(); let mut index = Index::new(); index.update(make_test_record("main")); index.update(make_test_record("lib")); - save_index(&index, dir.path()).unwrap(); + save_index(&index, project_dir.path()).unwrap(); - let loaded = load_index(dir.path()).unwrap().unwrap(); + let loaded = load_index(project_dir.path()).unwrap().unwrap(); assert_eq!(loaded.version, INDEX_VERSION); assert_eq!(loaded.files.len(), 2); assert!(loaded.get(Path::new("src/main.rs")).is_some()); assert!(loaded.get(Path::new("src/lib.rs")).is_some()); + + clear_index(project_dir.path()).unwrap(); } #[test] From bb7bf79c7c06e4cf0ae486c45d28b77a913ecac9 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 20:31:34 +0530 Subject: [PATCH 10/35] feat: implement tree-sitter extraction for all languages --- crates/code/src/extract.rs | 288 ++++++++++++++- crates/code/src/graph.rs | 1 + crates/code/src/resolve.rs | 7 +- crates/code/tests/extraction.rs | 633 ++++++++++++++++++++++++++++++++ 4 files changed, 912 insertions(+), 17 deletions(-) create mode 100644 crates/code/tests/extraction.rs diff --git a/crates/code/src/extract.rs b/crates/code/src/extract.rs index 6cc28fb..42c3f5b 100644 --- a/crates/code/src/extract.rs +++ b/crates/code/src/extract.rs @@ -1,40 +1,300 @@ use std::path::Path; -use anyhow::Result; -use tree_sitter::{Language, Query, 
Tree}; +use anyhow::{Context, Result}; +use tree_sitter::{Language, Node, Query, QueryCursor, StreamingIterator, Tree}; -use super::index::{Call, Definition, Import}; +use super::grammar::{LanguageEntry, Registry}; +use super::index::{Call, Definition, DefinitionKind, Import, Span}; pub struct QuerySet { pub definitions: Query, pub calls: Query, pub imports: Option, + def_name_idx: u32, + def_kind_indices: Vec<(u32, DefinitionKind)>, + call_name_idx: u32, + import_path_indices: Vec, + import_alias_idx: Option, } impl QuerySet { - pub fn load(_language: Language, _lang_name: &str) -> Result { - todo!("load and compile queries for language") + pub fn load(language: Language, entry: &LanguageEntry) -> Result { + let definitions = Query::new(&language, &entry.definition_query) + .with_context(|| format!("failed to compile definition query for {}", entry.name))?; + + let calls = Query::new(&language, &entry.call_query) + .with_context(|| format!("failed to compile call query for {}", entry.name))?; + + let imports = if entry.import_query.trim().is_empty() { + None + } else { + Some( + Query::new(&language, &entry.import_query) + .with_context(|| format!("failed to compile import query for {}", entry.name))?, + ) + }; + + let def_name_idx = definitions + .capture_index_for_name("name") + .unwrap_or(u32::MAX); + + let def_kind_indices = Self::build_definition_kind_indices(&definitions); + + let call_name_idx = calls.capture_index_for_name("name").unwrap_or(u32::MAX); + + let (import_path_indices, import_alias_idx) = if let Some(ref q) = imports { + let path_indices = ["path", "source", "system_path", "local_path", "module"] + .iter() + .filter_map(|name| q.capture_index_for_name(name)) + .collect(); + let alias = q.capture_index_for_name("alias"); + (path_indices, alias) + } else { + (vec![], None) + }; + + Ok(Self { + definitions, + calls, + imports, + def_name_idx, + def_kind_indices, + call_name_idx, + import_path_indices, + import_alias_idx, + }) + } + + fn 
build_definition_kind_indices(query: &Query) -> Vec<(u32, DefinitionKind)> { + let mut indices = Vec::new(); + + let kind_mappings = [ + ("function.definition", DefinitionKind::Function), + ("method.definition", DefinitionKind::Method), + ("class.definition", DefinitionKind::Class), + ("struct.definition", DefinitionKind::Struct), + ("enum.definition", DefinitionKind::Enum), + ("trait.definition", DefinitionKind::Trait), + ("interface.definition", DefinitionKind::Interface), + ("module.definition", DefinitionKind::Module), + ("object.definition", DefinitionKind::Other("object".into())), + ]; + + for (name, kind) in kind_mappings { + if let Some(idx) = query.capture_index_for_name(name) { + indices.push((idx, kind)); + } + } + + indices } } pub struct Extractor { - _language: String, + language: Language, + queries: QuerySet, } impl Extractor { - pub fn new(_language: &str) -> Result { - todo!("initialize extractor with language queries") + pub fn new(lang_name: &str) -> Result { + let registry = Registry::global(); + let entry = registry + .get(lang_name) + .with_context(|| format!("unknown language: {}", lang_name))?; + + let language = super::grammar::load_language(lang_name)?; + let queries = QuerySet::load(language.clone(), entry)?; + + Ok(Self { language, queries }) + } + + pub fn from_extension(ext: &str) -> Result { + let registry = Registry::global(); + let entry = registry + .get_by_extension(ext) + .with_context(|| format!("no language for extension: {}", ext))?; + + let language = super::grammar::load_language(&entry.name)?; + let queries = QuerySet::load(language.clone(), entry)?; + + Ok(Self { language, queries }) + } + + pub fn language(&self) -> &Language { + &self.language + } + + pub fn extract_definitions( + &self, + tree: &Tree, + source: &[u8], + path: &Path, + ) -> Vec { + let mut cursor = QueryCursor::new(); + let mut definitions = Vec::new(); + let mut matches = cursor.matches(&self.queries.definitions, tree.root_node(), source); + + while 
let Some(m) = matches.next() { + let mut name: Option<&str> = None; + let mut kind: Option = None; + let mut span_node: Option = None; + + for capture in m.captures { + if capture.index == self.queries.def_name_idx { + name = capture.node.utf8_text(source).ok(); + } + + for (kind_idx, kind_type) in &self.queries.def_kind_indices { + if capture.index == *kind_idx { + kind = Some(kind_type.clone()); + span_node = Some(capture.node); + break; + } + } + } + + if let (Some(name), Some(kind), Some(node)) = (name, kind, span_node) { + definitions.push(Definition { + name: name.to_string(), + kind, + span: node_to_span(&node), + file: path.to_path_buf(), + }); + } + } + + definitions + } + + pub fn extract_calls(&self, tree: &Tree, source: &[u8], path: &Path) -> Vec { + let definitions = self.extract_definitions(tree, source, path); + let mut cursor = QueryCursor::new(); + let mut calls = Vec::new(); + let mut matches = cursor.matches(&self.queries.calls, tree.root_node(), source); + + while let Some(m) = matches.next() { + let mut callee: Option<&str> = None; + let mut call_node: Option = None; + + for capture in m.captures { + if capture.index == self.queries.call_name_idx { + callee = capture.node.utf8_text(source).ok(); + call_node = Some(capture.node); + } + } + + if let (Some(callee), Some(node)) = (callee, call_node) { + let caller = find_enclosing_definition(&definitions, node.start_byte()); + + calls.push(Call { + callee: callee.to_string(), + span: node_to_span(&node), + file: path.to_path_buf(), + caller, + }); + } + } + + calls + } + + pub fn extract_imports(&self, tree: &Tree, source: &[u8], path: &Path) -> Vec { + let Some(ref import_query) = self.queries.imports else { + return Vec::new(); + }; + + let mut cursor = QueryCursor::new(); + let mut imports = Vec::new(); + let mut seen_ranges = std::collections::HashSet::new(); + let mut matches = cursor.matches(import_query, tree.root_node(), source); + + while let Some(m) = matches.next() { + let mut 
module_path: Option<&str> = None; + let mut alias: Option<&str> = None; + let mut import_node: Option = None; + + for capture in m.captures { + if self.queries.import_path_indices.contains(&capture.index) + && module_path.is_none() + { + module_path = capture.node.utf8_text(source).ok(); + import_node = Some(capture.node); + } + + if self.queries.import_alias_idx == Some(capture.index) { + alias = capture.node.utf8_text(source).ok(); + } + } + + if let (Some(module_path), Some(node)) = (module_path, import_node) { + let range = (node.start_byte(), node.end_byte()); + if seen_ranges.contains(&range) { + continue; + } + seen_ranges.insert(range); + + let cleaned_path = clean_import_path(module_path); + + imports.push(Import { + module_path: cleaned_path, + alias: alias.map(|s| s.to_string()), + span: node_to_span(&node), + file: path.to_path_buf(), + }); + } + } + + imports } +} - pub fn extract_definitions(&self, _tree: &Tree, _source: &[u8], _path: &Path) -> Vec { - todo!("extract definitions from parsed tree") +fn node_to_span(node: &Node) -> Span { + Span { + start_byte: node.start_byte(), + end_byte: node.end_byte(), + start_line: node.start_position().row + 1, + end_line: node.end_position().row + 1, } +} - pub fn extract_calls(&self, _tree: &Tree, _source: &[u8], _path: &Path) -> Vec { - todo!("extract call sites from parsed tree") +fn find_enclosing_definition(definitions: &[Definition], byte_offset: usize) -> Option { + definitions + .iter() + .filter(|d| d.span.start_byte <= byte_offset && byte_offset < d.span.end_byte) + .min_by_key(|d| d.span.end_byte - d.span.start_byte) + .map(|d| d.name.clone()) +} + +fn clean_import_path(path: &str) -> String { + path.trim_matches('"') + .trim_matches('\'') + .trim_matches('`') + .to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_clean_import_path() { + assert_eq!(clean_import_path("\"std::fs\""), "std::fs"); + assert_eq!(clean_import_path("'./module'"), "./module"); + 
assert_eq!(clean_import_path("std::path"), "std::path"); } - pub fn extract_imports(&self, _tree: &Tree, _source: &[u8], _path: &Path) -> Vec { - todo!("extract imports from parsed tree") + #[test] + fn test_span_fields() { + let span = Span { + start_byte: 10, + end_byte: 50, + start_line: 2, + end_line: 5, + }; + + assert_eq!(span.start_byte, 10); + assert_eq!(span.end_byte, 50); + assert_eq!(span.start_line, 2); + assert_eq!(span.end_line, 5); } } diff --git a/crates/code/src/graph.rs b/crates/code/src/graph.rs index 32881c6..b677b22 100644 --- a/crates/code/src/graph.rs +++ b/crates/code/src/graph.rs @@ -16,6 +16,7 @@ pub struct CallGraphNode { #[derive(Debug, Default)] pub struct CallGraph { pub nodes: HashMap, + #[allow(dead_code)] name_to_id: HashMap, } diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index 81d4039..a4cec2a 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -2,6 +2,7 @@ use std::path::{Path, PathBuf}; use anyhow::Result; +#[allow(unused_imports)] use super::index::{Call, Definition, Index}; #[derive(Debug, Clone)] @@ -22,13 +23,13 @@ impl Workspace { pub fn resolve_import( _import_path: &str, - _from_file: &PathBuf, + _from_file: &Path, _index: &Index, ) -> Result> { todo!("resolve use statement to definition") } -pub fn resolve_same_file(_callee: &str, _file: &PathBuf, _index: &Index) -> Option { +pub fn resolve_same_file(_callee: &str, _file: &Path, _index: &Index) -> Option { todo!("look for definition in same file") } @@ -36,7 +37,7 @@ pub fn resolve_by_index(_callee: &str, _index: &Index) -> Option { todo!("search index for definition by name") } -pub fn resolve_by_search(_callee: &str, _root: &PathBuf) -> Result> { +pub fn resolve_by_search(_callee: &str, _root: &Path) -> Result> { todo!("fallback to ripgrep search") } diff --git a/crates/code/tests/extraction.rs b/crates/code/tests/extraction.rs new file mode 100644 index 0000000..ba7b5d4 --- /dev/null +++ b/crates/code/tests/extraction.rs 
@@ -0,0 +1,633 @@ +use std::path::Path; + +use glimpse_code::extract::Extractor; +use glimpse_code::index::DefinitionKind; +use tree_sitter::Parser; + +fn parse_and_extract(lang: &str, source: &str) -> ExtractResult { + let extractor = Extractor::new(lang).expect(&format!("failed to load {}", lang)); + let mut parser = Parser::new(); + parser + .set_language(extractor.language()) + .expect("failed to set language"); + let tree = parser.parse(source, None).expect("failed to parse"); + let path = Path::new("test.src"); + + ExtractResult { + definitions: extractor.extract_definitions(&tree, source.as_bytes(), path), + calls: extractor.extract_calls(&tree, source.as_bytes(), path), + imports: extractor.extract_imports(&tree, source.as_bytes(), path), + } +} + +struct ExtractResult { + definitions: Vec, + calls: Vec, + imports: Vec, +} + +mod rust { + use super::*; + + const SAMPLE: &str = r#" +use std::fs; +use std::path::Path; +use crate::config::Config; + +fn main() { + let config = Config::load(); + helper(config); + println!("done"); +} + +fn helper(cfg: Config) { + cfg.validate(); + process(cfg); +} + +fn process(cfg: Config) { + fs::write("out.txt", cfg.data()); +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("rust", SAMPLE); + + assert_eq!(result.definitions.len(), 3); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"helper")); + assert!(names.contains(&"process")); + + for def in &result.definitions { + assert!(matches!(def.kind, DefinitionKind::Function)); + } + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("rust", SAMPLE); + + let callers: Vec<_> = result + .calls + .iter() + .filter_map(|c| c.caller.as_ref().map(|caller| (caller.as_str(), c.callee.as_str()))) + .collect(); + + assert!(callers.contains(&("main", "helper"))); + assert!(callers.contains(&("helper", "process"))); + } + + #[test] + 
#[ignore] + fn imports() { + let result = parse_and_extract("rust", SAMPLE); + + assert!(!result.imports.is_empty()); + let paths: Vec<_> = result.imports.iter().map(|i| &i.module_path).collect(); + assert!(paths.iter().any(|p| p.contains("std"))); + } +} + +mod python { + use super::*; + + const SAMPLE: &str = r#" +import os +from pathlib import Path +from typing import Optional + +def main(): + config = load_config() + process(config) + +def load_config(): + return Config() + +def process(config): + save(config.data) + +class Config: + def __init__(self): + self.data = {} + + def validate(self): + return True +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("python", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"load_config")); + assert!(names.contains(&"process")); + assert!(names.contains(&"Config")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("python", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"load_config")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("python", SAMPLE); + + assert!(!result.imports.is_empty()); + let paths: Vec<_> = result.imports.iter().map(|i| &i.module_path).collect(); + assert!(paths.iter().any(|p| p.contains("os") || p.contains("pathlib"))); + } +} + +mod typescript { + use super::*; + + const SAMPLE: &str = r#" +import { readFile } from 'fs'; +import path from 'path'; + +function main() { + const config = loadConfig(); + process(config); +} + +function loadConfig(): Config { + return new Config(); +} + +const process = (config: Config) => { + config.validate(); + save(config); +}; + +class Config { + validate() { + return true; + } +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = 
parse_and_extract("typescript", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("typescript", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("typescript", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} + +mod javascript { + use super::*; + + const SAMPLE: &str = r#" +const fs = require('fs'); +import { join } from 'path'; + +function main() { + const data = loadData(); + process(data); +} + +function loadData() { + return fs.readFileSync('data.json'); +} + +const process = (data) => { + transform(data); +}; +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("javascript", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadData")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("javascript", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadData")); + assert!(callees.contains(&"process")); + } +} + +mod go { + use super::*; + + const SAMPLE: &str = r#" +package main + +import ( + "fmt" + "os" +) + +func main() { + config := loadConfig() + process(config) +} + +func loadConfig() *Config { + return &Config{} +} + +func process(cfg *Config) { + cfg.Validate() + save(cfg) +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("go", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| 
d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("go", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("go", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} + +mod c { + use super::*; + + const SAMPLE: &str = r#" +#include +#include "config.h" + +void process(Config* cfg); + +int main() { + Config* cfg = load_config(); + process(cfg); + return 0; +} + +Config* load_config() { + return malloc(sizeof(Config)); +} + +void process(Config* cfg) { + validate(cfg); + save(cfg); +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("c", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"load_config")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("c", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"load_config")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("c", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} + +mod cpp { + use super::*; + + const SAMPLE: &str = r#" +#include +#include "config.hpp" + +class Processor { +public: + void run() { + process(); + } + + void process() { + helper(); + } +}; + +int main() { + Processor p; + p.run(); + return 0; +} + +void standalone() { + std::cout << "hello" << std::endl; +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("cpp", SAMPLE); + + let names: 
Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"standalone")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("cpp", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"run")); + } +} + +mod java { + use super::*; + + const SAMPLE: &str = r#" +import java.util.List; +import com.example.Config; + +public class Main { + public static void main(String[] args) { + Config config = loadConfig(); + process(config); + } + + private static Config loadConfig() { + return new Config(); + } + + private static void process(Config cfg) { + cfg.validate(); + save(cfg); + } +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("java", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("java", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("java", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} + +mod bash { + use super::*; + + const SAMPLE: &str = r#" +#!/bin/bash + +source ./config.sh + +main() { + load_config + process "$1" +} + +load_config() { + echo "loading" +} + +process() { + validate "$1" + save "$1" +} + +main "$@" +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("bash", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"load_config")); + 
assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("bash", SAMPLE); + + // Bash treats all commands as calls + assert!(!result.calls.is_empty()); + } +} + +mod zig { + use super::*; + + const SAMPLE: &str = r#" +const std = @import("std"); + +pub fn main() void { + const config = loadConfig(); + process(config); +} + +fn loadConfig() Config { + return Config{}; +} + +fn process(cfg: Config) void { + cfg.validate(); + save(cfg); +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("zig", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("zig", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("zig", SAMPLE); + + assert!(!result.imports.is_empty()); + assert!(result.imports.iter().any(|i| i.module_path == "std")); + } +} + +mod scala { + use super::*; + + const SAMPLE: &str = r#" +import scala.collection.mutable +import com.example.Config + +object Main { + def main(args: Array[String]): Unit = { + val config = loadConfig() + process(config) + } + + def loadConfig(): Config = { + new Config() + } + + def process(cfg: Config): Unit = { + cfg.validate() + save(cfg) + } +} + +class Processor { + def run(): Unit = { + helper() + } +} + +trait Validator { + def validate(): Boolean +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("scala", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); 
+ assert!(names.contains(&"process")); + assert!(names.contains(&"Main")); + assert!(names.contains(&"Processor")); + assert!(names.contains(&"Validator")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("scala", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("scala", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} From dba754026faf32ffba8e11eb09282d0da7b49270 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 20:47:20 +0530 Subject: [PATCH 11/35] feat: implement WorkspaceDiscovery trait and RustWorkspace with resolution heuristics --- Cargo.lock | 9 + Cargo.toml | 3 + crates/code/Cargo.toml | 3 + crates/code/src/resolve.rs | 399 ++++++++++++++++++++++++++++++++++--- 4 files changed, 382 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b7653c8..4c558c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -967,9 +967,12 @@ dependencies = [ "git2", "glimpse-core", "glimpse-fetch", + "glob", "libloading", "once_cell", + "regex", "serde", + "serde_json", "tempfile", "toml", "tree-sitter", @@ -1021,6 +1024,12 @@ dependencies = [ "ratatui", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "globset" version = "0.4.15" diff --git a/Cargo.toml b/Cargo.toml index 031ac9d..00e0e45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,9 @@ tree-sitter = "0.25" # Core dependencies dirs = "5.0.1" +glob = "0.3" +regex = "1.11" +serde_json = "1.0" once_cell = "1.20.2" tempfile = "3.14.0" tiktoken-rs = "0.6.0" diff --git a/crates/code/Cargo.toml b/crates/code/Cargo.toml index 26613a7..6f67e6f 100644 --- a/crates/code/Cargo.toml +++ 
b/crates/code/Cargo.toml @@ -10,9 +10,12 @@ bincode.workspace = true cc.workspace = true dirs.workspace = true git2.workspace = true +glob.workspace = true libloading.workspace = true once_cell.workspace = true +regex.workspace = true serde.workspace = true +serde_json.workspace = true tempfile.workspace = true toml.workspace = true tree-sitter.workspace = true diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index a4cec2a..5cad58a 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -1,58 +1,393 @@ +use std::fs; use std::path::{Path, PathBuf}; +use std::process::Command; -use anyhow::Result; +use anyhow::{Context, Result}; +use serde::Deserialize; -#[allow(unused_imports)] -use super::index::{Call, Definition, Index}; +use super::index::{Definition, Index}; + +pub trait WorkspaceDiscovery: Send + Sync { + fn discover(root: &Path) -> Result>> + where + Self: Sized; + + fn resolve_module(&self, module_path: &str) -> Option; + + fn root(&self) -> &Path; +} #[derive(Debug, Clone)] -pub struct Workspace { - pub root: PathBuf, - pub members: Vec, +pub struct RustWorkspace { + root: PathBuf, + members: Vec, +} + +#[derive(Debug, Clone)] +pub struct CrateMember { + pub name: String, + pub path: PathBuf, +} + +#[derive(Deserialize)] +struct CargoToml { + package: Option, + workspace: Option, +} + +#[derive(Deserialize)] +struct CargoPackage { + name: String, } -impl Workspace { - pub fn discover(_root: &Path) -> Result { - todo!("parse Cargo.toml and discover workspace members") +#[derive(Deserialize)] +struct CargoWorkspace { + members: Option>, +} + +impl WorkspaceDiscovery for RustWorkspace { + fn discover(root: &Path) -> Result>> { + let cargo_path = root.join("Cargo.toml"); + if !cargo_path.exists() { + return Ok(None); + } + + let content = fs::read_to_string(&cargo_path) + .with_context(|| format!("failed to read {}", cargo_path.display()))?; + + let cargo: CargoToml = + toml::from_str(&content).with_context(|| "failed to 
parse Cargo.toml")?; + + let mut members = Vec::new(); + + if let Some(ws) = cargo.workspace { + if let Some(member_globs) = ws.members { + for pattern in member_globs { + let expanded = expand_glob(root, &pattern)?; + for member_path in expanded { + if let Some(member) = parse_crate_member(&member_path)? { + members.push(member); + } + } + } + } + } + + if let Some(pkg) = cargo.package { + members.push(CrateMember { + name: pkg.name, + path: root.to_path_buf(), + }); + } + + if members.is_empty() { + return Ok(None); + } + + Ok(Some(Box::new(RustWorkspace { + root: root.to_path_buf(), + members, + }))) } - pub fn resolve_crate(&self, _crate_name: &str) -> Option { - todo!("resolve crate name to path within workspace") + fn resolve_module(&self, module_path: &str) -> Option { + let parts: Vec<&str> = module_path.split("::").collect(); + if parts.is_empty() { + return None; + } + + let crate_name = parts[0]; + + if crate_name == "crate" || crate_name == "self" || crate_name == "super" { + return None; + } + + let member = self.members.iter().find(|m| m.name == crate_name)?; + + let mut path = member.path.join("src"); + for part in &parts[1..] 
{ + path = path.join(part); + } + + if path.with_extension("rs").exists() { + return Some(path.with_extension("rs")); + } + + let mod_path = path.join("mod.rs"); + if mod_path.exists() { + return Some(mod_path); + } + + None + } + + fn root(&self) -> &Path { + &self.root } } -pub fn resolve_import( - _import_path: &str, - _from_file: &Path, - _index: &Index, -) -> Result> { - todo!("resolve use statement to definition") +impl RustWorkspace { + pub fn members(&self) -> &[CrateMember] { + &self.members + } + + pub fn resolve_crate(&self, crate_name: &str) -> Option<&PathBuf> { + self.members + .iter() + .find(|m| m.name == crate_name) + .map(|m| &m.path) + } } -pub fn resolve_same_file(_callee: &str, _file: &Path, _index: &Index) -> Option { - todo!("look for definition in same file") +fn expand_glob(root: &Path, pattern: &str) -> Result> { + let full_pattern = root.join(pattern); + let pattern_str = full_pattern.to_string_lossy(); + + let mut results = Vec::new(); + + if pattern.contains('*') { + for entry in glob::glob(&pattern_str).with_context(|| "invalid glob pattern")? 
{ + if let Ok(path) = entry { + if path.is_dir() && path.join("Cargo.toml").exists() { + results.push(path); + } + } + } + } else { + let path = root.join(pattern); + if path.is_dir() && path.join("Cargo.toml").exists() { + results.push(path); + } + } + + Ok(results) } -pub fn resolve_by_index(_callee: &str, _index: &Index) -> Option { - todo!("search index for definition by name") +fn parse_crate_member(path: &Path) -> Result> { + let cargo_path = path.join("Cargo.toml"); + if !cargo_path.exists() { + return Ok(None); + } + + let content = fs::read_to_string(&cargo_path) + .with_context(|| format!("failed to read {}", cargo_path.display()))?; + + let cargo: CargoToml = toml::from_str(&content).with_context(|| "failed to parse Cargo.toml")?; + + let name = cargo + .package + .map(|p| p.name) + .unwrap_or_else(|| path.file_name().unwrap().to_string_lossy().to_string()); + + Ok(Some(CrateMember { + name, + path: path.to_path_buf(), + })) } -pub fn resolve_by_search(_callee: &str, _root: &Path) -> Result> { - todo!("fallback to ripgrep search") +pub fn resolve_same_file(callee: &str, file: &Path, index: &Index) -> Option { + let record = index.get(file)?; + record + .definitions + .iter() + .find(|d| d.name == callee) + .cloned() } -pub struct Resolver { - _index: Index, - _workspace: Option, - _root: PathBuf, +pub fn resolve_by_index(callee: &str, index: &Index) -> Option { + index.definitions().find(|d| d.name == callee).cloned() } -impl Resolver { - pub fn new(_index: Index, _workspace: Option, _root: PathBuf) -> Self { - todo!("initialize resolver") +pub fn resolve_by_search(callee: &str, root: &Path) -> Result> { + let output = Command::new("rg") + .args([ + "--json", + "-e", + &format!(r"fn\s+{}\s*[\(<]", regex::escape(callee)), + "--type", + "rust", + root.to_string_lossy().as_ref(), + ]) + .output() + .context("failed to run ripgrep")?; + + if !output.status.success() { + return Ok(None); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + for line 
in stdout.lines() { + if let Ok(msg) = serde_json::from_str::(line) { + if let RgMessage::Match { data } = msg { + return Ok(Some(Definition { + name: callee.to_string(), + kind: super::index::DefinitionKind::Function, + span: super::index::Span { + start_byte: 0, + end_byte: 0, + start_line: data.line_number.unwrap_or(1) as usize, + end_line: data.line_number.unwrap_or(1) as usize, + }, + file: PathBuf::from(&data.path.text), + })); + } + } + } + + Ok(None) +} + +#[derive(Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +enum RgMessage { + Match { data: RgMatchData }, + #[serde(other)] + Other, +} + +#[derive(Deserialize)] +struct RgMatchData { + path: RgText, + line_number: Option, +} + +#[derive(Deserialize)] +struct RgText { + text: String, +} + +pub struct Resolver<'a> { + index: &'a Index, + workspace: Option>, + root: PathBuf, +} + +impl<'a> Resolver<'a> { + pub fn new( + index: &'a Index, + workspace: Option>, + root: PathBuf, + ) -> Self { + Self { + index, + workspace, + root, + } + } + + pub fn resolve(&self, callee: &str, from_file: &Path) -> Result> { + if let Some(def) = resolve_same_file(callee, from_file, self.index) { + return Ok(Some(def)); + } + + if let Some(ref ws) = self.workspace { + if let Some(module_path) = ws.resolve_module(callee) { + if let Some(record) = self.index.get(&module_path) { + if let Some(def) = record.definitions.first() { + return Ok(Some(def.clone())); + } + } + } + } + + if let Some(def) = resolve_by_index(callee, self.index) { + return Ok(Some(def)); + } + + resolve_by_search(callee, &self.root) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + fn setup_rust_workspace() -> TempDir { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("Cargo.toml"), + r#" +[workspace] +members = ["crates/*"] + +[package] +name = "root-crate" +version = "0.1.0" +"#, + ) + .unwrap(); + + let crate_a = dir.path().join("crates/crate-a"); + 
fs::create_dir_all(crate_a.join("src")).unwrap(); + fs::write( + crate_a.join("Cargo.toml"), + r#" +[package] +name = "crate-a" +version = "0.1.0" +"#, + ) + .unwrap(); + fs::write(crate_a.join("src/lib.rs"), "pub fn foo() {}").unwrap(); + + let crate_b = dir.path().join("crates/crate-b"); + fs::create_dir_all(crate_b.join("src")).unwrap(); + fs::write( + crate_b.join("Cargo.toml"), + r#" +[package] +name = "crate-b" +version = "0.1.0" +"#, + ) + .unwrap(); + fs::write(crate_b.join("src/lib.rs"), "pub fn bar() {}").unwrap(); + + dir + } + + #[test] + fn test_rust_workspace_discovery() { + let dir = setup_rust_workspace(); + let ws = RustWorkspace::discover(dir.path()).unwrap().unwrap(); + + assert_eq!(ws.root(), dir.path()); + assert_eq!(ws.members().len(), 3); + + let names: Vec<_> = ws.members().iter().map(|m| m.name.as_str()).collect(); + assert!(names.contains(&"root-crate")); + assert!(names.contains(&"crate-a")); + assert!(names.contains(&"crate-b")); + } + + #[test] + fn test_rust_workspace_resolve_crate() { + let dir = setup_rust_workspace(); + let ws = RustWorkspace::discover(dir.path()).unwrap().unwrap(); + + let path = ws.resolve_crate("crate-a").unwrap(); + assert!(path.ends_with("crates/crate-a")); + + assert!(ws.resolve_crate("nonexistent").is_none()); + } + + #[test] + fn test_rust_workspace_resolve_module() { + let dir = setup_rust_workspace(); + let ws = RustWorkspace::discover(dir.path()).unwrap().unwrap(); + + assert!(ws.resolve_module("crate").is_none()); + assert!(ws.resolve_module("self").is_none()); + assert!(ws.resolve_module("super").is_none()); } - pub fn resolve(&self, _call: &Call) -> Result> { - todo!("resolve call to definition using all strategies") + #[test] + fn test_no_cargo_toml() { + let dir = TempDir::new().unwrap(); + let ws = RustWorkspace::discover(dir.path()).unwrap(); + assert!(ws.is_none()); } } From df1fe3742938273bf21f3411071d9b226dec7982 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 20:53:06 +0530 Subject: 
[PATCH 12/35] feat: add GoWorkspace and TsWorkspace with comprehensive tests --- crates/code/src/resolve.rs | 650 ++++++++++++++++++++++++++++++++++++- 1 file changed, 649 insertions(+), 1 deletion(-) diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index 5cad58a..f4c6a57 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -103,8 +103,21 @@ impl WorkspaceDiscovery for RustWorkspace { } let member = self.members.iter().find(|m| m.name == crate_name)?; + let src_dir = member.path.join("src"); - let mut path = member.path.join("src"); + if parts.len() == 1 { + let lib_rs = src_dir.join("lib.rs"); + if lib_rs.exists() { + return Some(lib_rs); + } + let main_rs = src_dir.join("main.rs"); + if main_rs.exists() { + return Some(main_rs); + } + return None; + } + + let mut path = src_dir; for part in &parts[1..] { path = path.join(part); } @@ -139,6 +152,275 @@ impl RustWorkspace { } } +// ============================================================================= +// Go Workspace +// ============================================================================= + +#[derive(Debug, Clone)] +pub struct GoWorkspace { + root: PathBuf, + module_path: String, +} + +#[derive(Deserialize)] +struct GoMod { + #[serde(rename = "Module")] + module: GoModule, +} + +#[derive(Deserialize)] +struct GoModule { + #[serde(rename = "Path")] + path: String, +} + +impl WorkspaceDiscovery for GoWorkspace { + fn discover(root: &Path) -> Result>> { + let go_mod_path = root.join("go.mod"); + if !go_mod_path.exists() { + return Ok(None); + } + + let output = Command::new("go") + .args(["mod", "edit", "-json"]) + .current_dir(root) + .output() + .context("failed to run go mod edit -json")?; + + if !output.status.success() { + let content = fs::read_to_string(&go_mod_path)?; + if let Some(module_path) = parse_go_mod_fallback(&content) { + return Ok(Some(Box::new(GoWorkspace { + root: root.to_path_buf(), + module_path, + }))); + } + return Ok(None); + } + + 
let go_mod: GoMod = serde_json::from_slice(&output.stdout) + .context("failed to parse go mod output")?; + + Ok(Some(Box::new(GoWorkspace { + root: root.to_path_buf(), + module_path: go_mod.module.path, + }))) + } + + fn resolve_module(&self, module_path: &str) -> Option { + if !module_path.starts_with(&self.module_path) { + return None; + } + + let relative = module_path + .strip_prefix(&self.module_path)? + .trim_start_matches('/'); + + if relative.is_empty() { + let main_go = self.root.join("main.go"); + if main_go.exists() { + return Some(main_go); + } + for entry in fs::read_dir(&self.root).ok()? { + let entry = entry.ok()?; + let path = entry.path(); + if path.extension().map(|e| e == "go").unwrap_or(false) { + return Some(path); + } + } + return None; + } + + let pkg_dir = self.root.join(relative); + if pkg_dir.is_dir() { + for entry in fs::read_dir(&pkg_dir).ok()? { + let entry = entry.ok()?; + let path = entry.path(); + if path.extension().map(|e| e == "go").unwrap_or(false) + && !path + .file_name() + .map(|n| n.to_string_lossy().ends_with("_test.go")) + .unwrap_or(false) + { + return Some(path); + } + } + } + + None + } + + fn root(&self) -> &Path { + &self.root + } +} + +impl GoWorkspace { + pub fn module_path(&self) -> &str { + &self.module_path + } +} + +fn parse_go_mod_fallback(content: &str) -> Option { + for line in content.lines() { + let line = line.trim(); + if line.starts_with("module ") { + return Some(line.strip_prefix("module ")?.trim().to_string()); + } + } + None +} + +// ============================================================================= +// TypeScript/JavaScript Workspace +// ============================================================================= + +#[derive(Debug, Clone)] +pub struct TsWorkspace { + root: PathBuf, + name: String, + paths: Vec<(String, PathBuf)>, +} + +#[derive(Deserialize)] +struct PackageJson { + name: Option, + #[allow(dead_code)] + workspaces: Option>, +} + +#[derive(Deserialize)] +struct TsConfig { + 
#[serde(rename = "compilerOptions")] + compiler_options: Option, +} + +#[derive(Deserialize)] +struct TsCompilerOptions { + paths: Option>>, + #[serde(rename = "baseUrl")] + base_url: Option, +} + +impl WorkspaceDiscovery for TsWorkspace { + fn discover(root: &Path) -> Result>> { + let pkg_json_path = root.join("package.json"); + if !pkg_json_path.exists() { + return Ok(None); + } + + let content = fs::read_to_string(&pkg_json_path) + .with_context(|| format!("failed to read {}", pkg_json_path.display()))?; + + let pkg: PackageJson = + serde_json::from_str(&content).with_context(|| "failed to parse package.json")?; + + let name = pkg.name.unwrap_or_else(|| { + root.file_name() + .unwrap_or_default() + .to_string_lossy() + .to_string() + }); + + let mut paths = Vec::new(); + + let tsconfig_path = root.join("tsconfig.json"); + if tsconfig_path.exists() { + if let Ok(ts_content) = fs::read_to_string(&tsconfig_path) { + if let Ok(tsconfig) = serde_json::from_str::(&ts_content) { + if let Some(opts) = tsconfig.compiler_options { + let base = opts + .base_url + .map(|b| root.join(b)) + .unwrap_or_else(|| root.to_path_buf()); + + if let Some(path_map) = opts.paths { + for (alias, targets) in path_map { + if let Some(target) = targets.first() { + let clean_alias = alias.trim_end_matches("/*"); + let clean_target = target.trim_end_matches("/*"); + paths.push((clean_alias.to_string(), base.join(clean_target))); + } + } + } + } + } + } + } + + Ok(Some(Box::new(TsWorkspace { root: root.to_path_buf(), name, paths }))) + } + + fn resolve_module(&self, module_path: &str) -> Option { + if module_path.starts_with('.') { + return None; + } + + for (alias, target_dir) in &self.paths { + if module_path.starts_with(alias) { + let remainder = module_path.strip_prefix(alias)?.trim_start_matches('/'); + let base = if remainder.is_empty() { + target_dir.clone() + } else { + target_dir.join(remainder) + }; + + for ext in &["ts", "tsx", "js", "jsx"] { + let with_ext = 
base.with_extension(ext); + if with_ext.exists() { + return Some(with_ext); + } + } + + let index_path = base.join("index"); + for ext in &["ts", "tsx", "js", "jsx"] { + let with_ext = index_path.with_extension(ext); + if with_ext.exists() { + return Some(with_ext); + } + } + } + } + + let node_modules = self.root.join("node_modules").join(module_path); + if node_modules.exists() { + let pkg_json = node_modules.join("package.json"); + if pkg_json.exists() { + if let Ok(content) = fs::read_to_string(&pkg_json) { + if let Ok(pkg) = serde_json::from_str::(&content) { + if let Some(main) = pkg.get("main").and_then(|m| m.as_str()) { + let main_path = node_modules.join(main); + if main_path.exists() { + return Some(main_path); + } + } + } + } + } + } + + None + } + + fn root(&self) -> &Path { + &self.root + } +} + +impl TsWorkspace { + pub fn name(&self) -> &str { + &self.name + } + + pub fn paths(&self) -> &[(String, PathBuf)] { + &self.paths + } +} + +// ============================================================================= +// Helpers +// ============================================================================= + fn expand_glob(root: &Path, pattern: &str) -> Result> { let full_pattern = root.join(pattern); let pattern_str = full_pattern.to_string_lossy(); @@ -390,4 +672,370 @@ version = "0.1.0" let ws = RustWorkspace::discover(dir.path()).unwrap(); assert!(ws.is_none()); } + + #[test] + fn test_resolve_module_finds_file() { + let dir = setup_rust_workspace(); + let ws = RustWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("crate-a"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("src/lib.rs")); + } + + #[test] + fn test_resolve_same_file() { + use super::super::index::{Definition, DefinitionKind, FileRecord, Span}; + + let mut index = Index::new(); + let file = PathBuf::from("src/main.rs"); + + index.update(FileRecord { + path: file.clone(), + mtime: 0, + size: 0, + definitions: vec![ + 
Definition { + name: "foo".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 3, + }, + file: file.clone(), + }, + Definition { + name: "bar".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 20, + end_byte: 30, + start_line: 5, + end_line: 7, + }, + file: file.clone(), + }, + ], + calls: vec![], + imports: vec![], + }); + + let found = resolve_same_file("foo", &file, &index); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "foo"); + + let found = resolve_same_file("bar", &file, &index); + assert!(found.is_some()); + + let not_found = resolve_same_file("baz", &file, &index); + assert!(not_found.is_none()); + + let wrong_file = resolve_same_file("foo", Path::new("src/other.rs"), &index); + assert!(wrong_file.is_none()); + } + + #[test] + fn test_resolve_by_index() { + use super::super::index::{Definition, DefinitionKind, FileRecord, Span}; + + let mut index = Index::new(); + + index.update(FileRecord { + path: PathBuf::from("src/a.rs"), + mtime: 0, + size: 0, + definitions: vec![Definition { + name: "alpha".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 3, + }, + file: PathBuf::from("src/a.rs"), + }], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: PathBuf::from("src/b.rs"), + mtime: 0, + size: 0, + definitions: vec![Definition { + name: "beta".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 3, + }, + file: PathBuf::from("src/b.rs"), + }], + calls: vec![], + imports: vec![], + }); + + let found = resolve_by_index("alpha", &index); + assert!(found.is_some()); + assert_eq!(found.as_ref().unwrap().file, PathBuf::from("src/a.rs")); + + let found = resolve_by_index("beta", &index); + assert!(found.is_some()); + assert_eq!(found.as_ref().unwrap().file, PathBuf::from("src/b.rs")); 
+ + let not_found = resolve_by_index("gamma", &index); + assert!(not_found.is_none()); + } + + #[test] + fn test_resolver_prefers_same_file() { + use super::super::index::{Definition, DefinitionKind, FileRecord, Span}; + + let mut index = Index::new(); + let file_a = PathBuf::from("src/a.rs"); + let file_b = PathBuf::from("src/b.rs"); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![Definition { + name: "foo".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 3, + }, + file: file_a.clone(), + }], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![Definition { + name: "foo".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 10, + end_line: 12, + }, + file: file_b.clone(), + }], + calls: vec![], + imports: vec![], + }); + + let resolver = Resolver::new(&index, None, PathBuf::from(".")); + + let found = resolver.resolve("foo", &file_a).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, file_a); + + let found = resolver.resolve("foo", &file_b).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, file_b); + } + + // ========================================================================= + // Go Workspace Tests + // ========================================================================= + + fn setup_go_workspace() -> TempDir { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("go.mod"), + "module github.com/example/myproject\n\ngo 1.21\n", + ) + .unwrap(); + + fs::write(dir.path().join("main.go"), "package main\n\nfunc main() {}\n").unwrap(); + + let pkg_dir = dir.path().join("pkg/utils"); + fs::create_dir_all(&pkg_dir).unwrap(); + fs::write(pkg_dir.join("helpers.go"), "package utils\n\nfunc Helper() {}\n").unwrap(); + + let internal_dir = 
dir.path().join("internal/core"); + fs::create_dir_all(&internal_dir).unwrap(); + fs::write(internal_dir.join("core.go"), "package core\n\nfunc Process() {}\n").unwrap(); + + dir + } + + #[test] + fn test_go_workspace_discovery() { + let dir = setup_go_workspace(); + let ws = GoWorkspace::discover(dir.path()).unwrap(); + + assert!(ws.is_some()); + let ws = ws.unwrap(); + assert_eq!(ws.root(), dir.path()); + assert_eq!(ws.module_path(), "github.com/example/myproject"); + } + + #[test] + fn test_go_workspace_no_go_mod() { + let dir = TempDir::new().unwrap(); + let ws = GoWorkspace::discover(dir.path()).unwrap(); + assert!(ws.is_none()); + } + + #[test] + fn test_go_workspace_resolve_root() { + let dir = setup_go_workspace(); + let ws = GoWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("github.com/example/myproject"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("main.go")); + } + + #[test] + fn test_go_workspace_resolve_package() { + let dir = setup_go_workspace(); + let ws = GoWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("github.com/example/myproject/pkg/utils"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("helpers.go")); + } + + #[test] + fn test_go_workspace_resolve_external() { + let dir = setup_go_workspace(); + let ws = GoWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("github.com/other/package"); + assert!(resolved.is_none()); + } + + #[test] + fn test_go_mod_fallback_parsing() { + let content = "module github.com/foo/bar\n\ngo 1.21\n"; + let module = parse_go_mod_fallback(content); + assert_eq!(module, Some("github.com/foo/bar".to_string())); + + let content = "// comment\nmodule example.com/test \n"; + let module = parse_go_mod_fallback(content); + assert_eq!(module, Some("example.com/test".to_string())); + } + + // ========================================================================= 
+ // TypeScript Workspace Tests + // ========================================================================= + + fn setup_ts_workspace() -> TempDir { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("package.json"), + r#"{"name": "my-app", "version": "1.0.0"}"#, + ) + .unwrap(); + + fs::write( + dir.path().join("tsconfig.json"), + r#"{ + "compilerOptions": { + "baseUrl": ".", + "paths": { + "@/*": ["src/*"], + "@utils/*": ["src/utils/*"] + } + } + }"#, + ) + .unwrap(); + + let src_dir = dir.path().join("src"); + fs::create_dir_all(&src_dir).unwrap(); + fs::write(src_dir.join("index.ts"), "export const main = () => {};\n").unwrap(); + + let utils_dir = src_dir.join("utils"); + fs::create_dir_all(&utils_dir).unwrap(); + fs::write(utils_dir.join("helpers.ts"), "export const helper = () => {};\n").unwrap(); + + let components_dir = src_dir.join("components"); + fs::create_dir_all(&components_dir).unwrap(); + fs::write(components_dir.join("Button.tsx"), "export const Button = () => null;\n") + .unwrap(); + fs::write(components_dir.join("index.ts"), "export * from './Button';\n").unwrap(); + + dir + } + + #[test] + fn test_ts_workspace_discovery() { + let dir = setup_ts_workspace(); + let ws = TsWorkspace::discover(dir.path()).unwrap(); + + assert!(ws.is_some()); + let ws = ws.unwrap(); + assert_eq!(ws.root(), dir.path()); + assert_eq!(ws.name(), "my-app"); + assert!(!ws.paths().is_empty()); + } + + #[test] + fn test_ts_workspace_no_package_json() { + let dir = TempDir::new().unwrap(); + let ws = TsWorkspace::discover(dir.path()).unwrap(); + assert!(ws.is_none()); + } + + #[test] + fn test_ts_workspace_resolve_alias() { + let dir = setup_ts_workspace(); + let ws = TsWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("@/index"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("src/index.ts")); + } + + #[test] + fn test_ts_workspace_resolve_utils_alias() { + let dir = setup_ts_workspace(); + 
let ws = TsWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("@utils/helpers"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("src/utils/helpers.ts")); + } + + #[test] + fn test_ts_workspace_resolve_index_file() { + let dir = setup_ts_workspace(); + let ws = TsWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("@/components"); + assert!(resolved.is_some()); + let path = resolved.unwrap(); + assert!(path.ends_with("components/index.ts")); + } + + #[test] + fn test_ts_workspace_relative_ignored() { + let dir = setup_ts_workspace(); + let ws = TsWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("./local"); + assert!(resolved.is_none()); + + let resolved = ws.resolve_module("../parent"); + assert!(resolved.is_none()); + } } From 3df7f851f79ac325af3adccb23b6d7f13201345e Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 21:03:23 +0530 Subject: [PATCH 13/35] feat: add PythonWorkspace and import tracing to Resolver --- crates/code/src/resolve.rs | 475 +++++++++++++++++++++++++++++++++++-- 1 file changed, 459 insertions(+), 16 deletions(-) diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index f4c6a57..774489a 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -152,9 +152,6 @@ impl RustWorkspace { } } -// ============================================================================= -// Go Workspace -// ============================================================================= #[derive(Debug, Clone)] pub struct GoWorkspace { @@ -271,9 +268,6 @@ fn parse_go_mod_fallback(content: &str) -> Option { None } -// ============================================================================= -// TypeScript/JavaScript Workspace -// ============================================================================= #[derive(Debug, Clone)] pub struct TsWorkspace { @@ -417,9 +411,151 @@ impl TsWorkspace { 
} } -// ============================================================================= -// Helpers -// ============================================================================= + +#[derive(Debug, Clone)] +pub struct PythonWorkspace { + root: PathBuf, + package_name: String, + src_dir: PathBuf, +} + +#[derive(Deserialize)] +struct PyProjectToml { + project: Option, + tool: Option, +} + +#[derive(Deserialize)] +struct PyProject { + name: Option, +} + +#[derive(Deserialize)] +struct PyToolSection { + poetry: Option, + setuptools: Option, +} + +#[derive(Deserialize)] +struct PyPoetry { + name: Option, +} + +#[derive(Deserialize)] +struct PySetuptools { + #[serde(rename = "package-dir")] + package_dir: Option>, +} + +impl WorkspaceDiscovery for PythonWorkspace { + fn discover(root: &Path) -> Result>> { + let pyproject_path = root.join("pyproject.toml"); + + let (package_name, src_dir) = if pyproject_path.exists() { + let content = fs::read_to_string(&pyproject_path) + .with_context(|| format!("failed to read {}", pyproject_path.display()))?; + + let pyproject: PyProjectToml = + toml::from_str(&content).with_context(|| "failed to parse pyproject.toml")?; + + let name = pyproject + .project + .and_then(|p| p.name) + .or_else(|| pyproject.tool.as_ref().and_then(|t| t.poetry.as_ref()).and_then(|p| p.name.clone())) + .unwrap_or_else(|| { + root.file_name() + .unwrap_or_default() + .to_string_lossy() + .to_string() + }); + + let src = pyproject + .tool + .and_then(|t| t.setuptools) + .and_then(|s| s.package_dir) + .and_then(|dirs| dirs.get("").cloned()) + .map(|dir| root.join(dir)) + .unwrap_or_else(|| { + let src_layout = root.join("src"); + if src_layout.exists() { + src_layout + } else { + root.to_path_buf() + } + }); + + (name, src) + } else { + let setup_py = root.join("setup.py"); + if !setup_py.exists() { + return Ok(None); + } + + let name = root + .file_name() + .unwrap_or_default() + .to_string_lossy() + .to_string(); + + let src = if root.join("src").exists() { 
+ root.join("src") + } else { + root.to_path_buf() + }; + + (name, src) + }; + + Ok(Some(Box::new(PythonWorkspace { + root: root.to_path_buf(), + package_name, + src_dir, + }))) + } + + fn resolve_module(&self, module_path: &str) -> Option { + if module_path.starts_with('.') { + return None; + } + + let parts: Vec<&str> = module_path.split('.').collect(); + if parts.is_empty() { + return None; + } + + let mut path = self.src_dir.clone(); + for part in &parts { + path = path.join(part); + } + + let py_file = path.with_extension("py"); + if py_file.exists() { + return Some(py_file); + } + + let init_file = path.join("__init__.py"); + if init_file.exists() { + return Some(init_file); + } + + None + } + + fn root(&self) -> &Path { + &self.root + } +} + +impl PythonWorkspace { + pub fn package_name(&self) -> &str { + &self.package_name + } + + pub fn src_dir(&self) -> &Path { + &self.src_dir + } +} + fn expand_glob(root: &Path, pattern: &str) -> Result> { let full_pattern = root.join(pattern); @@ -562,6 +698,10 @@ impl<'a> Resolver<'a> { return Ok(Some(def)); } + if let Some(def) = self.resolve_via_imports(callee, from_file) { + return Ok(Some(def)); + } + if let Some(ref ws) = self.workspace { if let Some(module_path) = ws.resolve_module(callee) { if let Some(record) = self.index.get(&module_path) { @@ -578,6 +718,69 @@ impl<'a> Resolver<'a> { resolve_by_search(callee, &self.root) } + + fn resolve_via_imports(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + + for import in &record.imports { + let visible_name = import + .alias + .as_deref() + .or_else(|| import.module_path.rsplit("::").next()) + .or_else(|| import.module_path.rsplit('.').next())?; + + if visible_name != callee { + continue; + } + + let original_name = import + .module_path + .rsplit("::") + .next() + .or_else(|| import.module_path.rsplit('.').next()) + .unwrap_or(callee); + + if let Some(ref ws) = self.workspace { + if let Some(resolved_path) = 
ws.resolve_module(&import.module_path) { + if let Some(target_record) = self.index.get(&resolved_path) { + if let Some(def) = target_record + .definitions + .iter() + .find(|d| d.name == original_name) + { + return Some(def.clone()); + } + } + } + } + + let module_path_normalized = import.module_path.replace('.', "::"); + let module_parts: Vec<&str> = module_path_normalized + .split("::") + .filter(|p| !p.is_empty() && *p != "crate" && *p != "self" && *p != "super") + .collect(); + + for def in self.index.definitions() { + if def.name != original_name { + continue; + } + + if module_parts.len() <= 1 { + return Some(def.clone()); + } + + let file_str = def.file.to_string_lossy(); + let path_parts = &module_parts[..module_parts.len() - 1]; + let matches = path_parts.iter().all(|part| file_str.contains(part)); + + if matches { + return Some(def.clone()); + } + } + } + + None + } } #[cfg(test)] @@ -849,9 +1052,114 @@ version = "0.1.0" assert_eq!(found.unwrap().file, file_b); } - // ========================================================================= - // Go Workspace Tests - // ========================================================================= + #[test] + fn test_resolver_import_tracing() { + use super::super::index::{Definition, DefinitionKind, FileRecord, Import, Span}; + + let mut index = Index::new(); + let main_file = PathBuf::from("src/main.rs"); + let utils_file = PathBuf::from("src/utils.rs"); + + index.update(FileRecord { + path: utils_file.clone(), + mtime: 0, + size: 0, + definitions: vec![Definition { + name: "helper".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 50, + start_line: 1, + end_line: 5, + }, + file: utils_file.clone(), + }], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: main_file.clone(), + mtime: 0, + size: 0, + definitions: vec![], + calls: vec![], + imports: vec![Import { + module_path: "crate::utils::helper".to_string(), + alias: None, + span: Span { + 
start_byte: 0, + end_byte: 25, + start_line: 1, + end_line: 1, + }, + file: main_file.clone(), + }], + }); + + let resolver = Resolver::new(&index, None, PathBuf::from(".")); + let found = resolver.resolve("helper", &main_file).unwrap(); + + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "helper"); + } + + #[test] + fn test_resolver_import_tracing_with_alias() { + use super::super::index::{Definition, DefinitionKind, FileRecord, Import, Span}; + + let mut index = Index::new(); + let main_file = PathBuf::from("src/main.rs"); + let utils_file = PathBuf::from("src/utils.rs"); + + index.update(FileRecord { + path: utils_file.clone(), + mtime: 0, + size: 0, + definitions: vec![Definition { + name: "long_function_name".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 50, + start_line: 1, + end_line: 5, + }, + file: utils_file.clone(), + }], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: main_file.clone(), + mtime: 0, + size: 0, + definitions: vec![], + calls: vec![], + imports: vec![Import { + module_path: "crate::utils::long_function_name".to_string(), + alias: Some("short".to_string()), + span: Span { + start_byte: 0, + end_byte: 40, + start_line: 1, + end_line: 1, + }, + file: main_file.clone(), + }], + }); + + let resolver = Resolver::new(&index, None, PathBuf::from(".")); + + let found = resolver.resolve("short", &main_file).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "long_function_name"); + + let not_found = resolver.resolve("long_function_name", &main_file).unwrap(); + assert!(not_found.is_none() || not_found.unwrap().file != main_file); + } fn setup_go_workspace() -> TempDir { let dir = TempDir::new().unwrap(); @@ -933,10 +1241,6 @@ version = "0.1.0" assert_eq!(module, Some("example.com/test".to_string())); } - // ========================================================================= - // TypeScript Workspace Tests - // 
========================================================================= - fn setup_ts_workspace() -> TempDir { let dir = TempDir::new().unwrap(); @@ -1038,4 +1342,143 @@ version = "0.1.0" let resolved = ws.resolve_module("../parent"); assert!(resolved.is_none()); } + + fn setup_python_workspace_src_layout() -> TempDir { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("pyproject.toml"), + r#" +[project] +name = "mypackage" +version = "0.1.0" + +[tool.setuptools] +package-dir = {"" = "src"} +"#, + ) + .unwrap(); + + let pkg_dir = dir.path().join("src/mypackage"); + fs::create_dir_all(&pkg_dir).unwrap(); + fs::write(pkg_dir.join("__init__.py"), "").unwrap(); + fs::write(pkg_dir.join("main.py"), "def main(): pass\n").unwrap(); + + let utils_dir = pkg_dir.join("utils"); + fs::create_dir_all(&utils_dir).unwrap(); + fs::write(utils_dir.join("__init__.py"), "").unwrap(); + fs::write(utils_dir.join("helpers.py"), "def helper(): pass\n").unwrap(); + + dir + } + + fn setup_python_workspace_flat_layout() -> TempDir { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("pyproject.toml"), + r#" +[project] +name = "flatpkg" +version = "0.1.0" +"#, + ) + .unwrap(); + + let pkg_dir = dir.path().join("flatpkg"); + fs::create_dir_all(&pkg_dir).unwrap(); + fs::write(pkg_dir.join("__init__.py"), "").unwrap(); + fs::write(pkg_dir.join("core.py"), "def process(): pass\n").unwrap(); + + dir + } + + #[test] + fn test_python_workspace_discovery_src_layout() { + let dir = setup_python_workspace_src_layout(); + let ws = PythonWorkspace::discover(dir.path()).unwrap(); + + assert!(ws.is_some()); + let ws = ws.unwrap(); + assert_eq!(ws.root(), dir.path()); + assert_eq!(ws.package_name(), "mypackage"); + assert!(ws.src_dir().ends_with("src")); + } + + #[test] + fn test_python_workspace_discovery_flat_layout() { + let dir = setup_python_workspace_flat_layout(); + let ws = PythonWorkspace::discover(dir.path()).unwrap(); + + assert!(ws.is_some()); + let ws = 
ws.unwrap(); + assert_eq!(ws.package_name(), "flatpkg"); + } + + #[test] + fn test_python_workspace_no_pyproject() { + let dir = TempDir::new().unwrap(); + let ws = PythonWorkspace::discover(dir.path()).unwrap(); + assert!(ws.is_none()); + } + + #[test] + fn test_python_workspace_resolve_module() { + let dir = setup_python_workspace_src_layout(); + let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("mypackage.main"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("mypackage/main.py")); + } + + #[test] + fn test_python_workspace_resolve_package() { + let dir = setup_python_workspace_src_layout(); + let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("mypackage.utils"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("utils/__init__.py")); + } + + #[test] + fn test_python_workspace_resolve_submodule() { + let dir = setup_python_workspace_src_layout(); + let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("mypackage.utils.helpers"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("utils/helpers.py")); + } + + #[test] + fn test_python_workspace_relative_ignored() { + let dir = setup_python_workspace_src_layout(); + let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module(".relative"); + assert!(resolved.is_none()); + } + + #[test] + fn test_python_workspace_poetry_project() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("pyproject.toml"), + r#" +[tool.poetry] +name = "poetry-project" +version = "0.1.0" +"#, + ) + .unwrap(); + + let src_dir = dir.path().join("src"); + fs::create_dir_all(&src_dir).unwrap(); + + let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); + assert_eq!(ws.package_name(), "poetry-project"); + } } From 9160fefb7da23cc3eb46e9cee43375ed539c9f82 Mon Sep 17 
00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 21:14:42 +0530 Subject: [PATCH 14/35] feat: replace rg subprocess with language-aware regex search --- Cargo.lock | 4 +- crates/code/src/resolve.rs | 214 ++++++++++++++++++++++++++++--------- 2 files changed, 167 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c558c2..eaac4f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1032,9 +1032,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "globset" -version = "0.4.15" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" dependencies = [ "aho-corasick", "bstr", diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index 774489a..de42fa7 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -616,62 +616,103 @@ pub fn resolve_by_index(callee: &str, index: &Index) -> Option { index.definitions().find(|d| d.name == callee).cloned() } +struct DefinitionPattern { + extensions: &'static [&'static str], + pattern: &'static str, +} + +const DEFINITION_PATTERNS: &[DefinitionPattern] = &[ + DefinitionPattern { + extensions: &["rs"], + pattern: r"fn\s+{NAME}\s*[<(]", + }, + DefinitionPattern { + extensions: &["go"], + pattern: r"func\s+(\([^)]*\)\s*)?{NAME}\s*[\[<(]", + }, + DefinitionPattern { + extensions: &["py"], + pattern: r"def\s+{NAME}\s*\(", + }, + DefinitionPattern { + extensions: &["ts", "tsx", "js", "jsx", "mjs", "cjs"], + pattern: r"(function\s+{NAME}|const\s+{NAME}\s*=|let\s+{NAME}\s*=|{NAME}\s*\([^)]*\)\s*\{)", + }, + DefinitionPattern { + extensions: &["java", "scala"], + pattern: r"(void|int|String|boolean|public|private|protected|static|def)\s+{NAME}\s*[<(]", + }, + DefinitionPattern { + extensions: &["c", "cpp", "cc", "cxx", "h", "hpp"], + pattern: 
r"\b\w+[\s*]+{NAME}\s*\(", + }, + DefinitionPattern { + extensions: &["zig"], + pattern: r"(fn|pub fn)\s+{NAME}\s*\(", + }, + DefinitionPattern { + extensions: &["sh", "bash"], + pattern: r"(function\s+{NAME}|{NAME}\s*\(\s*\))", + }, +]; + pub fn resolve_by_search(callee: &str, root: &Path) -> Result> { - let output = Command::new("rg") - .args([ - "--json", - "-e", - &format!(r"fn\s+{}\s*[\(<]", regex::escape(callee)), - "--type", - "rust", - root.to_string_lossy().as_ref(), - ]) - .output() - .context("failed to run ripgrep")?; - - if !output.status.success() { - return Ok(None); - } + use std::io::{BufRead, BufReader}; - let stdout = String::from_utf8_lossy(&output.stdout); - for line in stdout.lines() { - if let Ok(msg) = serde_json::from_str::(line) { - if let RgMessage::Match { data } = msg { - return Ok(Some(Definition { - name: callee.to_string(), - kind: super::index::DefinitionKind::Function, - span: super::index::Span { - start_byte: 0, - end_byte: 0, - start_line: data.line_number.unwrap_or(1) as usize, - end_line: data.line_number.unwrap_or(1) as usize, - }, - file: PathBuf::from(&data.path.text), - })); + let escaped = regex::escape(callee); + + for pattern_def in DEFINITION_PATTERNS { + let pattern = pattern_def.pattern.replace("{NAME}", &escaped); + + let re = match regex::Regex::new(&pattern) { + Ok(r) => r, + Err(_) => continue, + }; + + for entry in walkdir::WalkDir::new(root) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + { + let path = entry.path(); + + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + + if !pattern_def.extensions.contains(&ext) { + continue; } - } - } - Ok(None) -} + let file = match fs::File::open(path) { + Ok(f) => f, + Err(_) => continue, + }; -#[derive(Deserialize)] -#[serde(tag = "type", rename_all = "lowercase")] -enum RgMessage { - Match { data: RgMatchData }, - #[serde(other)] - Other, -} + let reader = BufReader::new(file); -#[derive(Deserialize)] -struct 
RgMatchData { - path: RgText, - line_number: Option, -} + for (line_num, line) in reader.lines().enumerate() { + let line = match line { + Ok(l) => l, + Err(_) => continue, + }; -#[derive(Deserialize)] -struct RgText { - text: String, + if re.is_match(&line) { + return Ok(Some(Definition { + name: callee.to_string(), + kind: super::index::DefinitionKind::Function, + span: super::index::Span { + start_byte: 0, + end_byte: 0, + start_line: line_num + 1, + end_line: line_num + 1, + }, + file: path.to_path_buf(), + })); + } + } + } + } + + Ok(None) } pub struct Resolver<'a> { @@ -1481,4 +1522,79 @@ version = "0.1.0" let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); assert_eq!(ws.package_name(), "poetry-project"); } + + #[test] + fn test_resolve_by_search_rust() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("lib.rs"), + "pub fn my_function() {\n println!(\"hello\");\n}\n", + ) + .unwrap(); + + let found = resolve_by_search("my_function", dir.path()).unwrap(); + assert!(found.is_some()); + let def = found.unwrap(); + assert_eq!(def.name, "my_function"); + assert_eq!(def.span.start_line, 1); + } + + #[test] + fn test_resolve_by_search_python() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("utils.py"), + "def helper_func():\n pass\n", + ) + .unwrap(); + + let found = resolve_by_search("helper_func", dir.path()).unwrap(); + assert!(found.is_some()); + let def = found.unwrap(); + assert_eq!(def.name, "helper_func"); + } + + #[test] + fn test_resolve_by_search_go() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("main.go"), + "package main\n\nfunc ProcessData() {\n}\n", + ) + .unwrap(); + + let found = resolve_by_search("ProcessData", dir.path()).unwrap(); + assert!(found.is_some()); + let def = found.unwrap(); + assert_eq!(def.name, "ProcessData"); + } + + #[test] + fn test_resolve_by_search_typescript() { + let dir = TempDir::new().unwrap(); + + fs::write( + 
dir.path().join("index.ts"), + "export function fetchData() {\n return null;\n}\n", + ) + .unwrap(); + + let found = resolve_by_search("fetchData", dir.path()).unwrap(); + assert!(found.is_some()); + let def = found.unwrap(); + assert_eq!(def.name, "fetchData"); + } + + #[test] + fn test_resolve_by_search_not_found() { + let dir = TempDir::new().unwrap(); + + fs::write(dir.path().join("lib.rs"), "pub fn other() {}\n").unwrap(); + + let found = resolve_by_search("nonexistent", dir.path()).unwrap(); + assert!(found.is_none()); + } } From a74e036d84929c942b29f2cfdf6a7f6bbc9c291c Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 21:18:21 +0530 Subject: [PATCH 15/35] feat: track discovered files for lazy index population --- crates/code/src/resolve.rs | 53 +++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index de42fa7..0533d1a 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -1,3 +1,5 @@ +use std::cell::RefCell; +use std::collections::HashSet; use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; @@ -719,6 +721,7 @@ pub struct Resolver<'a> { index: &'a Index, workspace: Option>, root: PathBuf, + discovered_files: RefCell>, } impl<'a> Resolver<'a> { @@ -731,6 +734,7 @@ impl<'a> Resolver<'a> { index, workspace, root, + discovered_files: RefCell::new(HashSet::new()), } } @@ -757,7 +761,20 @@ impl<'a> Resolver<'a> { return Ok(Some(def)); } - resolve_by_search(callee, &self.root) + if let Some(def) = resolve_by_search(callee, &self.root)? 
{ + self.discovered_files.borrow_mut().insert(def.file.clone()); + return Ok(Some(def)); + } + + Ok(None) + } + + pub fn files_to_index(&self) -> Vec { + self.discovered_files.borrow().iter().cloned().collect() + } + + pub fn clear_discovered(&self) { + self.discovered_files.borrow_mut().clear(); } fn resolve_via_imports(&self, callee: &str, from_file: &Path) -> Option { @@ -1597,4 +1614,38 @@ version = "0.1.0" let found = resolve_by_search("nonexistent", dir.path()).unwrap(); assert!(found.is_none()); } + + #[test] + fn test_resolver_tracks_discovered_files() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("utils.rs"), + "pub fn discovered_func() {}\n", + ) + .unwrap(); + + fs::write( + dir.path().join("helpers.rs"), + "pub fn another_func() {}\n", + ) + .unwrap(); + + let index = Index::new(); + let resolver = Resolver::new(&index, None, dir.path().to_path_buf()); + + assert!(resolver.files_to_index().is_empty()); + + let _ = resolver.resolve("discovered_func", Path::new("main.rs")); + let files = resolver.files_to_index(); + assert_eq!(files.len(), 1); + assert!(files[0].ends_with("utils.rs")); + + let _ = resolver.resolve("another_func", Path::new("main.rs")); + let files = resolver.files_to_index(); + assert_eq!(files.len(), 2); + + resolver.clear_discovered(); + assert!(resolver.files_to_index().is_empty()); + } } From 4fc28ec008ff38fa5b164319151dc7abdfe7bf88 Mon Sep 17 00:00:00 2001 From: ro Date: Sat, 27 Dec 2025 22:19:10 -0800 Subject: [PATCH 16/35] feat: use grep crate for fast definition search, upgrade to rust nightly --- Cargo.lock | 107 +++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + crates/code/Cargo.toml | 1 + crates/code/src/resolve.rs | 57 ++++++++++---------- flake.nix | 2 +- 5 files changed, 138 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eaac4f7..9ef9f72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -670,6 +670,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "encoding_rs_io" 
+version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" +dependencies = [ + "encoding_rs", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -968,6 +977,7 @@ dependencies = [ "glimpse-core", "glimpse-fetch", "glob", + "grep", "libloading", "once_cell", "regex", @@ -1043,6 +1053,85 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "grep" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "309217bc53e2c691c314389c7fa91f9cd1a998cda19e25544ea47d94103880c3" +dependencies = [ + "grep-cli", + "grep-matcher", + "grep-printer", + "grep-regex", + "grep-searcher", +] + +[[package]] +name = "grep-cli" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf32d263c5d5cc2a23ce587097f5ddafdb188492ba2e6fb638eaccdc22453631" +dependencies = [ + "bstr", + "globset", + "libc", + "log", + "termcolor", + "winapi-util", +] + +[[package]] +name = "grep-matcher" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36d7b71093325ab22d780b40d7df3066ae4aebb518ba719d38c697a8228a8023" +dependencies = [ + "memchr", +] + +[[package]] +name = "grep-printer" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd76035e87871f51c1ee5b793e32122b3ccf9c692662d9622ef1686ff5321acb" +dependencies = [ + "bstr", + "grep-matcher", + "grep-searcher", + "log", + "serde", + "serde_json", + "termcolor", +] + +[[package]] +name = "grep-regex" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce0c256c3ad82bcc07b812c15a45ec1d398122e8e15124f96695234db7112ef" +dependencies = [ + "bstr", + "grep-matcher", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "grep-searcher" +version = "0.1.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac63295322dc48ebb20a25348147905d816318888e64f531bfc2a2bc0577dc34" +dependencies = [ + "bstr", + "encoding_rs", + "encoding_rs_io", + "grep-matcher", + "log", + "memchr", + "memmap2", +] + [[package]] name = "h2" version = "0.3.26" @@ -1647,6 +1736,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" @@ -2888,6 +2986,15 @@ dependencies = [ "utf-8", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.69" diff --git a/Cargo.toml b/Cargo.toml index 00e0e45..1fb96f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ rayon = "1.10.0" # Code analysis dependencies bincode = "1.3" cc = "1.2" +grep = "0.4" libloading = "0.8" tree-sitter = "0.25" diff --git a/crates/code/Cargo.toml b/crates/code/Cargo.toml index 6f67e6f..11d39ea 100644 --- a/crates/code/Cargo.toml +++ b/crates/code/Cargo.toml @@ -11,6 +11,7 @@ cc.workspace = true dirs.workspace = true git2.workspace = true glob.workspace = true +grep.workspace = true libloading.workspace = true once_cell.workspace = true regex.workspace = true diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index 0533d1a..395f285 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -659,15 +659,17 @@ const DEFINITION_PATTERNS: &[DefinitionPattern] = &[ ]; pub fn resolve_by_search(callee: &str, root: &Path) -> 
Result> { - use std::io::{BufRead, BufReader}; + use grep::regex::RegexMatcher; + use grep::searcher::sinks::UTF8; + use grep::searcher::Searcher; let escaped = regex::escape(callee); for pattern_def in DEFINITION_PATTERNS { let pattern = pattern_def.pattern.replace("{NAME}", &escaped); - let re = match regex::Regex::new(&pattern) { - Ok(r) => r, + let matcher = match RegexMatcher::new(&pattern) { + Ok(m) => m, Err(_) => continue, }; @@ -684,32 +686,29 @@ pub fn resolve_by_search(callee: &str, root: &Path) -> Result continue; } - let file = match fs::File::open(path) { - Ok(f) => f, - Err(_) => continue, - }; - - let reader = BufReader::new(file); - - for (line_num, line) in reader.lines().enumerate() { - let line = match line { - Ok(l) => l, - Err(_) => continue, - }; - - if re.is_match(&line) { - return Ok(Some(Definition { - name: callee.to_string(), - kind: super::index::DefinitionKind::Function, - span: super::index::Span { - start_byte: 0, - end_byte: 0, - start_line: line_num + 1, - end_line: line_num + 1, - }, - file: path.to_path_buf(), - })); - } + let mut found: Option<(u64, PathBuf)> = None; + + let _ = Searcher::new().search_path( + &matcher, + path, + UTF8(|line_num, _line| { + found = Some((line_num, path.to_path_buf())); + Ok(false) + }), + ); + + if let Some((line_num, file_path)) = found { + return Ok(Some(Definition { + name: callee.to_string(), + kind: super::index::DefinitionKind::Function, + span: super::index::Span { + start_byte: 0, + end_byte: 0, + start_line: line_num as usize, + end_line: line_num as usize, + }, + file: file_path, + })); } } } diff --git a/flake.nix b/flake.nix index 883033e..ed3c640 100644 --- a/flake.nix +++ b/flake.nix @@ -71,7 +71,7 @@ buildInputs = with pkgs; [ - rust-bin.stable.latest.default + rust-bin.nightly.latest.default pkg-config openssl cacert From 22cbff20dc087727fd9ef5475192b059fbc65f4d Mon Sep 17 00:00:00 2001 From: ro Date: Sun, 28 Dec 2025 21:05:24 -0800 Subject: [PATCH 17/35] refactor: stricter 
language-specific import resolution --- crates/code/src/extract.rs | 23 +-- crates/code/src/grammar.rs | 26 ++- crates/code/src/index.rs | 8 +- crates/code/src/resolve.rs | 331 ++++++++++++++++++++++++++------ crates/code/tests/extraction.rs | 10 +- 5 files changed, 316 insertions(+), 82 deletions(-) diff --git a/crates/code/src/extract.rs b/crates/code/src/extract.rs index 42c3f5b..09e830a 100644 --- a/crates/code/src/extract.rs +++ b/crates/code/src/extract.rs @@ -25,14 +25,14 @@ impl QuerySet { let calls = Query::new(&language, &entry.call_query) .with_context(|| format!("failed to compile call query for {}", entry.name))?; - let imports = if entry.import_query.trim().is_empty() { - None - } else { - Some( - Query::new(&language, &entry.import_query) - .with_context(|| format!("failed to compile import query for {}", entry.name))?, - ) - }; + let imports = + if entry.import_query.trim().is_empty() { + None + } else { + Some(Query::new(&language, &entry.import_query).with_context(|| { + format!("failed to compile import query for {}", entry.name) + })?) 
+ }; let def_name_idx = definitions .capture_index_for_name("name") @@ -124,12 +124,7 @@ impl Extractor { &self.language } - pub fn extract_definitions( - &self, - tree: &Tree, - source: &[u8], - path: &Path, - ) -> Vec { + pub fn extract_definitions(&self, tree: &Tree, source: &[u8], path: &Path) -> Vec { let mut cursor = QueryCursor::new(); let mut definitions = Vec::new(); let mut matches = cursor.matches(&self.queries.definitions, tree.root_node(), source); diff --git a/crates/code/src/grammar.rs b/crates/code/src/grammar.rs index 35229fa..e8481a0 100644 --- a/crates/code/src/grammar.rs +++ b/crates/code/src/grammar.rs @@ -363,9 +363,21 @@ mod tests { fn test_all_languages_have_queries() { let registry = Registry::load().unwrap(); for lang in registry.languages() { - assert!(!lang.definition_query.is_empty(), "{} missing definition_query", lang.name); - assert!(!lang.call_query.is_empty(), "{} missing call_query", lang.name); - assert!(!lang.import_query.is_empty(), "{} missing import_query", lang.name); + assert!( + !lang.definition_query.is_empty(), + "{} missing definition_query", + lang.name + ); + assert!( + !lang.call_query.is_empty(), + "{} missing call_query", + lang.name + ); + assert!( + !lang.import_query.is_empty(), + "{} missing import_query", + lang.name + ); } } @@ -408,7 +420,9 @@ mod tests { fn test_load_rust_grammar() { let language = load_language("rust").expect("failed to load rust grammar"); let mut parser = tree_sitter::Parser::new(); - parser.set_language(&language).expect("failed to set language"); + parser + .set_language(&language) + .expect("failed to set language"); let source = "fn main() { println!(\"Hello\"); }"; let tree = parser.parse(source, None).expect("failed to parse"); @@ -423,7 +437,9 @@ mod tests { fn test_load_by_extension() { let language = load_language_by_extension("rs").expect("failed to load by extension"); let mut parser = tree_sitter::Parser::new(); - parser.set_language(&language).expect("failed to set 
language"); + parser + .set_language(&language) + .expect("failed to set language"); let tree = parser.parse("fn foo() {}", None).expect("failed to parse"); assert_eq!(tree.root_node().kind(), "source_file"); diff --git a/crates/code/src/index.rs b/crates/code/src/index.rs index 8d8218b..33fa59d 100644 --- a/crates/code/src/index.rs +++ b/crates/code/src/index.rs @@ -1,5 +1,5 @@ -use std::collections::HashMap; use std::collections::hash_map::DefaultHasher; +use std::collections::HashMap; use std::fs::{self, File}; use std::hash::{Hash, Hasher}; use std::io::{BufReader, BufWriter}; @@ -148,7 +148,8 @@ pub fn save_index(index: &Index, root: &Path) -> Result<()> { let dir = path.parent().unwrap(); fs::create_dir_all(dir).with_context(|| format!("failed to create {}", dir.display()))?; - let file = File::create(&path).with_context(|| format!("failed to create {}", path.display()))?; + let file = + File::create(&path).with_context(|| format!("failed to create {}", path.display()))?; let writer = BufWriter::new(file); bincode::serialize_into(writer, index).context("failed to serialize index")?; @@ -180,7 +181,8 @@ pub fn clear_index(root: &Path) -> Result<()> { let path = index_path(root)?; if let Some(dir) = path.parent() { if dir.exists() { - fs::remove_dir_all(dir).with_context(|| format!("failed to remove {}", dir.display()))?; + fs::remove_dir_all(dir) + .with_context(|| format!("failed to remove {}", dir.display()))?; } } Ok(()) diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index 395f285..f549833 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -154,7 +154,6 @@ impl RustWorkspace { } } - #[derive(Debug, Clone)] pub struct GoWorkspace { root: PathBuf, @@ -197,8 +196,8 @@ impl WorkspaceDiscovery for GoWorkspace { return Ok(None); } - let go_mod: GoMod = serde_json::from_slice(&output.stdout) - .context("failed to parse go mod output")?; + let go_mod: GoMod = + serde_json::from_slice(&output.stdout).context("failed to 
parse go mod output")?; Ok(Some(Box::new(GoWorkspace { root: root.to_path_buf(), @@ -270,6 +269,33 @@ fn parse_go_mod_fallback(content: &str) -> Option { None } +fn path_matches_module(file: &Path, module_parts: &[&str]) -> bool { + if module_parts.is_empty() { + return true; + } + + let file_no_ext = file.with_extension(""); + let file_parts: Vec<&str> = file_no_ext + .components() + .filter_map(|c| c.as_os_str().to_str()) + .collect(); + + let dir_parts = if module_parts.len() > 1 { + &module_parts[..module_parts.len() - 1] + } else { + return true; + }; + + if dir_parts.len() > file_parts.len() { + return false; + } + + dir_parts + .iter() + .rev() + .zip(file_parts.iter().rev()) + .all(|(module, file)| *module == *file) +} #[derive(Debug, Clone)] pub struct TsWorkspace { @@ -344,7 +370,11 @@ impl WorkspaceDiscovery for TsWorkspace { } } - Ok(Some(Box::new(TsWorkspace { root: root.to_path_buf(), name, paths }))) + Ok(Some(Box::new(TsWorkspace { + root: root.to_path_buf(), + name, + paths, + }))) } fn resolve_module(&self, module_path: &str) -> Option { @@ -413,7 +443,6 @@ impl TsWorkspace { } } - #[derive(Debug, Clone)] pub struct PythonWorkspace { root: PathBuf, @@ -463,7 +492,13 @@ impl WorkspaceDiscovery for PythonWorkspace { let name = pyproject .project .and_then(|p| p.name) - .or_else(|| pyproject.tool.as_ref().and_then(|t| t.poetry.as_ref()).and_then(|p| p.name.clone())) + .or_else(|| { + pyproject + .tool + .as_ref() + .and_then(|t| t.poetry.as_ref()) + .and_then(|p| p.name.clone()) + }) .unwrap_or_else(|| { root.file_name() .unwrap_or_default() @@ -558,7 +593,6 @@ impl PythonWorkspace { } } - fn expand_glob(root: &Path, pattern: &str) -> Result> { let full_pattern = root.join(pattern); let pattern_str = full_pattern.to_string_lossy(); @@ -566,11 +600,12 @@ fn expand_glob(root: &Path, pattern: &str) -> Result> { let mut results = Vec::new(); if pattern.contains('*') { - for entry in glob::glob(&pattern_str).with_context(|| "invalid glob pattern")? 
{ - if let Ok(path) = entry { - if path.is_dir() && path.join("Cargo.toml").exists() { - results.push(path); - } + for path in glob::glob(&pattern_str) + .with_context(|| "invalid glob pattern")? + .flatten() + { + if path.is_dir() && path.join("Cargo.toml").exists() { + results.push(path); } } } else { @@ -592,7 +627,8 @@ fn parse_crate_member(path: &Path) -> Result> { let content = fs::read_to_string(&cargo_path) .with_context(|| format!("failed to read {}", cargo_path.display()))?; - let cargo: CargoToml = toml::from_str(&content).with_context(|| "failed to parse Cargo.toml")?; + let cargo: CargoToml = + toml::from_str(&content).with_context(|| "failed to parse Cargo.toml")?; let name = cargo .package @@ -777,60 +813,177 @@ impl<'a> Resolver<'a> { } fn resolve_via_imports(&self, callee: &str, from_file: &Path) -> Option { + let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or(""); + + match ext { + "rs" => self.resolve_rust_import(callee, from_file), + "py" => self.resolve_python_import(callee, from_file), + "go" => self.resolve_go_import(callee, from_file), + "ts" | "tsx" | "js" | "jsx" | "mjs" | "cjs" => { + self.resolve_ts_import(callee, from_file) + } + _ => self.resolve_generic_import(callee, from_file), + } + } + + fn resolve_rust_import(&self, callee: &str, from_file: &Path) -> Option { let record = self.index.get(from_file)?; for import in &record.imports { - let visible_name = import - .alias - .as_deref() - .or_else(|| import.module_path.rsplit("::").next()) - .or_else(|| import.module_path.rsplit('.').next())?; + let segments: Vec<&str> = import.module_path.split("::").collect(); + let symbol_name = segments.last()?; + + let visible_name = import.alias.as_deref().unwrap_or(symbol_name); + if visible_name != callee { + continue; + } + + if let Some(ref ws) = self.workspace { + if let Some(resolved_path) = ws.resolve_module(&import.module_path) { + if let Some(def) = self.find_def_in_file(&resolved_path, symbol_name) { + return Some(def); 
+ } + } + } + + let filtered_parts: Vec<&str> = segments + .iter() + .copied() + .filter(|p| !p.is_empty() && *p != "crate" && *p != "self" && *p != "super") + .collect(); + + for def in self.index.definitions() { + if def.name != *symbol_name { + continue; + } + if path_matches_module(&def.file, &filtered_parts) { + return Some(def.clone()); + } + } + } + + None + } + + fn resolve_python_import(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + + for import in &record.imports { + let segments: Vec<&str> = import.module_path.split('.').collect(); + let symbol_name = segments.last()?; + let visible_name = import.alias.as_deref().unwrap_or(symbol_name); if visible_name != callee { continue; } - let original_name = import - .module_path - .rsplit("::") - .next() - .or_else(|| import.module_path.rsplit('.').next()) - .unwrap_or(callee); + if let Some(ref ws) = self.workspace { + if let Some(resolved_path) = ws.resolve_module(&import.module_path) { + if let Some(def) = self.find_def_in_file(&resolved_path, symbol_name) { + return Some(def); + } + } + } + + for def in self.index.definitions() { + if def.name != *symbol_name { + continue; + } + if path_matches_module(&def.file, &segments) { + return Some(def.clone()); + } + } + } + + None + } + + fn resolve_go_import(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + + for import in &record.imports { + let pkg_name = import + .alias + .as_deref() + .or_else(|| import.module_path.rsplit('/').next())?; + + if pkg_name != callee { + continue; + } if let Some(ref ws) = self.workspace { if let Some(resolved_path) = ws.resolve_module(&import.module_path) { if let Some(target_record) = self.index.get(&resolved_path) { - if let Some(def) = target_record - .definitions - .iter() - .find(|d| d.name == original_name) - { + if let Some(def) = target_record.definitions.first() { return Some(def.clone()); } } } } + } - let module_path_normalized = 
import.module_path.replace('.', "::"); - let module_parts: Vec<&str> = module_path_normalized - .split("::") - .filter(|p| !p.is_empty() && *p != "crate" && *p != "self" && *p != "super") - .collect(); + None + } + + fn resolve_ts_import(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + + for import in &record.imports { + let segments: Vec<&str> = import.module_path.split('/').collect(); + let module_name = segments.last()?; + + let visible_name = import.alias.as_deref().unwrap_or(module_name); + if visible_name != callee { + continue; + } + + if let Some(ref ws) = self.workspace { + if let Some(resolved_path) = ws.resolve_module(&import.module_path) { + if let Some(def) = self.find_def_in_file(&resolved_path, callee) { + return Some(def); + } + } + } for def in self.index.definitions() { - if def.name != original_name { + if def.name != callee { continue; } - - if module_parts.len() <= 1 { + if path_matches_module(&def.file, &segments) { return Some(def.clone()); } + } + } + + None + } - let file_str = def.file.to_string_lossy(); - let path_parts = &module_parts[..module_parts.len() - 1]; - let matches = path_parts.iter().all(|part| file_str.contains(part)); + fn resolve_generic_import(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + + for import in &record.imports { + let normalized = import.module_path.replace('.', "::"); + let segments: Vec<&str> = normalized.split("::").filter(|p| !p.is_empty()).collect(); + let symbol_name = segments.last()?; + + let visible_name = import.alias.as_deref().unwrap_or(symbol_name); + if visible_name != callee { + continue; + } + + if let Some(ref ws) = self.workspace { + if let Some(resolved_path) = ws.resolve_module(&import.module_path) { + if let Some(def) = self.find_def_in_file(&resolved_path, symbol_name) { + return Some(def); + } + } + } - if matches { + for def in self.index.definitions() { + if def.name != *symbol_name { + 
continue; + } + if path_matches_module(&def.file, &segments) { return Some(def.clone()); } } @@ -838,6 +991,11 @@ impl<'a> Resolver<'a> { None } + + fn find_def_in_file(&self, file: &Path, name: &str) -> Option { + let record = self.index.get(file)?; + record.definitions.iter().find(|d| d.name == name).cloned() + } } #[cfg(test)] @@ -1227,15 +1385,27 @@ version = "0.1.0" ) .unwrap(); - fs::write(dir.path().join("main.go"), "package main\n\nfunc main() {}\n").unwrap(); + fs::write( + dir.path().join("main.go"), + "package main\n\nfunc main() {}\n", + ) + .unwrap(); let pkg_dir = dir.path().join("pkg/utils"); fs::create_dir_all(&pkg_dir).unwrap(); - fs::write(pkg_dir.join("helpers.go"), "package utils\n\nfunc Helper() {}\n").unwrap(); + fs::write( + pkg_dir.join("helpers.go"), + "package utils\n\nfunc Helper() {}\n", + ) + .unwrap(); let internal_dir = dir.path().join("internal/core"); fs::create_dir_all(&internal_dir).unwrap(); - fs::write(internal_dir.join("core.go"), "package core\n\nfunc Process() {}\n").unwrap(); + fs::write( + internal_dir.join("core.go"), + "package core\n\nfunc Process() {}\n", + ) + .unwrap(); dir } @@ -1327,13 +1497,24 @@ version = "0.1.0" let utils_dir = src_dir.join("utils"); fs::create_dir_all(&utils_dir).unwrap(); - fs::write(utils_dir.join("helpers.ts"), "export const helper = () => {};\n").unwrap(); + fs::write( + utils_dir.join("helpers.ts"), + "export const helper = () => {};\n", + ) + .unwrap(); let components_dir = src_dir.join("components"); fs::create_dir_all(&components_dir).unwrap(); - fs::write(components_dir.join("Button.tsx"), "export const Button = () => null;\n") - .unwrap(); - fs::write(components_dir.join("index.ts"), "export * from './Button';\n").unwrap(); + fs::write( + components_dir.join("Button.tsx"), + "export const Button = () => null;\n", + ) + .unwrap(); + fs::write( + components_dir.join("index.ts"), + "export * from './Button';\n", + ) + .unwrap(); dir } @@ -1618,17 +1799,9 @@ version = "0.1.0" fn 
test_resolver_tracks_discovered_files() { let dir = TempDir::new().unwrap(); - fs::write( - dir.path().join("utils.rs"), - "pub fn discovered_func() {}\n", - ) - .unwrap(); + fs::write(dir.path().join("utils.rs"), "pub fn discovered_func() {}\n").unwrap(); - fs::write( - dir.path().join("helpers.rs"), - "pub fn another_func() {}\n", - ) - .unwrap(); + fs::write(dir.path().join("helpers.rs"), "pub fn another_func() {}\n").unwrap(); let index = Index::new(); let resolver = Resolver::new(&index, None, dir.path().to_path_buf()); @@ -1647,4 +1820,46 @@ version = "0.1.0" resolver.clear_discovered(); assert!(resolver.files_to_index().is_empty()); } + + #[test] + fn test_path_matches_module_exact_suffix() { + let file = PathBuf::from("src/utils/helpers.rs"); + assert!(path_matches_module(&file, &["utils", "helpers", "func"])); + assert!(path_matches_module(&file, &["helpers", "func"])); + assert!(!path_matches_module(&file, &["other", "helpers", "func"])); + } + + #[test] + fn test_path_matches_module_single_part() { + let file = PathBuf::from("src/main.rs"); + assert!(path_matches_module(&file, &["func"])); + assert!(path_matches_module(&file, &[])); + } + + #[test] + fn test_path_matches_module_no_false_substring_match() { + let file = PathBuf::from("src/my_utils/helpers.rs"); + assert!(!path_matches_module(&file, &["utils", "helpers", "func"])); + assert!(path_matches_module(&file, &["my_utils", "helpers", "func"])); + } + + #[test] + fn test_path_matches_module_deep_path() { + let file = PathBuf::from("crates/core/src/utils/helpers.rs"); + assert!(path_matches_module(&file, &["utils", "helpers", "func"])); + assert!(path_matches_module(&file, &["src", "utils", "helpers", "func"])); + assert!(!path_matches_module( + &file, + &["wrong", "src", "utils", "helpers", "func"] + )); + } + + #[test] + fn test_path_matches_module_too_many_parts() { + let file = PathBuf::from("src/helpers.rs"); + assert!(!path_matches_module( + &file, + &["deeply", "nested", "utils", "helpers", 
"func"] + )); + } } diff --git a/crates/code/tests/extraction.rs b/crates/code/tests/extraction.rs index ba7b5d4..f1e19fe 100644 --- a/crates/code/tests/extraction.rs +++ b/crates/code/tests/extraction.rs @@ -75,7 +75,11 @@ fn process(cfg: Config) { let callers: Vec<_> = result .calls .iter() - .filter_map(|c| c.caller.as_ref().map(|caller| (caller.as_str(), c.callee.as_str()))) + .filter_map(|c| { + c.caller + .as_ref() + .map(|caller| (caller.as_str(), c.callee.as_str())) + }) .collect(); assert!(callers.contains(&("main", "helper"))); @@ -148,7 +152,9 @@ class Config: assert!(!result.imports.is_empty()); let paths: Vec<_> = result.imports.iter().map(|i| &i.module_path).collect(); - assert!(paths.iter().any(|p| p.contains("os") || p.contains("pathlib"))); + assert!(paths + .iter() + .any(|p| p.contains("os") || p.contains("pathlib"))); } } From 20e19e1f2aa23697c90d17913a57a91c90fe8eea Mon Sep 17 00:00:00 2001 From: ro Date: Sun, 28 Dec 2025 21:16:10 -0800 Subject: [PATCH 18/35] feat: add zig, java, scala import resolvers with local package discovery --- crates/code/src/resolve.rs | 401 ++++++++++++++++++++++++++++++++++++- 1 file changed, 400 insertions(+), 1 deletion(-) diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index f549833..4cf0023 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -297,6 +297,29 @@ fn path_matches_module(file: &Path, module_parts: &[&str]) -> bool { .all(|(module, file)| *module == *file) } +fn path_matches_java_package(file: &Path, package_parts: &[&str]) -> bool { + let file_parts: Vec<&str> = file + .components() + .filter_map(|c| c.as_os_str().to_str()) + .collect(); + + if package_parts.is_empty() { + return true; + } + + if package_parts.len() > file_parts.len() { + return false; + } + + for window in file_parts.windows(package_parts.len()) { + if window == package_parts { + return true; + } + } + + false +} + #[derive(Debug, Clone)] pub struct TsWorkspace { root: PathBuf, @@ -593,6 
+616,115 @@ impl PythonWorkspace { } } +#[derive(Debug, Clone)] +pub struct ZigWorkspace { + root: PathBuf, + packages: Vec<(String, PathBuf)>, +} + +impl WorkspaceDiscovery for ZigWorkspace { + fn discover(root: &Path) -> Result>> { + let zon_path = root.join("build.zig.zon"); + if !zon_path.exists() { + return Ok(None); + } + + let content = fs::read_to_string(&zon_path) + .with_context(|| format!("failed to read {}", zon_path.display()))?; + + let packages = parse_zig_zon_deps(&content, root); + + Ok(Some(Box::new(ZigWorkspace { + root: root.to_path_buf(), + packages, + }))) + } + + fn resolve_module(&self, module_path: &str) -> Option { + if module_path == "std" { + return None; + } + + if module_path.ends_with(".zig") || module_path.contains('/') { + let resolved = self.root.join("src").join(module_path); + if resolved.exists() { + return Some(resolved); + } + let resolved = self.root.join(module_path); + if resolved.exists() { + return Some(resolved); + } + return None; + } + + for (name, pkg_path) in &self.packages { + if name == module_path { + return find_zig_root_file(pkg_path, name); + } + } + + None + } + + fn root(&self) -> &Path { + &self.root + } +} + +impl ZigWorkspace { + pub fn packages(&self) -> &[(String, PathBuf)] { + &self.packages + } +} + +fn parse_zig_zon_deps(content: &str, root: &Path) -> Vec<(String, PathBuf)> { + let mut packages = Vec::new(); + + for line in content.lines() { + let line = line.trim(); + if !line.starts_with('.') || !line.contains('=') { + continue; + } + + let Some(name_end) = line.find('=') else { + continue; + }; + let name = line[1..name_end].trim().trim_matches(|c| c == ' '); + + if !line.contains(".path") { + continue; + } + + if let Some(path_start) = line.find(".path") { + let rest = &line[path_start..]; + if let Some(quote_start) = rest.find('"') { + let after_quote = &rest[quote_start + 1..]; + if let Some(quote_end) = after_quote.find('"') { + let path_str = &after_quote[..quote_end]; + 
packages.push((name.to_string(), root.join(path_str))); + } + } + } + } + + packages +} + +fn find_zig_root_file(pkg_path: &Path, name: &str) -> Option { + let candidates = [ + pkg_path.join("src/root.zig"), + pkg_path.join("src/lib.zig"), + pkg_path.join("src/main.zig"), + pkg_path.join(format!("src/{}.zig", name)), + pkg_path.join(format!("{}.zig", name)), + pkg_path.join("root.zig"), + pkg_path.join("lib.zig"), + pkg_path.join("main.zig"), + ]; + + candidates.into_iter().find(|c| c.exists()) +} + fn expand_glob(root: &Path, pattern: &str) -> Result> { let full_pattern = root.join(pattern); let pattern_str = full_pattern.to_string_lossy(); @@ -819,6 +951,9 @@ impl<'a> Resolver<'a> { "rs" => self.resolve_rust_import(callee, from_file), "py" => self.resolve_python_import(callee, from_file), "go" => self.resolve_go_import(callee, from_file), + "zig" => self.resolve_zig_import(callee, from_file), + "java" => self.resolve_java_import(callee, from_file), + "scala" | "sc" => self.resolve_scala_import(callee, from_file), "ts" | "tsx" | "js" | "jsx" | "mjs" | "cjs" => { self.resolve_ts_import(callee, from_file) } @@ -958,6 +1093,121 @@ impl<'a> Resolver<'a> { None } + fn resolve_zig_import(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + + for import in &record.imports { + let import_name = import.alias.as_deref().unwrap_or(""); + if import_name != callee { + continue; + } + + let import_path = &import.module_path; + + if import_path == "std" { + continue; + } + + if import_path.ends_with(".zig") || import_path.contains('/') { + let from_dir = from_file.parent()?; + let resolved = from_dir.join(import_path); + if resolved.exists() { + if let Some(record) = self.index.get(&resolved) { + if let Some(def) = record.definitions.first() { + return Some(def.clone()); + } + } + } + continue; + } + + if let Some(ref ws) = self.workspace { + if let Some(resolved_path) = ws.resolve_module(import_path) { + if let Some(target_record) = 
self.index.get(&resolved_path) { + if let Some(def) = target_record.definitions.first() { + return Some(def.clone()); + } + } + } + } + } + + None + } + + fn resolve_java_import(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + + for import in &record.imports { + let segments: Vec<&str> = import.module_path.split('.').collect(); + let class_name = segments.last()?; + + if *class_name != callee && *class_name != "*" { + continue; + } + + if *class_name == "*" { + let pkg_segments = &segments[..segments.len() - 1]; + for def in self.index.definitions() { + if def.name != callee { + continue; + } + if path_matches_java_package(&def.file, pkg_segments) { + return Some(def.clone()); + } + } + } else { + for def in self.index.definitions() { + if def.name != *class_name { + continue; + } + if path_matches_java_package(&def.file, &segments[..segments.len() - 1]) { + return Some(def.clone()); + } + } + } + } + + None + } + + fn resolve_scala_import(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + + for import in &record.imports { + let segments: Vec<&str> = import.module_path.split('.').collect(); + let symbol_name = segments.last()?; + + let visible_name = import.alias.as_deref().unwrap_or(symbol_name); + if visible_name != callee && *symbol_name != "_" { + continue; + } + + if *symbol_name == "_" { + let pkg_segments = &segments[..segments.len() - 1]; + for def in self.index.definitions() { + if def.name != callee { + continue; + } + if path_matches_java_package(&def.file, pkg_segments) { + return Some(def.clone()); + } + } + } else { + for def in self.index.definitions() { + if def.name != *symbol_name { + continue; + } + if path_matches_java_package(&def.file, &segments[..segments.len() - 1]) { + return Some(def.clone()); + } + } + } + } + + None + } + fn resolve_generic_import(&self, callee: &str, from_file: &Path) -> Option { let record = self.index.get(from_file)?; @@ 
-1847,7 +2097,10 @@ version = "0.1.0" fn test_path_matches_module_deep_path() { let file = PathBuf::from("crates/core/src/utils/helpers.rs"); assert!(path_matches_module(&file, &["utils", "helpers", "func"])); - assert!(path_matches_module(&file, &["src", "utils", "helpers", "func"])); + assert!(path_matches_module( + &file, + &["src", "utils", "helpers", "func"] + )); assert!(!path_matches_module( &file, &["wrong", "src", "utils", "helpers", "func"] @@ -1862,4 +2115,150 @@ version = "0.1.0" &["deeply", "nested", "utils", "helpers", "func"] )); } + + fn setup_zig_workspace() -> TempDir { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("build.zig.zon"), + r#" +.{ + .name = .myproject, + .dependencies = .{ + .utils = .{ .path = "vendor/utils" }, + .remote_pkg = .{ .url = "https://example.com/pkg.tar.gz" }, + }, +} +"#, + ) + .unwrap(); + + fs::write( + dir.path().join("build.zig"), + "const std = @import(\"std\");\n", + ) + .unwrap(); + + let src_dir = dir.path().join("src"); + fs::create_dir_all(&src_dir).unwrap(); + fs::write(src_dir.join("main.zig"), "const std = @import(\"std\");\n").unwrap(); + fs::write(src_dir.join("helper.zig"), "pub fn help() void {}\n").unwrap(); + + let vendor_dir = dir.path().join("vendor/utils"); + fs::create_dir_all(&vendor_dir).unwrap(); + fs::write(vendor_dir.join("main.zig"), "pub fn utilFn() void {}\n").unwrap(); + + dir + } + + #[test] + fn test_zig_workspace_discovery() { + let dir = setup_zig_workspace(); + let ws = ZigWorkspace::discover(dir.path()).unwrap(); + + assert!(ws.is_some()); + let ws = ws.unwrap(); + assert_eq!(ws.root(), dir.path()); + assert_eq!(ws.packages().len(), 1); + assert_eq!(ws.packages()[0].0, "utils"); + } + + #[test] + fn test_zig_workspace_no_zon() { + let dir = TempDir::new().unwrap(); + let ws = ZigWorkspace::discover(dir.path()).unwrap(); + assert!(ws.is_none()); + } + + #[test] + fn test_zig_workspace_resolve_local_package() { + let dir = setup_zig_workspace(); + let ws = 
ZigWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("utils"); + assert!(resolved.is_some()); + assert!(resolved.unwrap().ends_with("vendor/utils/main.zig")); + } + + #[test] + fn test_zig_workspace_skip_std() { + let dir = setup_zig_workspace(); + let ws = ZigWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("std"); + assert!(resolved.is_none()); + } + + #[test] + fn test_zig_workspace_skip_unknown_package() { + let dir = setup_zig_workspace(); + let ws = ZigWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("remote_pkg"); + assert!(resolved.is_none()); + + let resolved = ws.resolve_module("nonexistent"); + assert!(resolved.is_none()); + } + + #[test] + fn test_zig_workspace_resolve_relative_import() { + let dir = setup_zig_workspace(); + let ws = ZigWorkspace::discover(dir.path()).unwrap().unwrap(); + + let resolved = ws.resolve_module("helper.zig"); + assert!(resolved.is_some()); + } + + #[test] + fn test_parse_zig_zon_deps() { + let content = r#" +.{ + .name = .test, + .dependencies = .{ + .foo = .{ .path = "libs/foo" }, + .bar = .{ .path = "vendor/bar" }, + .remote = .{ .url = "https://example.com" }, + }, +} +"#; + let root = Path::new("/project"); + let deps = parse_zig_zon_deps(content, root); + + assert_eq!(deps.len(), 2); + assert!(deps + .iter() + .any(|(n, p)| n == "foo" && p == Path::new("/project/libs/foo"))); + assert!(deps + .iter() + .any(|(n, p)| n == "bar" && p == Path::new("/project/vendor/bar"))); + } + + #[test] + fn test_path_matches_java_package() { + let file = PathBuf::from("src/main/java/com/example/MyClass.java"); + assert!(path_matches_java_package(&file, &["com", "example"])); + assert!(path_matches_java_package(&file, &["example"])); + assert!(!path_matches_java_package(&file, &["org", "example"])); + } + + #[test] + fn test_path_matches_java_package_empty() { + let file = PathBuf::from("src/MyClass.java"); + 
assert!(path_matches_java_package(&file, &[])); + } + + #[test] + fn test_path_matches_java_package_deeply_nested() { + let file = PathBuf::from("src/main/java/com/example/internal/utils/Helper.java"); + assert!(path_matches_java_package( + &file, + &["com", "example", "internal", "utils"] + )); + assert!(path_matches_java_package(&file, &["internal", "utils"])); + assert!(!path_matches_java_package( + &file, + &["com", "other", "internal", "utils"] + )); + } } From adfe3bf9c2745e9cd08bf7ef703197a10707b73d Mon Sep 17 00:00:00 2001 From: ro Date: Sun, 28 Dec 2025 21:45:12 -0800 Subject: [PATCH 19/35] refactor: simplify module resolution with universal glob-based search --- crates/code/src/resolve.rs | 2319 ++++++++---------------------------- 1 file changed, 487 insertions(+), 1832 deletions(-) diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index 4cf0023..aba1b82 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -1,789 +1,385 @@ use std::cell::RefCell; use std::collections::HashSet; -use std::fs; use std::path::{Path, PathBuf}; -use std::process::Command; -use anyhow::{Context, Result}; -use serde::Deserialize; - -use super::index::{Definition, Index}; - -pub trait WorkspaceDiscovery: Send + Sync { - fn discover(root: &Path) -> Result>> - where - Self: Sized; - - fn resolve_module(&self, module_path: &str) -> Option; - - fn root(&self) -> &Path; -} - -#[derive(Debug, Clone)] -pub struct RustWorkspace { - root: PathBuf, - members: Vec, -} - -#[derive(Debug, Clone)] -pub struct CrateMember { - pub name: String, - pub path: PathBuf, -} - -#[derive(Deserialize)] -struct CargoToml { - package: Option, - workspace: Option, -} - -#[derive(Deserialize)] -struct CargoPackage { - name: String, -} +use anyhow::Result; + +use super::index::{Definition, DefinitionKind, Index, Span}; + +const SYSTEM_HEADERS: &[&str] = &[ + "stdio", + "stdlib", + "string", + "math", + "time", + "errno", + "assert", + "ctype", + "signal", + "stdarg", 
/// Header names (or directory prefixes such as "sys/") identifying C/C++
/// standard-library and common POSIX includes. Imports matching one of these
/// can never resolve to a file inside the workspace and are skipped.
/// Duplicate entries from the original list ("string", "locale", "limits",
/// "numbers") have been removed; membership semantics are unchanged.
const SYSTEM_HEADERS: &[&str] = &[
    // C standard library
    "stdio", "stdlib", "string", "math", "time", "errno", "assert", "ctype",
    "signal", "stdarg", "stddef", "setjmp", "locale", "limits", "float",
    "iso646", "stdbool", "stdint", "inttypes", "wchar", "wctype", "fenv",
    "complex", "tgmath", "stdalign", "stdnoreturn", "stdatomic", "threads",
    "uchar",
    // C++ standard library
    "iostream", "vector", "map", "set", "unordered_map", "unordered_set",
    "algorithm", "memory", "functional", "utility", "tuple", "array", "deque",
    "list", "forward_list", "stack", "queue", "priority_queue", "bitset",
    "valarray", "regex", "random", "chrono", "ratio", "thread", "mutex",
    "condition_variable", "future", "atomic", "filesystem", "optional",
    "variant", "any", "string_view", "charconv", "execution", "span",
    "ranges", "numbers", "concepts", "coroutine", "compare", "version",
    "source_location", "format", "bit", "typeinfo", "typeindex",
    "type_traits", "initializer_list", "new", "exception", "stdexcept",
    "system_error", "cerrno", "cassert", "cctype", "cfenv", "cfloat",
    "cinttypes", "climits", "clocale", "cmath", "csetjmp", "csignal",
    "cstdarg", "cstddef", "cstdint", "cstdio", "cstdlib", "cstring", "ctime",
    "cuchar", "cwchar", "cwctype", "codecvt", "fstream", "iomanip", "ios",
    "iosfwd", "istream", "ostream", "sstream", "streambuf", "syncstream",
    "iterator", "numeric",
    // POSIX
    "unistd", "fcntl", "sys/", "pthread", "netinet/", "arpa/", "dirent",
    "dlfcn", "poll", "sched", "semaphore", "spawn", "termios",
];

/// Returns true when `path` names a system header after stripping the
/// include delimiters (`<...>` or `"..."`).
///
/// NOTE(review): matching is by prefix, so e.g. "mathutils.h" also matches
/// the "math" entry — deliberately loose to catch ".h"-suffixed spellings;
/// kept as-is to preserve behavior.
fn is_system_header(path: &str) -> bool {
    let clean = path.trim_matches(|c| c == '<' || c == '>' || c == '"');
    SYSTEM_HEADERS.iter().any(|s| clean.starts_with(s))
}
{ - return Ok(None); - } - - let content = fs::read_to_string(&cargo_path) - .with_context(|| format!("failed to read {}", cargo_path.display()))?; - - let cargo: CargoToml = - toml::from_str(&content).with_context(|| "failed to parse Cargo.toml")?; - - let mut members = Vec::new(); - - if let Some(ws) = cargo.workspace { - if let Some(member_globs) = ws.members { - for pattern in member_globs { - let expanded = expand_glob(root, &pattern)?; - for member_path in expanded { - if let Some(member) = parse_crate_member(&member_path)? { - members.push(member); - } - } - } - } - } - - if let Some(pkg) = cargo.package { - members.push(CrateMember { - name: pkg.name, - path: root.to_path_buf(), - }); - } - - if members.is_empty() { - return Ok(None); - } - - Ok(Some(Box::new(RustWorkspace { - root: root.to_path_buf(), - members, - }))) - } - - fn resolve_module(&self, module_path: &str) -> Option { - let parts: Vec<&str> = module_path.split("::").collect(); - if parts.is_empty() { - return None; - } - - let crate_name = parts[0]; - - if crate_name == "crate" || crate_name == "self" || crate_name == "super" { - return None; - } - - let member = self.members.iter().find(|m| m.name == crate_name)?; - let src_dir = member.path.join("src"); - - if parts.len() == 1 { - let lib_rs = src_dir.join("lib.rs"); - if lib_rs.exists() { - return Some(lib_rs); - } - let main_rs = src_dir.join("main.rs"); - if main_rs.exists() { - return Some(main_rs); - } - return None; - } - - let mut path = src_dir; - for part in &parts[1..] 
{ - path = path.join(part); - } - - if path.with_extension("rs").exists() { - return Some(path.with_extension("rs")); - } +fn normalize_to_patterns(import_path: &str, lang: &str) -> Vec { + let clean = import_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); - let mod_path = path.join("mod.rs"); - if mod_path.exists() { - return Some(mod_path); + match lang { + "rust" | "rs" => normalize_rust_import(clean), + "python" | "py" => normalize_python_import(clean), + "go" => normalize_go_import(clean), + "typescript" | "ts" | "tsx" | "javascript" | "js" | "mjs" | "cjs" | "jsx" => { + normalize_js_import(clean) } - - None - } - - fn root(&self) -> &Path { - &self.root + "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => normalize_c_import(clean), + "java" => normalize_java_import(clean), + "scala" | "sc" => normalize_scala_import(clean), + "zig" => normalize_zig_import(clean), + _ => vec![format!("**/{}", clean)], } } -impl RustWorkspace { - pub fn members(&self) -> &[CrateMember] { - &self.members - } +fn normalize_rust_import(path: &str) -> Vec { + let stripped = path + .trim_start_matches("crate::") + .trim_start_matches("self::") + .trim_start_matches("super::"); - pub fn resolve_crate(&self, crate_name: &str) -> Option<&PathBuf> { - self.members - .iter() - .find(|m| m.name == crate_name) - .map(|m| &m.path) + let parts: Vec<&str> = stripped.split("::").filter(|p| !p.is_empty()).collect(); + if parts.is_empty() { + return vec![]; } -} - -#[derive(Debug, Clone)] -pub struct GoWorkspace { - root: PathBuf, - module_path: String, -} - -#[derive(Deserialize)] -struct GoMod { - #[serde(rename = "Module")] - module: GoModule, -} -#[derive(Deserialize)] -struct GoModule { - #[serde(rename = "Path")] - path: String, + let file_path = parts.join("/"); + vec![ + format!("**/{}.rs", file_path), + format!("**/{}/mod.rs", file_path), + format!("**/src/{}.rs", file_path), + format!("**/src/{}/mod.rs", file_path), + ] } -impl WorkspaceDiscovery for GoWorkspace { 
/// Glob patterns for an absolute Python import (`pkg.mod`): the module file
/// itself or a package `__init__.py`, optionally under `src/`. Relative
/// imports (leading '.') are resolved elsewhere and yield no patterns.
fn normalize_python_import(path: &str) -> Vec<String> {
    if path.starts_with('.') {
        return Vec::new();
    }

    let segments: Vec<&str> = path.split('.').collect();
    if segments.is_empty() {
        return Vec::new();
    }
    let rel = segments.join("/");

    vec![
        format!("**/{}.py", rel),
        format!("**/{}/__init__.py", rel),
        format!("**/src/{}.py", rel),
        format!("**/src/{}/__init__.py", rel),
    ]
}

/// Glob patterns for a Go import path. Module-prefixed paths
/// ("github.com/owner/repo/pkg/...") are reduced to the repo-relative
/// package directory; the patterns then match any .go file in it.
fn normalize_go_import(path: &str) -> Vec<String> {
    let segments: Vec<&str> = path.split('/').collect();

    // A dotted first segment (or the bare "github"/"golang" spellings)
    // marks a host/owner/repo prefix occupying the first three segments.
    let has_module_prefix = segments.len() >= 3
        && (segments[0].contains('.') || segments[0] == "github" || segments[0] == "golang");
    let local: &[&str] = if has_module_prefix {
        &segments[3..]
    } else {
        &segments
    };

    if local.is_empty() {
        return Vec::new();
    }
    let dir = local.join("/");

    vec![format!("{}/*.go", dir), format!("**/{}/*.go", dir)]
}

/// Glob patterns for a JS/TS import specifier. Relative specifiers keep
/// their path (minus the leading "./" / "../"); bare specifiers have alias
/// prefixes ("@/", "@") stripped and additionally get `src/` candidates.
fn normalize_js_import(path: &str) -> Vec<String> {
    // Shared candidate shapes: the file itself under each extension, then
    // a directory index (no .jsx index by convention).
    let push_candidates = |rel: &str, out: &mut Vec<String>| {
        for ext in ["ts", "tsx", "js", "jsx"] {
            out.push(format!("**/{}.{}", rel, ext));
        }
        for ext in ["ts", "tsx", "js"] {
            out.push(format!("**/{}/index.{}", rel, ext));
        }
    };

    if path.starts_with('.') {
        let rel = path.trim_start_matches("./").trim_start_matches("../");
        let mut out = Vec::with_capacity(7);
        push_candidates(rel, &mut out);
        return out;
    }

    let clean = path.trim_start_matches("@/").trim_start_matches('@');
    let segments: Vec<&str> = clean.split('/').collect();
    if segments.is_empty() {
        return Vec::new();
    }
    let rel = segments.join("/");

    let mut out = Vec::with_capacity(9);
    push_candidates(&rel, &mut out);
    out.push(format!("**/src/{}.ts", rel));
    out.push(format!("**/src/{}.tsx", rel));
    out
}
'<' || c == '>'); + vec![ + format!("**/{}", clean), + format!("**/include/{}", clean), + format!("**/src/{}", clean), + ] } -impl WorkspaceDiscovery for TsWorkspace { - fn discover(root: &Path) -> Result>> { - let pkg_json_path = root.join("package.json"); - if !pkg_json_path.exists() { - return Ok(None); - } - - let content = fs::read_to_string(&pkg_json_path) - .with_context(|| format!("failed to read {}", pkg_json_path.display()))?; - - let pkg: PackageJson = - serde_json::from_str(&content).with_context(|| "failed to parse package.json")?; - - let name = pkg.name.unwrap_or_else(|| { - root.file_name() - .unwrap_or_default() - .to_string_lossy() - .to_string() - }); - - let mut paths = Vec::new(); - - let tsconfig_path = root.join("tsconfig.json"); - if tsconfig_path.exists() { - if let Ok(ts_content) = fs::read_to_string(&tsconfig_path) { - if let Ok(tsconfig) = serde_json::from_str::(&ts_content) { - if let Some(opts) = tsconfig.compiler_options { - let base = opts - .base_url - .map(|b| root.join(b)) - .unwrap_or_else(|| root.to_path_buf()); - - if let Some(path_map) = opts.paths { - for (alias, targets) in path_map { - if let Some(target) = targets.first() { - let clean_alias = alias.trim_end_matches("/*"); - let clean_target = target.trim_end_matches("/*"); - paths.push((clean_alias.to_string(), base.join(clean_target))); - } - } - } - } - } - } - } - - Ok(Some(Box::new(TsWorkspace { - root: root.to_path_buf(), - name, - paths, - }))) +fn normalize_java_import(path: &str) -> Vec { + if path.starts_with("java.") || path.starts_with("javax.") || path.starts_with("sun.") { + return vec![]; } - fn resolve_module(&self, module_path: &str) -> Option { - if module_path.starts_with('.') { - return None; - } - - for (alias, target_dir) in &self.paths { - if module_path.starts_with(alias) { - let remainder = module_path.strip_prefix(alias)?.trim_start_matches('/'); - let base = if remainder.is_empty() { - target_dir.clone() - } else { - target_dir.join(remainder) - 
}; - - for ext in &["ts", "tsx", "js", "jsx"] { - let with_ext = base.with_extension(ext); - if with_ext.exists() { - return Some(with_ext); - } - } - - let index_path = base.join("index"); - for ext in &["ts", "tsx", "js", "jsx"] { - let with_ext = index_path.with_extension(ext); - if with_ext.exists() { - return Some(with_ext); - } - } - } - } - - let node_modules = self.root.join("node_modules").join(module_path); - if node_modules.exists() { - let pkg_json = node_modules.join("package.json"); - if pkg_json.exists() { - if let Ok(content) = fs::read_to_string(&pkg_json) { - if let Ok(pkg) = serde_json::from_str::(&content) { - if let Some(main) = pkg.get("main").and_then(|m| m.as_str()) { - let main_path = node_modules.join(main); - if main_path.exists() { - return Some(main_path); - } - } - } - } - } - } + let file_path = path.replace('.', "/"); + vec![ + format!("**/{}.java", file_path), + format!("**/src/{}.java", file_path), + format!("**/src/main/java/{}.java", file_path), + ] +} - None +fn normalize_scala_import(path: &str) -> Vec { + if path.starts_with("scala.") || path.starts_with("java.") { + return vec![]; } - fn root(&self) -> &Path { - &self.root - } + let clean = path.trim_end_matches("._").trim_end_matches(".*"); + let file_path = clean.replace('.', "/"); + vec![ + format!("**/{}.scala", file_path), + format!("**/{}.sc", file_path), + format!("**/src/{}.scala", file_path), + format!("**/src/main/scala/{}.scala", file_path), + ] } -impl TsWorkspace { - pub fn name(&self) -> &str { - &self.name +fn normalize_zig_import(path: &str) -> Vec { + if path == "std" { + return vec![]; } - pub fn paths(&self) -> &[(String, PathBuf)] { - &self.paths + if path.ends_with(".zig") || path.contains('/') { + return vec![format!("**/{}", path), format!("**/src/{}", path)]; } -} - -#[derive(Debug, Clone)] -pub struct PythonWorkspace { - root: PathBuf, - package_name: String, - src_dir: PathBuf, -} - -#[derive(Deserialize)] -struct PyProjectToml { - project: Option, - 
/// Glob patterns for a Zig `@import` target. "std" is the compiler-provided
/// stdlib and yields nothing; explicit file paths are globbed verbatim,
/// bare package names get a `.zig` suffix. Both forms are also tried under
/// `src/`.
fn normalize_zig_import(path: &str) -> Vec<String> {
    if path == "std" {
        return Vec::new();
    }

    let looks_like_file = path.ends_with(".zig") || path.contains('/');
    let rel = if looks_like_file {
        path.to_string()
    } else {
        format!("{}.zig", path)
    };

    vec![format!("**/{}", rel), format!("**/src/{}", rel)]
}
root.join("src").exists() { - root.join("src") - } else { - root.to_path_buf() - }; - - (name, src) - }; - - Ok(Some(Box::new(PythonWorkspace { - root: root.to_path_buf(), - package_name, - src_dir, - }))) - } - - fn resolve_module(&self, module_path: &str) -> Option { - if module_path.starts_with('.') { - return None; - } - - let parts: Vec<&str> = module_path.split('.').collect(); - if parts.is_empty() { - return None; - } - - let mut path = self.src_dir.clone(); - for part in &parts { - path = path.join(part); - } - - let py_file = path.with_extension("py"); - if py_file.exists() { - return Some(py_file); } - - let init_file = path.join("__init__.py"); - if init_file.exists() { - return Some(init_file); - } - - None - } - - fn root(&self) -> &Path { - &self.root - } -} - -impl PythonWorkspace { - pub fn package_name(&self) -> &str { - &self.package_name - } - - pub fn src_dir(&self) -> &Path { - &self.src_dir } + None } -#[derive(Debug, Clone)] -pub struct ZigWorkspace { - root: PathBuf, - packages: Vec<(String, PathBuf)>, -} - -impl WorkspaceDiscovery for ZigWorkspace { - fn discover(root: &Path) -> Result>> { - let zon_path = root.join("build.zig.zon"); - if !zon_path.exists() { - return Ok(None); - } - - let content = fs::read_to_string(&zon_path) - .with_context(|| format!("failed to read {}", zon_path.display()))?; - - let packages = parse_zig_zon_deps(&content, root); - - Ok(Some(Box::new(ZigWorkspace { - root: root.to_path_buf(), - packages, - }))) - } - - fn resolve_module(&self, module_path: &str) -> Option { - if module_path == "std" { - return None; - } +fn resolve_relative(import_path: &str, from_file: &Path, lang: &str) -> Option { + let from_dir = from_file.parent()?; + let clean = import_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); - if module_path.ends_with(".zig") || module_path.contains('/') { - let resolved = self.root.join("src").join(module_path); - if resolved.exists() { - return Some(resolved); + match lang { + "c" 
| "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => { + if is_system_header(import_path) { + return None; } - let resolved = self.root.join(module_path); - if resolved.exists() { - return Some(resolved); + let candidate = from_dir.join(clean); + if candidate.exists() { + return Some(candidate); } - return None; - } - - for (name, pkg_path) in &self.packages { - if name == module_path { - return find_zig_root_file(pkg_path, name); + let parent = from_dir.parent()?; + let candidate = parent.join(clean); + if candidate.exists() { + return Some(candidate); } } - - None - } - - fn root(&self) -> &Path { - &self.root - } -} - -impl ZigWorkspace { - pub fn packages(&self) -> &[(String, PathBuf)] { - &self.packages - } -} - -fn parse_zig_zon_deps(content: &str, root: &Path) -> Vec<(String, PathBuf)> { - let mut packages = Vec::new(); - - for line in content.lines() { - let line = line.trim(); - if !line.starts_with('.') || !line.contains('=') { - continue; - } - - let Some(name_end) = line.find('=') else { - continue; - }; - let name = line[1..name_end].trim().trim_matches(|c| c == ' '); - - if !line.contains(".path") { - continue; - } - - if let Some(path_start) = line.find(".path") { - let rest = &line[path_start..]; - if let Some(quote_start) = rest.find('"') { - let after_quote = &rest[quote_start + 1..]; - if let Some(quote_end) = after_quote.find('"') { - let path_str = &after_quote[..quote_end]; - packages.push((name.to_string(), root.join(path_str))); + "typescript" | "ts" | "tsx" | "javascript" | "js" | "mjs" | "cjs" | "jsx" => { + if !clean.starts_with('.') { + return None; + } + let base = from_dir.join(clean.trim_start_matches("./")); + for ext in &["ts", "tsx", "js", "jsx"] { + let candidate = base.with_extension(ext); + if candidate.exists() { + return Some(candidate); } } - } - } - - packages -} - -fn find_zig_root_file(pkg_path: &Path, name: &str) -> Option { - let candidates = [ - pkg_path.join("src/root.zig"), - pkg_path.join("src/lib.zig"), - 
pkg_path.join("src/main.zig"), - pkg_path.join(format!("src/{}.zig", name)), - pkg_path.join(format!("{}.zig", name)), - pkg_path.join("root.zig"), - pkg_path.join("lib.zig"), - pkg_path.join("main.zig"), - ]; - - candidates.into_iter().find(|c| c.exists()) -} - -fn expand_glob(root: &Path, pattern: &str) -> Result> { - let full_pattern = root.join(pattern); - let pattern_str = full_pattern.to_string_lossy(); - - let mut results = Vec::new(); - - if pattern.contains('*') { - for path in glob::glob(&pattern_str) - .with_context(|| "invalid glob pattern")? - .flatten() - { - if path.is_dir() && path.join("Cargo.toml").exists() { - results.push(path); + let index = base.join("index"); + for ext in &["ts", "tsx", "js", "jsx"] { + let candidate = index.with_extension(ext); + if candidate.exists() { + return Some(candidate); + } } } - } else { - let path = root.join(pattern); - if path.is_dir() && path.join("Cargo.toml").exists() { - results.push(path); + "zig" => { + if clean.ends_with(".zig") || clean.contains('/') { + let candidate = from_dir.join(clean); + if candidate.exists() { + return Some(candidate); + } + } } + _ => {} } - Ok(results) -} - -fn parse_crate_member(path: &Path) -> Result> { - let cargo_path = path.join("Cargo.toml"); - if !cargo_path.exists() { - return Ok(None); - } - - let content = fs::read_to_string(&cargo_path) - .with_context(|| format!("failed to read {}", cargo_path.display()))?; - - let cargo: CargoToml = - toml::from_str(&content).with_context(|| "failed to parse Cargo.toml")?; - - let name = cargo - .package - .map(|p| p.name) - .unwrap_or_else(|| path.file_name().unwrap().to_string_lossy().to_string()); - - Ok(Some(CrateMember { - name, - path: path.to_path_buf(), - })) -} - -pub fn resolve_same_file(callee: &str, file: &Path, index: &Index) -> Option { - let record = index.get(file)?; - record - .definitions - .iter() - .find(|d| d.name == callee) - .cloned() -} - -pub fn resolve_by_index(callee: &str, index: &Index) -> Option { - 
index.definitions().find(|d| d.name == callee).cloned() + None } struct DefinitionPattern { @@ -826,6 +422,19 @@ const DEFINITION_PATTERNS: &[DefinitionPattern] = &[ }, ]; +pub fn resolve_same_file(callee: &str, file: &Path, index: &Index) -> Option { + let record = index.get(file)?; + record + .definitions + .iter() + .find(|d| d.name == callee) + .cloned() +} + +pub fn resolve_by_index(callee: &str, index: &Index) -> Option { + index.definitions().find(|d| d.name == callee).cloned() +} + pub fn resolve_by_search(callee: &str, root: &Path) -> Result> { use grep::regex::RegexMatcher; use grep::searcher::sinks::UTF8; @@ -868,8 +477,8 @@ pub fn resolve_by_search(callee: &str, root: &Path) -> Result if let Some((line_num, file_path)) = found { return Ok(Some(Definition { name: callee.to_string(), - kind: super::index::DefinitionKind::Function, - span: super::index::Span { + kind: DefinitionKind::Function, + span: Span { start_byte: 0, end_byte: 0, start_line: line_num as usize, @@ -886,20 +495,14 @@ pub fn resolve_by_search(callee: &str, root: &Path) -> Result pub struct Resolver<'a> { index: &'a Index, - workspace: Option>, root: PathBuf, discovered_files: RefCell>, } impl<'a> Resolver<'a> { - pub fn new( - index: &'a Index, - workspace: Option>, - root: PathBuf, - ) -> Self { + pub fn new(index: &'a Index, root: PathBuf) -> Self { Self { index, - workspace, root, discovered_files: RefCell::new(HashSet::new()), } @@ -914,16 +517,6 @@ impl<'a> Resolver<'a> { return Ok(Some(def)); } - if let Some(ref ws) = self.workspace { - if let Some(module_path) = ws.resolve_module(callee) { - if let Some(record) = self.index.get(&module_path) { - if let Some(def) = record.definitions.first() { - return Ok(Some(def.clone())); - } - } - } - } - if let Some(def) = resolve_by_index(callee, self.index) { return Ok(Some(def)); } @@ -945,260 +538,33 @@ impl<'a> Resolver<'a> { } fn resolve_via_imports(&self, callee: &str, from_file: &Path) -> Option { + let record = 
self.index.get(from_file)?; let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or(""); - match ext { - "rs" => self.resolve_rust_import(callee, from_file), - "py" => self.resolve_python_import(callee, from_file), - "go" => self.resolve_go_import(callee, from_file), - "zig" => self.resolve_zig_import(callee, from_file), - "java" => self.resolve_java_import(callee, from_file), - "scala" | "sc" => self.resolve_scala_import(callee, from_file), - "ts" | "tsx" | "js" | "jsx" | "mjs" | "cjs" => { - self.resolve_ts_import(callee, from_file) + for import in &record.imports { + if !self.import_matches_callee(&import.module_path, callee, ext) { + continue; } - _ => self.resolve_generic_import(callee, from_file), - } - } - - fn resolve_rust_import(&self, callee: &str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - for import in &record.imports { - let segments: Vec<&str> = import.module_path.split("::").collect(); - let symbol_name = segments.last()?; - - let visible_name = import.alias.as_deref().unwrap_or(symbol_name); - if visible_name != callee { - continue; - } - - if let Some(ref ws) = self.workspace { - if let Some(resolved_path) = ws.resolve_module(&import.module_path) { - if let Some(def) = self.find_def_in_file(&resolved_path, symbol_name) { - return Some(def); - } - } - } - - let filtered_parts: Vec<&str> = segments - .iter() - .copied() - .filter(|p| !p.is_empty() && *p != "crate" && *p != "self" && *p != "super") - .collect(); - - for def in self.index.definitions() { - if def.name != *symbol_name { - continue; - } - if path_matches_module(&def.file, &filtered_parts) { - return Some(def.clone()); - } - } - } - - None - } - - fn resolve_python_import(&self, callee: &str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - - for import in &record.imports { - let segments: Vec<&str> = import.module_path.split('.').collect(); - let symbol_name = segments.last()?; - - let visible_name = 
import.alias.as_deref().unwrap_or(symbol_name); - if visible_name != callee { - continue; - } - - if let Some(ref ws) = self.workspace { - if let Some(resolved_path) = ws.resolve_module(&import.module_path) { - if let Some(def) = self.find_def_in_file(&resolved_path, symbol_name) { - return Some(def); - } - } - } - - for def in self.index.definitions() { - if def.name != *symbol_name { - continue; - } - if path_matches_module(&def.file, &segments) { - return Some(def.clone()); - } - } - } - - None - } - - fn resolve_go_import(&self, callee: &str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - - for import in &record.imports { - let pkg_name = import - .alias - .as_deref() - .or_else(|| import.module_path.rsplit('/').next())?; - - if pkg_name != callee { - continue; - } - - if let Some(ref ws) = self.workspace { - if let Some(resolved_path) = ws.resolve_module(&import.module_path) { - if let Some(target_record) = self.index.get(&resolved_path) { - if let Some(def) = target_record.definitions.first() { - return Some(def.clone()); - } - } - } - } - } - - None - } - - fn resolve_ts_import(&self, callee: &str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - - for import in &record.imports { - let segments: Vec<&str> = import.module_path.split('/').collect(); - let module_name = segments.last()?; - - let visible_name = import.alias.as_deref().unwrap_or(module_name); - if visible_name != callee { - continue; - } - - if let Some(ref ws) = self.workspace { - if let Some(resolved_path) = ws.resolve_module(&import.module_path) { - if let Some(def) = self.find_def_in_file(&resolved_path, callee) { - return Some(def); - } - } - } - - for def in self.index.definitions() { - if def.name != callee { - continue; - } - if path_matches_module(&def.file, &segments) { - return Some(def.clone()); + if let Some(resolved) = resolve_relative(&import.module_path, from_file, ext) { + if let Some(def) = self.find_def_in_file(&resolved, 
callee) { + return Some(def); } - } - } - - None - } - - fn resolve_zig_import(&self, callee: &str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - - for import in &record.imports { - let import_name = import.alias.as_deref().unwrap_or(""); - if import_name != callee { - continue; - } - - let import_path = &import.module_path; - - if import_path == "std" { - continue; - } - - if import_path.ends_with(".zig") || import_path.contains('/') { - let from_dir = from_file.parent()?; - let resolved = from_dir.join(import_path); - if resolved.exists() { - if let Some(record) = self.index.get(&resolved) { - if let Some(def) = record.definitions.first() { - return Some(def.clone()); - } - } - } - continue; - } - - if let Some(ref ws) = self.workspace { - if let Some(resolved_path) = ws.resolve_module(import_path) { - if let Some(target_record) = self.index.get(&resolved_path) { - if let Some(def) = target_record.definitions.first() { - return Some(def.clone()); - } - } - } - } - } - - None - } - - fn resolve_java_import(&self, callee: &str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - - for import in &record.imports { - let segments: Vec<&str> = import.module_path.split('.').collect(); - let class_name = segments.last()?; - - if *class_name != callee && *class_name != "*" { - continue; - } - - if *class_name == "*" { - let pkg_segments = &segments[..segments.len() - 1]; - for def in self.index.definitions() { - if def.name != callee { - continue; - } - if path_matches_java_package(&def.file, pkg_segments) { - return Some(def.clone()); - } - } - } else { - for def in self.index.definitions() { - if def.name != *class_name { - continue; - } - if path_matches_java_package(&def.file, &segments[..segments.len() - 1]) { + if let Some(target_record) = self.index.get(&resolved) { + if let Some(def) = target_record.definitions.first() { return Some(def.clone()); } } } - } - - None - } - - fn resolve_scala_import(&self, callee: 
&str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - - for import in &record.imports { - let segments: Vec<&str> = import.module_path.split('.').collect(); - let symbol_name = segments.last()?; - - let visible_name = import.alias.as_deref().unwrap_or(symbol_name); - if visible_name != callee && *symbol_name != "_" { - continue; - } - if *symbol_name == "_" { - let pkg_segments = &segments[..segments.len() - 1]; - for def in self.index.definitions() { - if def.name != callee { - continue; - } - if path_matches_java_package(&def.file, pkg_segments) { - return Some(def.clone()); - } + let patterns = normalize_to_patterns(&import.module_path, ext); + if let Some(resolved) = search_patterns(&patterns, &self.root) { + self.discovered_files.borrow_mut().insert(resolved.clone()); + if let Some(def) = self.find_def_in_file(&resolved, callee) { + return Some(def); } - } else { - for def in self.index.definitions() { - if def.name != *symbol_name { - continue; - } - if path_matches_java_package(&def.file, &segments[..segments.len() - 1]) { + if let Some(target_record) = self.index.get(&resolved) { + if let Some(def) = target_record.definitions.first() { return Some(def.clone()); } } @@ -1208,38 +574,34 @@ impl<'a> Resolver<'a> { None } - fn resolve_generic_import(&self, callee: &str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - - for import in &record.imports { - let normalized = import.module_path.replace('.', "::"); - let segments: Vec<&str> = normalized.split("::").filter(|p| !p.is_empty()).collect(); - let symbol_name = segments.last()?; + fn import_matches_callee(&self, module_path: &str, callee: &str, lang: &str) -> bool { + let clean = module_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); - let visible_name = import.alias.as_deref().unwrap_or(symbol_name); - if visible_name != callee { - continue; + match lang { + "rs" => { + let parts: Vec<&str> = clean.split("::").collect(); + 
parts.last().map(|s| *s == callee).unwrap_or(false) } - - if let Some(ref ws) = self.workspace { - if let Some(resolved_path) = ws.resolve_module(&import.module_path) { - if let Some(def) = self.find_def_in_file(&resolved_path, symbol_name) { - return Some(def); - } - } + "py" => { + let parts: Vec<&str> = clean.split('.').collect(); + parts.last().map(|s| *s == callee).unwrap_or(false) } - - for def in self.index.definitions() { - if def.name != *symbol_name { - continue; - } - if path_matches_module(&def.file, &segments) { - return Some(def.clone()); - } + "go" => { + let parts: Vec<&str> = clean.split('/').collect(); + parts.last().map(|s| *s == callee).unwrap_or(false) } + "java" | "scala" | "sc" => { + let parts: Vec<&str> = clean.split('.').collect(); + parts + .last() + .map(|s| *s == callee || *s == "*" || *s == "_") + .unwrap_or(false) + } + "ts" | "tsx" | "js" | "jsx" | "mjs" | "cjs" => true, + "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => true, + "zig" => true, + _ => true, } - - None } fn find_def_in_file(&self, file: &Path, name: &str) -> Option { @@ -1254,101 +616,92 @@ mod tests { use std::fs; use tempfile::TempDir; - fn setup_rust_workspace() -> TempDir { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("Cargo.toml"), - r#" -[workspace] -members = ["crates/*"] - -[package] -name = "root-crate" -version = "0.1.0" -"#, - ) - .unwrap(); - - let crate_a = dir.path().join("crates/crate-a"); - fs::create_dir_all(crate_a.join("src")).unwrap(); - fs::write( - crate_a.join("Cargo.toml"), - r#" -[package] -name = "crate-a" -version = "0.1.0" -"#, - ) - .unwrap(); - fs::write(crate_a.join("src/lib.rs"), "pub fn foo() {}").unwrap(); - - let crate_b = dir.path().join("crates/crate-b"); - fs::create_dir_all(crate_b.join("src")).unwrap(); - fs::write( - crate_b.join("Cargo.toml"), - r#" -[package] -name = "crate-b" -version = "0.1.0" -"#, - ) - .unwrap(); - fs::write(crate_b.join("src/lib.rs"), "pub fn bar() {}").unwrap(); - - dir + 
#[test] + fn test_normalize_rust_import() { + let patterns = normalize_rust_import("crate::foo::bar"); + assert!(patterns.iter().any(|p| p.contains("foo/bar.rs"))); + assert!(patterns.iter().any(|p| p.contains("foo/bar/mod.rs"))); } #[test] - fn test_rust_workspace_discovery() { - let dir = setup_rust_workspace(); - let ws = RustWorkspace::discover(dir.path()).unwrap().unwrap(); + fn test_normalize_python_import() { + let patterns = normalize_python_import("mypackage.utils.helper"); + assert!(patterns + .iter() + .any(|p| p.contains("mypackage/utils/helper.py"))); + assert!(patterns + .iter() + .any(|p| p.contains("mypackage/utils/helper/__init__.py"))); + } - assert_eq!(ws.root(), dir.path()); - assert_eq!(ws.members().len(), 3); + #[test] + fn test_normalize_go_import() { + let patterns = normalize_go_import("github.com/user/repo/pkg/utils"); + assert!(patterns.iter().any(|p| p.contains("pkg/utils"))); + } - let names: Vec<_> = ws.members().iter().map(|m| m.name.as_str()).collect(); - assert!(names.contains(&"root-crate")); - assert!(names.contains(&"crate-a")); - assert!(names.contains(&"crate-b")); + #[test] + fn test_normalize_js_import_relative() { + let patterns = normalize_js_import("./components/Button"); + assert!(patterns.iter().any(|p| p.contains("components/Button.ts"))); + assert!(patterns + .iter() + .any(|p| p.contains("components/Button/index.ts"))); } #[test] - fn test_rust_workspace_resolve_crate() { - let dir = setup_rust_workspace(); - let ws = RustWorkspace::discover(dir.path()).unwrap().unwrap(); + fn test_normalize_js_import_alias() { + let patterns = normalize_js_import("@/components/Button"); + assert!(patterns.iter().any(|p| p.contains("components/Button.ts"))); + } - let path = ws.resolve_crate("crate-a").unwrap(); - assert!(path.ends_with("crates/crate-a")); + #[test] + fn test_normalize_c_import() { + let patterns = normalize_c_import("utils/helper.h"); + assert!(patterns.iter().any(|p| p.contains("utils/helper.h"))); + } - 
assert!(ws.resolve_crate("nonexistent").is_none()); + #[test] + fn test_normalize_c_import_system_skipped() { + let patterns = normalize_c_import(""); + assert!(patterns.is_empty()); } #[test] - fn test_rust_workspace_resolve_module() { - let dir = setup_rust_workspace(); - let ws = RustWorkspace::discover(dir.path()).unwrap().unwrap(); + fn test_normalize_java_import() { + let patterns = normalize_java_import("com.example.utils.Helper"); + assert!(patterns + .iter() + .any(|p| p.contains("com/example/utils/Helper.java"))); + } - assert!(ws.resolve_module("crate").is_none()); - assert!(ws.resolve_module("self").is_none()); - assert!(ws.resolve_module("super").is_none()); + #[test] + fn test_normalize_java_import_stdlib_skipped() { + let patterns = normalize_java_import("java.util.List"); + assert!(patterns.is_empty()); } #[test] - fn test_no_cargo_toml() { - let dir = TempDir::new().unwrap(); - let ws = RustWorkspace::discover(dir.path()).unwrap(); - assert!(ws.is_none()); + fn test_is_system_header() { + assert!(is_system_header("")); + assert!(is_system_header("")); + assert!(is_system_header("")); + assert!(is_system_header("")); + assert!(!is_system_header("\"myheader.h\"")); + assert!(!is_system_header("\"utils/helper.h\"")); } #[test] - fn test_resolve_module_finds_file() { - let dir = setup_rust_workspace(); - let ws = RustWorkspace::discover(dir.path()).unwrap().unwrap(); + fn test_search_patterns() { + let dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + fs::create_dir_all(&src).unwrap(); + fs::write(src.join("helper.rs"), "fn helper() {}").unwrap(); - let resolved = ws.resolve_module("crate-a"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("src/lib.rs")); + let patterns = vec!["**/helper.rs".to_string()]; + let found = search_patterns(&patterns, dir.path()); + assert!(found.is_some()); + assert!(found.unwrap().ends_with("helper.rs")); } #[test] @@ -1362,30 +715,17 @@ version = "0.1.0" path: file.clone(), mtime: 
0, size: 0, - definitions: vec![ - Definition { - name: "foo".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 10, - start_line: 1, - end_line: 3, - }, - file: file.clone(), - }, - Definition { - name: "bar".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 20, - end_byte: 30, - start_line: 5, - end_line: 7, - }, - file: file.clone(), + definitions: vec![Definition { + name: "foo".to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 3, }, - ], + file: file.clone(), + }], calls: vec![], imports: vec![], }); @@ -1394,14 +734,8 @@ version = "0.1.0" assert!(found.is_some()); assert_eq!(found.unwrap().name, "foo"); - let found = resolve_same_file("bar", &file, &index); - assert!(found.is_some()); - - let not_found = resolve_same_file("baz", &file, &index); + let not_found = resolve_same_file("bar", &file, &index); assert!(not_found.is_none()); - - let wrong_file = resolve_same_file("foo", Path::new("src/other.rs"), &index); - assert!(wrong_file.is_none()); } #[test] @@ -1429,32 +763,8 @@ version = "0.1.0" imports: vec![], }); - index.update(FileRecord { - path: PathBuf::from("src/b.rs"), - mtime: 0, - size: 0, - definitions: vec![Definition { - name: "beta".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 10, - start_line: 1, - end_line: 3, - }, - file: PathBuf::from("src/b.rs"), - }], - calls: vec![], - imports: vec![], - }); - let found = resolve_by_index("alpha", &index); assert!(found.is_some()); - assert_eq!(found.as_ref().unwrap().file, PathBuf::from("src/a.rs")); - - let found = resolve_by_index("beta", &index); - assert!(found.is_some()); - assert_eq!(found.as_ref().unwrap().file, PathBuf::from("src/b.rs")); let not_found = resolve_by_index("gamma", &index); assert!(not_found.is_none()); @@ -1506,7 +816,7 @@ version = "0.1.0" imports: vec![], }); - let resolver = Resolver::new(&index, 
None, PathBuf::from(".")); + let resolver = Resolver::new(&index, PathBuf::from(".")); let found = resolver.resolve("foo", &file_a).unwrap(); assert!(found.is_some()); @@ -1518,15 +828,52 @@ version = "0.1.0" } #[test] - fn test_resolver_import_tracing() { + fn test_resolve_by_search_rust() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("lib.rs"), + "pub fn my_function() {\n println!(\"hello\");\n}\n", + ) + .unwrap(); + + let found = resolve_by_search("my_function", dir.path()).unwrap(); + assert!(found.is_some()); + let def = found.unwrap(); + assert_eq!(def.name, "my_function"); + assert_eq!(def.span.start_line, 1); + } + + #[test] + fn test_resolve_by_search_python() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("utils.py"), + "def helper_func():\n pass\n", + ) + .unwrap(); + + let found = resolve_by_search("helper_func", dir.path()).unwrap(); + assert!(found.is_some()); + let def = found.unwrap(); + assert_eq!(def.name, "helper_func"); + } + + #[test] + fn test_resolve_via_imports_with_glob() { use super::super::index::{Definition, DefinitionKind, FileRecord, Import, Span}; + let dir = TempDir::new().unwrap(); + let utils_dir = dir.path().join("src/utils"); + fs::create_dir_all(&utils_dir).unwrap(); + fs::write(utils_dir.join("helper.rs"), "pub fn helper() {}").unwrap(); + let mut index = Index::new(); - let main_file = PathBuf::from("src/main.rs"); - let utils_file = PathBuf::from("src/utils.rs"); + let main_file = dir.path().join("src/main.rs"); index.update(FileRecord { - path: utils_file.clone(), + path: utils_dir.join("helper.rs"), mtime: 0, size: 0, definitions: vec![Definition { @@ -1534,11 +881,11 @@ version = "0.1.0" kind: DefinitionKind::Function, span: Span { start_byte: 0, - end_byte: 50, + end_byte: 20, start_line: 1, - end_line: 5, + end_line: 1, }, - file: utils_file.clone(), + file: utils_dir.join("helper.rs"), }], calls: vec![], imports: vec![], @@ -1563,702 +910,10 @@ version = "0.1.0" }], 
}); - let resolver = Resolver::new(&index, None, PathBuf::from(".")); + let resolver = Resolver::new(&index, dir.path().to_path_buf()); let found = resolver.resolve("helper", &main_file).unwrap(); assert!(found.is_some()); assert_eq!(found.unwrap().name, "helper"); } - - #[test] - fn test_resolver_import_tracing_with_alias() { - use super::super::index::{Definition, DefinitionKind, FileRecord, Import, Span}; - - let mut index = Index::new(); - let main_file = PathBuf::from("src/main.rs"); - let utils_file = PathBuf::from("src/utils.rs"); - - index.update(FileRecord { - path: utils_file.clone(), - mtime: 0, - size: 0, - definitions: vec![Definition { - name: "long_function_name".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 50, - start_line: 1, - end_line: 5, - }, - file: utils_file.clone(), - }], - calls: vec![], - imports: vec![], - }); - - index.update(FileRecord { - path: main_file.clone(), - mtime: 0, - size: 0, - definitions: vec![], - calls: vec![], - imports: vec![Import { - module_path: "crate::utils::long_function_name".to_string(), - alias: Some("short".to_string()), - span: Span { - start_byte: 0, - end_byte: 40, - start_line: 1, - end_line: 1, - }, - file: main_file.clone(), - }], - }); - - let resolver = Resolver::new(&index, None, PathBuf::from(".")); - - let found = resolver.resolve("short", &main_file).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "long_function_name"); - - let not_found = resolver.resolve("long_function_name", &main_file).unwrap(); - assert!(not_found.is_none() || not_found.unwrap().file != main_file); - } - - fn setup_go_workspace() -> TempDir { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("go.mod"), - "module github.com/example/myproject\n\ngo 1.21\n", - ) - .unwrap(); - - fs::write( - dir.path().join("main.go"), - "package main\n\nfunc main() {}\n", - ) - .unwrap(); - - let pkg_dir = dir.path().join("pkg/utils"); - 
fs::create_dir_all(&pkg_dir).unwrap(); - fs::write( - pkg_dir.join("helpers.go"), - "package utils\n\nfunc Helper() {}\n", - ) - .unwrap(); - - let internal_dir = dir.path().join("internal/core"); - fs::create_dir_all(&internal_dir).unwrap(); - fs::write( - internal_dir.join("core.go"), - "package core\n\nfunc Process() {}\n", - ) - .unwrap(); - - dir - } - - #[test] - fn test_go_workspace_discovery() { - let dir = setup_go_workspace(); - let ws = GoWorkspace::discover(dir.path()).unwrap(); - - assert!(ws.is_some()); - let ws = ws.unwrap(); - assert_eq!(ws.root(), dir.path()); - assert_eq!(ws.module_path(), "github.com/example/myproject"); - } - - #[test] - fn test_go_workspace_no_go_mod() { - let dir = TempDir::new().unwrap(); - let ws = GoWorkspace::discover(dir.path()).unwrap(); - assert!(ws.is_none()); - } - - #[test] - fn test_go_workspace_resolve_root() { - let dir = setup_go_workspace(); - let ws = GoWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("github.com/example/myproject"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("main.go")); - } - - #[test] - fn test_go_workspace_resolve_package() { - let dir = setup_go_workspace(); - let ws = GoWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("github.com/example/myproject/pkg/utils"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("helpers.go")); - } - - #[test] - fn test_go_workspace_resolve_external() { - let dir = setup_go_workspace(); - let ws = GoWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("github.com/other/package"); - assert!(resolved.is_none()); - } - - #[test] - fn test_go_mod_fallback_parsing() { - let content = "module github.com/foo/bar\n\ngo 1.21\n"; - let module = parse_go_mod_fallback(content); - assert_eq!(module, Some("github.com/foo/bar".to_string())); - - let content = "// comment\nmodule example.com/test \n"; - let module = 
parse_go_mod_fallback(content); - assert_eq!(module, Some("example.com/test".to_string())); - } - - fn setup_ts_workspace() -> TempDir { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("package.json"), - r#"{"name": "my-app", "version": "1.0.0"}"#, - ) - .unwrap(); - - fs::write( - dir.path().join("tsconfig.json"), - r#"{ - "compilerOptions": { - "baseUrl": ".", - "paths": { - "@/*": ["src/*"], - "@utils/*": ["src/utils/*"] - } - } - }"#, - ) - .unwrap(); - - let src_dir = dir.path().join("src"); - fs::create_dir_all(&src_dir).unwrap(); - fs::write(src_dir.join("index.ts"), "export const main = () => {};\n").unwrap(); - - let utils_dir = src_dir.join("utils"); - fs::create_dir_all(&utils_dir).unwrap(); - fs::write( - utils_dir.join("helpers.ts"), - "export const helper = () => {};\n", - ) - .unwrap(); - - let components_dir = src_dir.join("components"); - fs::create_dir_all(&components_dir).unwrap(); - fs::write( - components_dir.join("Button.tsx"), - "export const Button = () => null;\n", - ) - .unwrap(); - fs::write( - components_dir.join("index.ts"), - "export * from './Button';\n", - ) - .unwrap(); - - dir - } - - #[test] - fn test_ts_workspace_discovery() { - let dir = setup_ts_workspace(); - let ws = TsWorkspace::discover(dir.path()).unwrap(); - - assert!(ws.is_some()); - let ws = ws.unwrap(); - assert_eq!(ws.root(), dir.path()); - assert_eq!(ws.name(), "my-app"); - assert!(!ws.paths().is_empty()); - } - - #[test] - fn test_ts_workspace_no_package_json() { - let dir = TempDir::new().unwrap(); - let ws = TsWorkspace::discover(dir.path()).unwrap(); - assert!(ws.is_none()); - } - - #[test] - fn test_ts_workspace_resolve_alias() { - let dir = setup_ts_workspace(); - let ws = TsWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("@/index"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("src/index.ts")); - } - - #[test] - fn test_ts_workspace_resolve_utils_alias() { - let dir = 
setup_ts_workspace(); - let ws = TsWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("@utils/helpers"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("src/utils/helpers.ts")); - } - - #[test] - fn test_ts_workspace_resolve_index_file() { - let dir = setup_ts_workspace(); - let ws = TsWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("@/components"); - assert!(resolved.is_some()); - let path = resolved.unwrap(); - assert!(path.ends_with("components/index.ts")); - } - - #[test] - fn test_ts_workspace_relative_ignored() { - let dir = setup_ts_workspace(); - let ws = TsWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("./local"); - assert!(resolved.is_none()); - - let resolved = ws.resolve_module("../parent"); - assert!(resolved.is_none()); - } - - fn setup_python_workspace_src_layout() -> TempDir { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("pyproject.toml"), - r#" -[project] -name = "mypackage" -version = "0.1.0" - -[tool.setuptools] -package-dir = {"" = "src"} -"#, - ) - .unwrap(); - - let pkg_dir = dir.path().join("src/mypackage"); - fs::create_dir_all(&pkg_dir).unwrap(); - fs::write(pkg_dir.join("__init__.py"), "").unwrap(); - fs::write(pkg_dir.join("main.py"), "def main(): pass\n").unwrap(); - - let utils_dir = pkg_dir.join("utils"); - fs::create_dir_all(&utils_dir).unwrap(); - fs::write(utils_dir.join("__init__.py"), "").unwrap(); - fs::write(utils_dir.join("helpers.py"), "def helper(): pass\n").unwrap(); - - dir - } - - fn setup_python_workspace_flat_layout() -> TempDir { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("pyproject.toml"), - r#" -[project] -name = "flatpkg" -version = "0.1.0" -"#, - ) - .unwrap(); - - let pkg_dir = dir.path().join("flatpkg"); - fs::create_dir_all(&pkg_dir).unwrap(); - fs::write(pkg_dir.join("__init__.py"), "").unwrap(); - 
fs::write(pkg_dir.join("core.py"), "def process(): pass\n").unwrap(); - - dir - } - - #[test] - fn test_python_workspace_discovery_src_layout() { - let dir = setup_python_workspace_src_layout(); - let ws = PythonWorkspace::discover(dir.path()).unwrap(); - - assert!(ws.is_some()); - let ws = ws.unwrap(); - assert_eq!(ws.root(), dir.path()); - assert_eq!(ws.package_name(), "mypackage"); - assert!(ws.src_dir().ends_with("src")); - } - - #[test] - fn test_python_workspace_discovery_flat_layout() { - let dir = setup_python_workspace_flat_layout(); - let ws = PythonWorkspace::discover(dir.path()).unwrap(); - - assert!(ws.is_some()); - let ws = ws.unwrap(); - assert_eq!(ws.package_name(), "flatpkg"); - } - - #[test] - fn test_python_workspace_no_pyproject() { - let dir = TempDir::new().unwrap(); - let ws = PythonWorkspace::discover(dir.path()).unwrap(); - assert!(ws.is_none()); - } - - #[test] - fn test_python_workspace_resolve_module() { - let dir = setup_python_workspace_src_layout(); - let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("mypackage.main"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("mypackage/main.py")); - } - - #[test] - fn test_python_workspace_resolve_package() { - let dir = setup_python_workspace_src_layout(); - let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("mypackage.utils"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("utils/__init__.py")); - } - - #[test] - fn test_python_workspace_resolve_submodule() { - let dir = setup_python_workspace_src_layout(); - let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("mypackage.utils.helpers"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("utils/helpers.py")); - } - - #[test] - fn test_python_workspace_relative_ignored() { - let dir = setup_python_workspace_src_layout(); - let ws 
= PythonWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module(".relative"); - assert!(resolved.is_none()); - } - - #[test] - fn test_python_workspace_poetry_project() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("pyproject.toml"), - r#" -[tool.poetry] -name = "poetry-project" -version = "0.1.0" -"#, - ) - .unwrap(); - - let src_dir = dir.path().join("src"); - fs::create_dir_all(&src_dir).unwrap(); - - let ws = PythonWorkspace::discover(dir.path()).unwrap().unwrap(); - assert_eq!(ws.package_name(), "poetry-project"); - } - - #[test] - fn test_resolve_by_search_rust() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("lib.rs"), - "pub fn my_function() {\n println!(\"hello\");\n}\n", - ) - .unwrap(); - - let found = resolve_by_search("my_function", dir.path()).unwrap(); - assert!(found.is_some()); - let def = found.unwrap(); - assert_eq!(def.name, "my_function"); - assert_eq!(def.span.start_line, 1); - } - - #[test] - fn test_resolve_by_search_python() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("utils.py"), - "def helper_func():\n pass\n", - ) - .unwrap(); - - let found = resolve_by_search("helper_func", dir.path()).unwrap(); - assert!(found.is_some()); - let def = found.unwrap(); - assert_eq!(def.name, "helper_func"); - } - - #[test] - fn test_resolve_by_search_go() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("main.go"), - "package main\n\nfunc ProcessData() {\n}\n", - ) - .unwrap(); - - let found = resolve_by_search("ProcessData", dir.path()).unwrap(); - assert!(found.is_some()); - let def = found.unwrap(); - assert_eq!(def.name, "ProcessData"); - } - - #[test] - fn test_resolve_by_search_typescript() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("index.ts"), - "export function fetchData() {\n return null;\n}\n", - ) - .unwrap(); - - let found = resolve_by_search("fetchData", dir.path()).unwrap(); - 
assert!(found.is_some()); - let def = found.unwrap(); - assert_eq!(def.name, "fetchData"); - } - - #[test] - fn test_resolve_by_search_not_found() { - let dir = TempDir::new().unwrap(); - - fs::write(dir.path().join("lib.rs"), "pub fn other() {}\n").unwrap(); - - let found = resolve_by_search("nonexistent", dir.path()).unwrap(); - assert!(found.is_none()); - } - - #[test] - fn test_resolver_tracks_discovered_files() { - let dir = TempDir::new().unwrap(); - - fs::write(dir.path().join("utils.rs"), "pub fn discovered_func() {}\n").unwrap(); - - fs::write(dir.path().join("helpers.rs"), "pub fn another_func() {}\n").unwrap(); - - let index = Index::new(); - let resolver = Resolver::new(&index, None, dir.path().to_path_buf()); - - assert!(resolver.files_to_index().is_empty()); - - let _ = resolver.resolve("discovered_func", Path::new("main.rs")); - let files = resolver.files_to_index(); - assert_eq!(files.len(), 1); - assert!(files[0].ends_with("utils.rs")); - - let _ = resolver.resolve("another_func", Path::new("main.rs")); - let files = resolver.files_to_index(); - assert_eq!(files.len(), 2); - - resolver.clear_discovered(); - assert!(resolver.files_to_index().is_empty()); - } - - #[test] - fn test_path_matches_module_exact_suffix() { - let file = PathBuf::from("src/utils/helpers.rs"); - assert!(path_matches_module(&file, &["utils", "helpers", "func"])); - assert!(path_matches_module(&file, &["helpers", "func"])); - assert!(!path_matches_module(&file, &["other", "helpers", "func"])); - } - - #[test] - fn test_path_matches_module_single_part() { - let file = PathBuf::from("src/main.rs"); - assert!(path_matches_module(&file, &["func"])); - assert!(path_matches_module(&file, &[])); - } - - #[test] - fn test_path_matches_module_no_false_substring_match() { - let file = PathBuf::from("src/my_utils/helpers.rs"); - assert!(!path_matches_module(&file, &["utils", "helpers", "func"])); - assert!(path_matches_module(&file, &["my_utils", "helpers", "func"])); - } - - #[test] - fn 
test_path_matches_module_deep_path() { - let file = PathBuf::from("crates/core/src/utils/helpers.rs"); - assert!(path_matches_module(&file, &["utils", "helpers", "func"])); - assert!(path_matches_module( - &file, - &["src", "utils", "helpers", "func"] - )); - assert!(!path_matches_module( - &file, - &["wrong", "src", "utils", "helpers", "func"] - )); - } - - #[test] - fn test_path_matches_module_too_many_parts() { - let file = PathBuf::from("src/helpers.rs"); - assert!(!path_matches_module( - &file, - &["deeply", "nested", "utils", "helpers", "func"] - )); - } - - fn setup_zig_workspace() -> TempDir { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("build.zig.zon"), - r#" -.{ - .name = .myproject, - .dependencies = .{ - .utils = .{ .path = "vendor/utils" }, - .remote_pkg = .{ .url = "https://example.com/pkg.tar.gz" }, - }, -} -"#, - ) - .unwrap(); - - fs::write( - dir.path().join("build.zig"), - "const std = @import(\"std\");\n", - ) - .unwrap(); - - let src_dir = dir.path().join("src"); - fs::create_dir_all(&src_dir).unwrap(); - fs::write(src_dir.join("main.zig"), "const std = @import(\"std\");\n").unwrap(); - fs::write(src_dir.join("helper.zig"), "pub fn help() void {}\n").unwrap(); - - let vendor_dir = dir.path().join("vendor/utils"); - fs::create_dir_all(&vendor_dir).unwrap(); - fs::write(vendor_dir.join("main.zig"), "pub fn utilFn() void {}\n").unwrap(); - - dir - } - - #[test] - fn test_zig_workspace_discovery() { - let dir = setup_zig_workspace(); - let ws = ZigWorkspace::discover(dir.path()).unwrap(); - - assert!(ws.is_some()); - let ws = ws.unwrap(); - assert_eq!(ws.root(), dir.path()); - assert_eq!(ws.packages().len(), 1); - assert_eq!(ws.packages()[0].0, "utils"); - } - - #[test] - fn test_zig_workspace_no_zon() { - let dir = TempDir::new().unwrap(); - let ws = ZigWorkspace::discover(dir.path()).unwrap(); - assert!(ws.is_none()); - } - - #[test] - fn test_zig_workspace_resolve_local_package() { - let dir = setup_zig_workspace(); - 
let ws = ZigWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("utils"); - assert!(resolved.is_some()); - assert!(resolved.unwrap().ends_with("vendor/utils/main.zig")); - } - - #[test] - fn test_zig_workspace_skip_std() { - let dir = setup_zig_workspace(); - let ws = ZigWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("std"); - assert!(resolved.is_none()); - } - - #[test] - fn test_zig_workspace_skip_unknown_package() { - let dir = setup_zig_workspace(); - let ws = ZigWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("remote_pkg"); - assert!(resolved.is_none()); - - let resolved = ws.resolve_module("nonexistent"); - assert!(resolved.is_none()); - } - - #[test] - fn test_zig_workspace_resolve_relative_import() { - let dir = setup_zig_workspace(); - let ws = ZigWorkspace::discover(dir.path()).unwrap().unwrap(); - - let resolved = ws.resolve_module("helper.zig"); - assert!(resolved.is_some()); - } - - #[test] - fn test_parse_zig_zon_deps() { - let content = r#" -.{ - .name = .test, - .dependencies = .{ - .foo = .{ .path = "libs/foo" }, - .bar = .{ .path = "vendor/bar" }, - .remote = .{ .url = "https://example.com" }, - }, -} -"#; - let root = Path::new("/project"); - let deps = parse_zig_zon_deps(content, root); - - assert_eq!(deps.len(), 2); - assert!(deps - .iter() - .any(|(n, p)| n == "foo" && p == Path::new("/project/libs/foo"))); - assert!(deps - .iter() - .any(|(n, p)| n == "bar" && p == Path::new("/project/vendor/bar"))); - } - - #[test] - fn test_path_matches_java_package() { - let file = PathBuf::from("src/main/java/com/example/MyClass.java"); - assert!(path_matches_java_package(&file, &["com", "example"])); - assert!(path_matches_java_package(&file, &["example"])); - assert!(!path_matches_java_package(&file, &["org", "example"])); - } - - #[test] - fn test_path_matches_java_package_empty() { - let file = PathBuf::from("src/MyClass.java"); - 
assert!(path_matches_java_package(&file, &[])); - } - - #[test] - fn test_path_matches_java_package_deeply_nested() { - let file = PathBuf::from("src/main/java/com/example/internal/utils/Helper.java"); - assert!(path_matches_java_package( - &file, - &["com", "example", "internal", "utils"] - )); - assert!(path_matches_java_package(&file, &["internal", "utils"])); - assert!(!path_matches_java_package( - &file, - &["com", "other", "internal", "utils"] - )); - } } From 7157bb35695afd28e4073f007fd699c372ec917e Mon Sep 17 00:00:00 2001 From: ro Date: Sun, 28 Dec 2025 21:50:08 -0800 Subject: [PATCH 20/35] fix: reorder resolution to check index first, track all discovered files --- crates/code/src/resolve.rs | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index aba1b82..dda37fe 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -513,11 +513,11 @@ impl<'a> Resolver<'a> { return Ok(Some(def)); } - if let Some(def) = self.resolve_via_imports(callee, from_file) { + if let Some(def) = resolve_by_index(callee, self.index) { return Ok(Some(def)); } - if let Some(def) = resolve_by_index(callee, self.index) { + if let Some(def) = self.resolve_via_imports(callee, from_file) { return Ok(Some(def)); } @@ -547,14 +547,10 @@ impl<'a> Resolver<'a> { } if let Some(resolved) = resolve_relative(&import.module_path, from_file, ext) { + self.discovered_files.borrow_mut().insert(resolved.clone()); if let Some(def) = self.find_def_in_file(&resolved, callee) { return Some(def); } - if let Some(target_record) = self.index.get(&resolved) { - if let Some(def) = target_record.definitions.first() { - return Some(def.clone()); - } - } } let patterns = normalize_to_patterns(&import.module_path, ext); @@ -563,11 +559,6 @@ impl<'a> Resolver<'a> { if let Some(def) = self.find_def_in_file(&resolved, callee) { return Some(def); } - if let Some(target_record) = self.index.get(&resolved) { - if 
let Some(def) = target_record.definitions.first() {
-                    return Some(def.clone());
-                }
-            }
         }
     }

From 7157bb35695afd28e4073f007fd699c372ec917e Mon Sep 17 00:00:00 2001
From: ro
Date: Mon, 29 Dec 2025 00:49:52 -0800
Subject: [PATCH 21/35] feat: implement call graph with traversal and
 transitive closure

---
 crates/code/src/graph.rs | 606 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 594 insertions(+), 12 deletions(-)

diff --git a/crates/code/src/graph.rs b/crates/code/src/graph.rs
index b677b22..c35f5e0 100644
--- a/crates/code/src/graph.rs
+++ b/crates/code/src/graph.rs
@@ -1,8 +1,8 @@
-use std::collections::{HashMap, HashSet};
-
-use anyhow::Result;
+use std::collections::{HashMap, HashSet, VecDeque};
+use std::path::Path;
 
 use super::index::{Definition, Index};
+use super::resolve::Resolver;
 
 pub type NodeId = usize;
 
@@ -16,24 +16,606 @@ pub struct CallGraphNode {
 #[derive(Debug, Default)]
 pub struct CallGraph {
     pub nodes: HashMap<NodeId, CallGraphNode>,
-    #[allow(dead_code)]
-    name_to_id: HashMap<String, NodeId>,
+    name_to_id: HashMap<String, NodeId>,
+    file_name_to_id: HashMap<(String, String), NodeId>,
+    next_id: NodeId,
 }
 
 impl CallGraph {
-    pub fn build(_index: &Index) -> Result<Self> {
-        todo!("build call graph from index")
+    pub fn new() -> Self {
+        Self {
+            nodes: HashMap::new(),
+            name_to_id: HashMap::new(),
+            file_name_to_id: HashMap::new(),
+            next_id: 0,
+        }
+    }
+
+    pub fn build(index: &Index, root: &Path) -> Self {
+        let resolver = Resolver::new(index, root.to_path_buf());
+        let mut graph = CallGraph::new();
+
+        for def in index.definitions() {
+            graph.add_definition(def.clone());
+        }
+
+        for call in index.calls() {
+            let caller_id = call
+                .caller
+                .as_ref()
+                .and_then(|name| graph.find_node_by_file_and_name(&call.file, name));
+
+            let callee_id = graph.find_node(&call.callee).or_else(|| {
+                resolver
+                    .resolve(&call.callee, &call.file)
+                    .ok()
+                    .flatten()
+                    .and_then(|def| graph.find_node(&def.name))
+            });
+
+            if let (Some(caller), Some(callee)) = (caller_id, callee_id) {
+                graph.add_edge(caller, callee);
+            }
+        }
+
+        graph
+    }
+
+    fn add_definition(&mut self, definition: Definition) -> NodeId {
+        let file_key = definition.file.to_string_lossy().to_string();
+        let composite_key = (file_key, definition.name.clone());
+
+        if let Some(&existing_id) = self.file_name_to_id.get(&composite_key) {
+            return existing_id;
+        }
+
+        let id = self.next_id;
+        self.next_id += 1;
+
+        let node = CallGraphNode {
+            definition: definition.clone(),
+            callees: HashSet::new(),
+            callers: HashSet::new(),
+        };
+
+        self.nodes.insert(id, node);
+        self.name_to_id.entry(definition.name.clone()).or_insert(id);
+        self.file_name_to_id.insert(composite_key, id);
+
+        id
+    }
+
+    fn add_edge(&mut self, caller: NodeId, callee: NodeId) {
+        if caller == callee {
+            return;
+        }
+
+        if let Some(caller_node) = self.nodes.get_mut(&caller) {
+            caller_node.callees.insert(callee);
+        }
+
+        if let Some(callee_node) = self.nodes.get_mut(&callee) {
+            callee_node.callers.insert(caller);
+        }
+    }
+
+    pub fn find_node(&self, name: &str) -> Option<NodeId> {
+        self.name_to_id.get(name).copied()
+    }
+
+    pub fn find_node_by_file_and_name(&self, file: &Path, name: &str) -> Option<NodeId> {
+        let file_key = file.to_string_lossy().to_string();
+        self.file_name_to_id
+            .get(&(file_key, name.to_string()))
+            .copied()
+    }
+
+    pub fn get_node(&self, id: NodeId) -> Option<&CallGraphNode> {
+        self.nodes.get(&id)
+    }
+
+    pub fn get_callees(&self, node_id: NodeId) -> Vec<&CallGraphNode> {
+        self.nodes
+            .get(&node_id)
+            .map(|node| {
+                node.callees
+                    .iter()
+                    .filter_map(|id| self.nodes.get(id))
+                    .collect()
+            })
+            .unwrap_or_default()
+    }
+
+    pub fn get_callers(&self, node_id: NodeId) -> Vec<&CallGraphNode> {
+        self.nodes
+            .get(&node_id)
+            .map(|node| {
+                node.callers
+                    .iter()
+                    .filter_map(|id| self.nodes.get(id))
+                    .collect()
+            })
+            .unwrap_or_default()
+    }
+
+    pub fn get_transitive_callees(&self, node_id: NodeId) -> Vec<&CallGraphNode> {
+        let mut visited = HashSet::new();
+        let mut result = Vec::new();
+        let mut queue = VecDeque::new();
+
+        if let Some(node) = self.nodes.get(&node_id) {
+            for &callee_id in &node.callees {
+                queue.push_back(callee_id);
+            }
+        }
+
+        while let Some(current_id) = queue.pop_front() {
+            if !visited.insert(current_id) {
+                continue;
+            }
+
+            if let Some(node) = self.nodes.get(&current_id) {
+                result.push(node);
+
+                for &callee_id in &node.callees {
+                    if !visited.contains(&callee_id) {
+                        queue.push_back(callee_id);
+                    }
+                }
+            }
+        }
+
+        result
+    }
+
+    pub fn get_transitive_callers(&self, node_id: NodeId) -> Vec<&CallGraphNode> {
+        let mut visited = HashSet::new();
+        let mut result = Vec::new();
+        let mut queue = VecDeque::new();
+
+        if let Some(node) = self.nodes.get(&node_id) {
+            for &caller_id in &node.callers {
+                queue.push_back(caller_id);
+            }
+        }
+
+        while let Some(current_id) = queue.pop_front() {
+            if !visited.insert(current_id) {
+                continue;
+            }
+
+            if let Some(node) = self.nodes.get(&current_id) {
+                result.push(node);
+
+                for &caller_id in &node.callers {
+                    if !visited.contains(&caller_id) {
+                        queue.push_back(caller_id);
+                    }
+                }
+            }
+        }
+
+        result
+    }
+
+    pub fn post_order(&self, node_id: NodeId) -> Vec<NodeId> {
+        let mut visited = HashSet::new();
+        let mut result = Vec::new();
+        self.post_order_dfs(node_id, &mut visited, &mut result);
+        result
+    }
+
+    fn post_order_dfs(
+        &self,
+        node_id: NodeId,
+        visited: &mut HashSet<NodeId>,
+        result: &mut Vec<NodeId>,
+    ) {
+        if !visited.insert(node_id) {
+            return;
+        }
+
+        if let Some(node) = self.nodes.get(&node_id) {
+            for &callee_id in &node.callees {
+                self.post_order_dfs(callee_id, visited, result);
+            }
+        }
+
+        result.push(node_id);
+    }
+
+    pub fn post_order_definitions(&self, node_id: NodeId) -> Vec<&Definition> {
+        self.post_order(node_id)
+            .into_iter()
+            .filter_map(|id| self.nodes.get(&id).map(|n| &n.definition))
+            .collect()
+    }
+
+    pub fn node_count(&self) -> usize {
+        self.nodes.len()
+    }
+
+    pub fn edge_count(&self) -> usize {
+        self.nodes.values().map(|n| n.callees.len()).sum()
+    }
+
+    pub fn roots(&self) -> Vec<NodeId> {
+        self.nodes
+            .iter()
+            .filter(|(_, node)| node.callers.is_empty())
+            .map(|(&id, _)| id)
+            .collect()
+    }
+
+    pub fn leaves(&self) -> Vec<NodeId> {
+        self.nodes
+            .iter()
+            .filter(|(_, node)| node.callees.is_empty())
+            .map(|(&id, _)| id)
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::index::{Call, DefinitionKind, FileRecord, Span};
+    use std::path::PathBuf;
+
+    fn make_span() -> Span {
+        Span {
+            start_byte: 0,
+            end_byte: 100,
+            start_line: 1,
+            end_line: 10,
+        }
+    }
+
+    fn make_definition(name: &str, file: &str) -> Definition {
+        Definition {
+            name: name.to_string(),
+            kind: DefinitionKind::Function,
+            span: make_span(),
+            file: PathBuf::from(file),
+        }
+    }
+
+    fn make_call(callee: &str, caller: Option<&str>, file: &str) -> Call {
+        Call {
+            callee: callee.to_string(),
+            span: make_span(),
+            file: PathBuf::from(file),
+            caller: caller.map(|s| s.to_string()),
+        }
+    }
+
+    #[test]
+    fn test_build_empty_index() {
+        let index = Index::new();
+        let graph = CallGraph::build(&index, Path::new("."));
+        assert_eq!(graph.node_count(), 0);
+        assert_eq!(graph.edge_count(), 0);
     }
 
-    pub fn get_callees(&self, _node_id: NodeId) -> Vec<&CallGraphNode> {
-        todo!("get direct callees")
+    #[test]
+    fn test_build_definitions_only() {
+        let mut index = Index::new();
+        index.update(FileRecord {
+            path: PathBuf::from("src/main.rs"),
+            mtime: 0,
+            size: 0,
+            definitions: vec![
+                make_definition("main", "src/main.rs"),
+                make_definition("helper", "src/main.rs"),
+            ],
+            calls: vec![],
+            imports: vec![],
+        });
+
+        let graph = CallGraph::build(&index, Path::new("."));
+        assert_eq!(graph.node_count(), 2);
+        assert_eq!(graph.edge_count(), 0);
     }
 
-    pub fn get_transitive_callees(&self, _node_id: NodeId) -> Vec<&CallGraphNode> {
-        todo!("get all callees recursively")
+    #[test]
+    fn test_build_with_calls() {
+        let mut index = Index::new();
+        index.update(FileRecord {
+            path: PathBuf::from("src/main.rs"),
+            mtime: 0,
+            size: 0,
+            definitions: vec![
+                make_definition("main", "src/main.rs"),
+                make_definition("helper", "src/main.rs"),
+            ],
+            calls: vec![make_call("helper", Some("main"), "src/main.rs")],
+            imports: vec![],
+        });
+
+        let graph = CallGraph::build(&index, Path::new("."));
+        assert_eq!(graph.node_count(), 2);
+        assert_eq!(graph.edge_count(), 1);
+
+        let main_id = graph.find_node("main").unwrap();
+        let callees = graph.get_callees(main_id);
+        assert_eq!(callees.len(), 1);
+        assert_eq!(callees[0].definition.name, "helper");
     }
 
-    pub fn post_order(&self, _node_id: NodeId) -> Vec<NodeId> {
-        todo!("return nodes in post-order traversal")
+    #[test]
+    fn test_get_callees_and_callers() {
+        let mut index = Index::new();
+        index.update(FileRecord {
+            path: PathBuf::from("src/lib.rs"),
+            mtime: 0,
+            size: 0,
+            definitions: vec![
+                make_definition("a", "src/lib.rs"),
+                make_definition("b", "src/lib.rs"),
+                make_definition("c", "src/lib.rs"),
+            ],
+            calls: vec![
+                make_call("b", Some("a"), "src/lib.rs"),
+                make_call("c", Some("a"), "src/lib.rs"),
+                make_call("c", Some("b"), "src/lib.rs"),
+            ],
+            imports: vec![],
+        });
+
+        let graph = CallGraph::build(&index, Path::new("."));
+
+        let a_id = graph.find_node("a").unwrap();
+        let c_id = graph.find_node("c").unwrap();
+
+        let a_callees = graph.get_callees(a_id);
+        assert_eq!(a_callees.len(), 2);
+
+        let c_callers = graph.get_callers(c_id);
+        assert_eq!(c_callers.len(), 2);
+
+        let a_callers = graph.get_callers(a_id);
+        assert!(a_callers.is_empty());
+
+        let c_callees = graph.get_callees(c_id);
+        assert!(c_callees.is_empty());
+
+        assert_eq!(graph.roots(), vec![a_id]);
+        assert_eq!(graph.leaves(), vec![c_id]);
+    }
+
+    #[test]
+    fn test_transitive_callees() {
+        let mut index = Index::new();
+        index.update(FileRecord {
+            path: PathBuf::from("src/lib.rs"),
+            mtime: 0,
+            size: 0,
+            definitions: vec![
+                make_definition("a", "src/lib.rs"),
+                make_definition("b", "src/lib.rs"),
+                make_definition("c", "src/lib.rs"),
+                make_definition("d", "src/lib.rs"),
+            ],
+            calls: vec![
+                make_call("b", Some("a"), "src/lib.rs"),
+                make_call("c", Some("b"),
"src/lib.rs"), + make_call("d", Some("c"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index, Path::new(".")); + let a_id = graph.find_node("a").unwrap(); + + let transitive = graph.get_transitive_callees(a_id); + assert_eq!(transitive.len(), 3); + + let names: HashSet<_> = transitive + .iter() + .map(|n| n.definition.name.as_str()) + .collect(); + assert!(names.contains("b")); + assert!(names.contains("c")); + assert!(names.contains("d")); + } + + #[test] + fn test_transitive_callees_with_cycle() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("a", "src/lib.rs"), + make_definition("b", "src/lib.rs"), + make_definition("c", "src/lib.rs"), + ], + calls: vec![ + make_call("b", Some("a"), "src/lib.rs"), + make_call("c", Some("b"), "src/lib.rs"), + make_call("a", Some("c"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index, Path::new(".")); + let a_id = graph.find_node("a").unwrap(); + + let transitive = graph.get_transitive_callees(a_id); + assert_eq!(transitive.len(), 3); + } + + #[test] + fn test_post_order() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("a", "src/lib.rs"), + make_definition("b", "src/lib.rs"), + make_definition("c", "src/lib.rs"), + ], + calls: vec![ + make_call("b", Some("a"), "src/lib.rs"), + make_call("c", Some("b"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index, Path::new(".")); + let a_id = graph.find_node("a").unwrap(); + let b_id = graph.find_node("b").unwrap(); + let c_id = graph.find_node("c").unwrap(); + + let order = graph.post_order(a_id); + + let c_pos = order.iter().position(|&id| id == c_id).unwrap(); + let b_pos = order.iter().position(|&id| id == b_id).unwrap(); + let a_pos = 
order.iter().position(|&id| id == a_id).unwrap(); + + assert!(c_pos < b_pos); + assert!(b_pos < a_pos); + } + + #[test] + fn test_post_order_with_cycle() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("a", "src/lib.rs"), + make_definition("b", "src/lib.rs"), + ], + calls: vec![ + make_call("b", Some("a"), "src/lib.rs"), + make_call("a", Some("b"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index, Path::new(".")); + let a_id = graph.find_node("a").unwrap(); + + let order = graph.post_order(a_id); + assert_eq!(order.len(), 2); + } + + #[test] + fn test_post_order_definitions() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("main", "src/lib.rs"), + make_definition("init", "src/lib.rs"), + ], + calls: vec![make_call("init", Some("main"), "src/lib.rs")], + imports: vec![], + }); + + let graph = CallGraph::build(&index, Path::new(".")); + let main_id = graph.find_node("main").unwrap(); + + let defs = graph.post_order_definitions(main_id); + assert_eq!(defs.len(), 2); + assert_eq!(defs[0].name, "init"); + assert_eq!(defs[1].name, "main"); + } + + #[test] + fn test_no_self_loops() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("recursive", "src/lib.rs")], + calls: vec![make_call("recursive", Some("recursive"), "src/lib.rs")], + imports: vec![], + }); + + let graph = CallGraph::build(&index, Path::new(".")); + let id = graph.find_node("recursive").unwrap(); + let node = graph.get_node(id).unwrap(); + + assert!(node.callees.is_empty()); + assert!(node.callers.is_empty()); + } + + #[test] + fn test_cross_file_calls() { + let mut index = Index::new(); + + index.update(FileRecord { + path: 
PathBuf::from("src/main.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("main", "src/main.rs")], + calls: vec![make_call("helper", Some("main"), "src/main.rs")], + imports: vec![], + }); + + index.update(FileRecord { + path: PathBuf::from("src/utils.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("helper", "src/utils.rs")], + calls: vec![], + imports: vec![], + }); + + let graph = CallGraph::build(&index, Path::new(".")); + + let main_id = graph.find_node("main").unwrap(); + let callees = graph.get_callees(main_id); + + assert_eq!(callees.len(), 1); + assert_eq!(callees[0].definition.name, "helper"); + assert_eq!(callees[0].definition.file, PathBuf::from("src/utils.rs")); + } + + #[test] + fn test_find_node_by_file_and_name() { + let mut index = Index::new(); + + index.update(FileRecord { + path: PathBuf::from("src/a.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("foo", "src/a.rs")], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: PathBuf::from("src/b.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("foo", "src/b.rs")], + calls: vec![], + imports: vec![], + }); + + let graph = CallGraph::build(&index, Path::new(".")); + + let a_id = graph.find_node_by_file_and_name(Path::new("src/a.rs"), "foo"); + let b_id = graph.find_node_by_file_and_name(Path::new("src/b.rs"), "foo"); + + assert!(a_id.is_some()); + assert!(b_id.is_some()); + assert_ne!(a_id, b_id); + + let a_node = graph.get_node(a_id.unwrap()).unwrap(); + let b_node = graph.get_node(b_id.unwrap()).unwrap(); + + assert_eq!(a_node.definition.file, PathBuf::from("src/a.rs")); + assert_eq!(b_node.definition.file, PathBuf::from("src/b.rs")); } } From 8f3c45a06bf3e14ee2039d8894db9498e7f74e92 Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 00:52:39 -0800 Subject: [PATCH 22/35] test: add integration tests for resolver and call graph --- crates/code/tests/integration.rs | 850 +++++++++++++++++++++++++++++++ 1 
file changed, 850 insertions(+) create mode 100644 crates/code/tests/integration.rs diff --git a/crates/code/tests/integration.rs b/crates/code/tests/integration.rs new file mode 100644 index 0000000..780f201 --- /dev/null +++ b/crates/code/tests/integration.rs @@ -0,0 +1,850 @@ +use std::collections::HashSet; +use std::fs; +use std::path::Path; + +use glimpse_code::extract::Extractor; +use glimpse_code::graph::CallGraph; +use glimpse_code::index::{file_fingerprint, FileRecord, Index}; +use glimpse_code::resolve::{resolve_by_index, resolve_by_search, resolve_same_file, Resolver}; +use tree_sitter::Parser; + +fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &str) { + let mut parser = Parser::new(); + parser.set_language(extractor.language()).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let (mtime, size) = file_fingerprint(path).unwrap_or((0, source.len() as u64)); + + let record = FileRecord { + path: path.to_path_buf(), + mtime, + size, + definitions: extractor.extract_definitions(&tree, source.as_bytes(), path), + calls: extractor.extract_calls(&tree, source.as_bytes(), path), + imports: extractor.extract_imports(&tree, source.as_bytes(), path), + }; + + index.update(record); +} + +mod resolver_tests { + use super::*; + use glimpse_code::index::{Call, Definition, DefinitionKind, FileRecord, Import, Span}; + use std::path::PathBuf; + use tempfile::TempDir; + + fn make_span() -> Span { + Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 1, + } + } + + fn make_def(name: &str, file: &Path) -> Definition { + Definition { + name: name.to_string(), + kind: DefinitionKind::Function, + span: make_span(), + file: file.to_path_buf(), + } + } + + #[test] + fn test_resolve_same_file_priority() { + let mut index = Index::new(); + let file_a = PathBuf::from("src/a.rs"); + let file_b = PathBuf::from("src/b.rs"); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: 
vec![make_def("helper", &file_a)], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("helper", &file_b)], + calls: vec![], + imports: vec![], + }); + + let from_a = resolve_same_file("helper", &file_a, &index); + assert!(from_a.is_some()); + assert_eq!(from_a.unwrap().file, file_a); + + let from_b = resolve_same_file("helper", &file_b, &index); + assert!(from_b.is_some()); + assert_eq!(from_b.unwrap().file, file_b); + + let not_found = resolve_same_file("nonexistent", &file_a, &index); + assert!(not_found.is_none()); + } + + #[test] + fn test_resolve_by_index_cross_file() { + let mut index = Index::new(); + let file_a = PathBuf::from("src/a.rs"); + let file_b = PathBuf::from("src/b.rs"); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("func_a", &file_a)], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("func_b", &file_b)], + calls: vec![], + imports: vec![], + }); + + let found_a = resolve_by_index("func_a", &index); + assert!(found_a.is_some()); + assert_eq!(found_a.unwrap().file, file_a); + + let found_b = resolve_by_index("func_b", &index); + assert!(found_b.is_some()); + assert_eq!(found_b.unwrap().file, file_b); + } + + #[test] + fn test_resolve_by_search_rust() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("lib.rs"), + "pub fn my_searched_function() {\n println!(\"found\");\n}\n", + ) + .unwrap(); + + let found = resolve_by_search("my_searched_function", dir.path()).unwrap(); + assert!(found.is_some()); + let def = found.unwrap(); + assert_eq!(def.name, "my_searched_function"); + assert!(def.file.ends_with("lib.rs")); + } + + #[test] + fn test_resolve_by_search_python() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("utils.py"), + "def 
searched_python_func():\n pass\n", + ) + .unwrap(); + + let found = resolve_by_search("searched_python_func", dir.path()).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "searched_python_func"); + } + + #[test] + fn test_resolve_by_search_go() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("main.go"), + "package main\n\nfunc searchedGoFunc() {\n}\n", + ) + .unwrap(); + + let found = resolve_by_search("searchedGoFunc", dir.path()).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "searchedGoFunc"); + } + + #[test] + fn test_resolve_by_search_typescript() { + let dir = TempDir::new().unwrap(); + + fs::write( + dir.path().join("index.ts"), + "function searchedTsFunc() {\n return 42;\n}\n", + ) + .unwrap(); + + let found = resolve_by_search("searchedTsFunc", dir.path()).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "searchedTsFunc"); + } + + #[test] + fn test_resolve_by_search_not_found() { + let dir = TempDir::new().unwrap(); + + fs::write(dir.path().join("empty.rs"), "// no functions here\n").unwrap(); + + let found = resolve_by_search("nonexistent_function", dir.path()).unwrap(); + assert!(found.is_none()); + } + + #[test] + fn test_resolver_resolution_chain() { + let dir = TempDir::new().unwrap(); + let mut index = Index::new(); + + let file_main = dir.path().join("main.rs"); + let file_utils = dir.path().join("utils.rs"); + + fs::write(&file_main, "fn main() { helper(); }").unwrap(); + fs::write(&file_utils, "pub fn helper() { nested(); }\npub fn nested() {}").unwrap(); + + index.update(FileRecord { + path: file_main.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("main", &file_main)], + calls: vec![Call { + callee: "helper".to_string(), + caller: Some("main".to_string()), + span: make_span(), + file: file_main.clone(), + }], + imports: vec![], + }); + + index.update(FileRecord { + path: file_utils.clone(), + mtime: 0, + size: 0, + definitions: 
vec![make_def("helper", &file_utils), make_def("nested", &file_utils)], + calls: vec![], + imports: vec![], + }); + + let resolver = Resolver::new(&index, dir.path().to_path_buf()); + + let found = resolver.resolve("helper", &file_main).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, file_utils); + + let same_file = resolver.resolve("nested", &file_utils).unwrap(); + assert!(same_file.is_some()); + assert_eq!(same_file.unwrap().file, file_utils); + } + + #[test] + fn test_resolver_grep_fallback() { + let dir = TempDir::new().unwrap(); + let index = Index::new(); + + fs::write( + dir.path().join("hidden.rs"), + "fn not_indexed_function() {\n println!(\"hidden\");\n}\n", + ) + .unwrap(); + + let resolver = Resolver::new(&index, dir.path().to_path_buf()); + let from_file = dir.path().join("caller.rs"); + + let found = resolver.resolve("not_indexed_function", &from_file).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "not_indexed_function"); + + let discovered = resolver.files_to_index(); + assert!(!discovered.is_empty()); + } + + #[test] + fn test_resolver_tracks_discovered_files() { + let dir = TempDir::new().unwrap(); + let index = Index::new(); + + fs::write(dir.path().join("a.rs"), "fn discovered_a() {}").unwrap(); + fs::write(dir.path().join("b.rs"), "fn discovered_b() {}").unwrap(); + + let resolver = Resolver::new(&index, dir.path().to_path_buf()); + let from_file = dir.path().join("main.rs"); + + resolver.resolve("discovered_a", &from_file).unwrap(); + resolver.resolve("discovered_b", &from_file).unwrap(); + + let discovered = resolver.files_to_index(); + assert_eq!(discovered.len(), 2); + + resolver.clear_discovered(); + assert!(resolver.files_to_index().is_empty()); + } + + #[test] + fn test_resolver_with_imports() { + let dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + fs::create_dir_all(&src).unwrap(); + + fs::write(src.join("utils.rs"), "pub fn imported_helper() {}").unwrap(); + + let 
mut index = Index::new(); + let main_file = dir.path().join("src/main.rs"); + + index.update(FileRecord { + path: src.join("utils.rs"), + mtime: 0, + size: 0, + definitions: vec![make_def("imported_helper", &src.join("utils.rs"))], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: main_file.clone(), + mtime: 0, + size: 0, + definitions: vec![], + calls: vec![], + imports: vec![Import { + module_path: "crate::utils::imported_helper".to_string(), + alias: None, + span: make_span(), + file: main_file.clone(), + }], + }); + + let resolver = Resolver::new(&index, dir.path().to_path_buf()); + + let found = resolver.resolve("imported_helper", &main_file).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "imported_helper"); + } +} + +mod call_graph_resolution { + use super::*; + use glimpse_code::index::{Call, Definition, DefinitionKind, FileRecord, Span}; + use tempfile::TempDir; + + fn make_span() -> Span { + Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 1, + } + } + + fn make_def(name: &str, file: &Path) -> Definition { + Definition { + name: name.to_string(), + kind: DefinitionKind::Function, + span: make_span(), + file: file.to_path_buf(), + } + } + + #[test] + fn test_graph_resolves_cross_file_calls() { + let dir = TempDir::new().unwrap(); + let file_a = dir.path().join("a.rs"); + let file_b = dir.path().join("b.rs"); + + fs::write(&file_a, "fn caller() { callee(); }").unwrap(); + fs::write(&file_b, "pub fn callee() {}").unwrap(); + + let mut index = Index::new(); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("caller", &file_a)], + calls: vec![Call { + callee: "callee".to_string(), + caller: Some("caller".to_string()), + span: make_span(), + file: file_a.clone(), + }], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("callee", &file_b)], + calls: vec![], 
+ imports: vec![], + }); + + let graph = CallGraph::build(&index, dir.path()); + + let caller_id = graph.find_node("caller").unwrap(); + let callees = graph.get_callees(caller_id); + + assert_eq!(callees.len(), 1); + assert_eq!(callees[0].definition.name, "callee"); + assert_eq!(callees[0].definition.file, file_b); + } + + #[test] + fn test_resolver_finds_unindexed_via_grep() { + let dir = TempDir::new().unwrap(); + let file_caller = dir.path().join("caller.rs"); + let file_hidden = dir.path().join("hidden.rs"); + + fs::write(&file_caller, "fn caller() { hidden_func(); }").unwrap(); + fs::write(&file_hidden, "fn hidden_func() {}").unwrap(); + + let index = Index::new(); + let resolver = Resolver::new(&index, dir.path().to_path_buf()); + + let found = resolver.resolve("hidden_func", &file_caller).unwrap(); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "hidden_func"); + + let discovered = resolver.files_to_index(); + assert!(!discovered.is_empty()); + } + + #[test] + fn test_graph_same_name_different_files() { + let dir = TempDir::new().unwrap(); + let file_a = dir.path().join("a.rs"); + let file_b = dir.path().join("b.rs"); + let file_main = dir.path().join("main.rs"); + + fs::write(&file_a, "fn helper() {}").unwrap(); + fs::write(&file_b, "fn helper() {}").unwrap(); + fs::write(&file_main, "fn main() { helper(); }").unwrap(); + + let mut index = Index::new(); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("helper", &file_a)], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("helper", &file_b)], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_main.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("main", &file_main)], + calls: vec![Call { + callee: "helper".to_string(), + caller: Some("main".to_string()), + span: make_span(), + file: 
file_main.clone(), + }], + imports: vec![], + }); + + let graph = CallGraph::build(&index, dir.path()); + + assert_eq!(graph.node_count(), 3); + + let a_id = graph.find_node_by_file_and_name(&file_a, "helper"); + let b_id = graph.find_node_by_file_and_name(&file_b, "helper"); + assert!(a_id.is_some()); + assert!(b_id.is_some()); + assert_ne!(a_id, b_id); + } + + #[test] + fn test_graph_transitive_through_resolution() { + let dir = TempDir::new().unwrap(); + let file_a = dir.path().join("a.rs"); + let file_b = dir.path().join("b.rs"); + let file_c = dir.path().join("c.rs"); + + fs::write(&file_a, "fn entry() { middle(); }").unwrap(); + fs::write(&file_b, "fn middle() { leaf(); }").unwrap(); + fs::write(&file_c, "fn leaf() {}").unwrap(); + + let mut index = Index::new(); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("entry", &file_a)], + calls: vec![Call { + callee: "middle".to_string(), + caller: Some("entry".to_string()), + span: make_span(), + file: file_a.clone(), + }], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("middle", &file_b)], + calls: vec![Call { + callee: "leaf".to_string(), + caller: Some("middle".to_string()), + span: make_span(), + file: file_b.clone(), + }], + imports: vec![], + }); + + index.update(FileRecord { + path: file_c.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("leaf", &file_c)], + calls: vec![], + imports: vec![], + }); + + let graph = CallGraph::build(&index, dir.path()); + + let entry_id = graph.find_node("entry").unwrap(); + let transitive = graph.get_transitive_callees(entry_id); + + assert_eq!(transitive.len(), 2); + + let names: HashSet<_> = transitive.iter().map(|n| n.definition.name.as_str()).collect(); + assert!(names.contains("middle")); + assert!(names.contains("leaf")); + + let order = graph.post_order_definitions(entry_id); + assert_eq!(order.len(), 3); + 
assert_eq!(order[0].name, "leaf"); + assert_eq!(order[1].name, "middle"); + assert_eq!(order[2].name, "entry"); + } + + #[test] + fn test_graph_unresolved_calls_ignored() { + let dir = TempDir::new().unwrap(); + let file_a = dir.path().join("a.rs"); + + fs::write(&file_a, "fn caller() { nonexistent(); }").unwrap(); + + let mut index = Index::new(); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("caller", &file_a)], + calls: vec![Call { + callee: "nonexistent".to_string(), + caller: Some("caller".to_string()), + span: make_span(), + file: file_a.clone(), + }], + imports: vec![], + }); + + let graph = CallGraph::build(&index, dir.path()); + + let caller_id = graph.find_node("caller").unwrap(); + let callees = graph.get_callees(caller_id); + + assert!(callees.is_empty()); + } +} + +mod language_extraction { + use super::*; + use tempfile::TempDir; + + #[test] + #[ignore] + fn test_rust_full_pipeline() { + let dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + fs::create_dir_all(&src).unwrap(); + + let main_rs = r#" +mod utils; + +fn main() { + let config = load_config(); + utils::process(config); +} + +fn load_config() -> Config { + Config::default() +} + +struct Config { + data: String, +} + +impl Default for Config { + fn default() -> Self { + Self { data: String::new() } + } +} +"#; + + let utils_rs = r#" +use crate::Config; + +pub fn process(cfg: Config) { + validate(&cfg); + save(&cfg); +} + +fn validate(cfg: &Config) { + check_data(cfg); +} + +fn check_data(_cfg: &Config) {} + +fn save(cfg: &Config) { + write_file(&cfg.data); +} + +fn write_file(_data: &str) {} +"#; + + fs::write(src.join("main.rs"), main_rs).unwrap(); + fs::write(src.join("utils.rs"), utils_rs).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("rust").unwrap(); + + index_file(&mut index, &extractor, &src.join("main.rs"), main_rs); + index_file(&mut index, &extractor, &src.join("utils.rs"), 
utils_rs); + + let graph = CallGraph::build(&index, dir.path()); + + assert!(graph.node_count() >= 5); + + if let Some(process_id) = graph.find_node("process") { + let callees = graph.get_callees(process_id); + let names: HashSet<_> = callees.iter().map(|n| n.definition.name.as_str()).collect(); + assert!(names.contains("validate") || names.contains("save")); + } + } + + #[test] + #[ignore] + fn test_python_full_pipeline() { + let dir = TempDir::new().unwrap(); + + let main_py = r#" +from utils import helper + +def main(): + data = load() + result = process(data) + helper(result) + +def load(): + return read_file() + +def read_file(): + return "data" + +def process(data): + return transform(data) + +def transform(x): + return x.upper() + +if __name__ == "__main__": + main() +"#; + + let utils_py = r#" +def helper(data): + print(data) + format_output(data) + +def format_output(s): + return s.strip() +"#; + + fs::write(dir.path().join("main.py"), main_py).unwrap(); + fs::write(dir.path().join("utils.py"), utils_py).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("python").unwrap(); + + index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); + index_file(&mut index, &extractor, &dir.path().join("utils.py"), utils_py); + + let graph = CallGraph::build(&index, dir.path()); + + if let Some(main_id) = graph.find_node("main") { + let transitive = graph.get_transitive_callees(main_id); + assert!(!transitive.is_empty()); + } + } + + #[test] + #[ignore] + fn test_typescript_full_pipeline() { + let dir = TempDir::new().unwrap(); + + let main_ts = r#" +import { helper } from './utils'; + +function main() { + const result = processData(); + helper(result); +} + +function processData(): string { + return transform("input"); +} + +function transform(input: string): string { + return input.toUpperCase(); +} + +main(); +"#; + + let utils_ts = r#" +export function helper(data: string) { + console.log(data); + format(data); +} + +function 
format(s: string): string { + return s.trim(); +} +"#; + + fs::write(dir.path().join("main.ts"), main_ts).unwrap(); + fs::write(dir.path().join("utils.ts"), utils_ts).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("typescript").unwrap(); + + index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts); + index_file(&mut index, &extractor, &dir.path().join("utils.ts"), utils_ts); + + let graph = CallGraph::build(&index, dir.path()); + + if let Some(main_id) = graph.find_node("main") { + let callees = graph.get_callees(main_id); + assert!(!callees.is_empty()); + } + } + + #[test] + #[ignore] + fn test_go_full_pipeline() { + let dir = TempDir::new().unwrap(); + + let main_go = r#" +package main + +func main() { + config := loadConfig() + process(config) +} + +func loadConfig() *Config { + return &Config{} +} + +func process(cfg *Config) { + validate(cfg) + save(cfg) +} + +func validate(cfg *Config) {} + +func save(cfg *Config) {} + +type Config struct { + Name string +} +"#; + + fs::write(dir.path().join("main.go"), main_go).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("go").unwrap(); + + index_file(&mut index, &extractor, &dir.path().join("main.go"), main_go); + + let graph = CallGraph::build(&index, dir.path()); + + if let Some(main_id) = graph.find_node("main") { + let transitive = graph.get_transitive_callees(main_id); + assert!(transitive.len() >= 2); + } + } +} + +mod index_persistence { + use super::*; + use glimpse_code::index::{clear_index, load_index, save_index}; + use tempfile::TempDir; + + #[test] + fn test_save_and_load_preserves_data() { + let dir = TempDir::new().unwrap(); + + let mut index = Index::new(); + index.update(FileRecord { + path: dir.path().join("test.rs"), + mtime: 12345, + size: 100, + definitions: vec![], + calls: vec![], + imports: vec![], + }); + + save_index(&index, dir.path()).unwrap(); + + let loaded = load_index(dir.path()).unwrap().unwrap(); + 
assert_eq!(loaded.files.len(), 1); + assert!(loaded.get(&dir.path().join("test.rs")).is_some()); + + clear_index(dir.path()).unwrap(); + assert!(load_index(dir.path()).unwrap().is_none()); + } + + #[test] + fn test_index_staleness_detection() { + let dir = TempDir::new().unwrap(); + let file = dir.path().join("test.rs"); + + fs::write(&file, "fn test() {}").unwrap(); + + let (mtime, size) = file_fingerprint(&file).unwrap(); + + let mut index = Index::new(); + index.update(FileRecord { + path: file.clone(), + mtime, + size, + definitions: vec![], + calls: vec![], + imports: vec![], + }); + + assert!(!index.is_stale(&file, mtime, size)); + assert!(index.is_stale(&file, mtime + 1, size)); + assert!(index.is_stale(&file, mtime, size + 1)); + assert!(index.is_stale(&dir.path().join("other.rs"), mtime, size)); + } +} From 8b8e58810949c32151da3e8438d3eb48d220671d Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 01:02:40 -0800 Subject: [PATCH 23/35] fix: resolve unindexed definitions via imports and add them to call graph --- crates/code/src/graph.rs | 6 ++- crates/code/src/resolve.rs | 54 +++++++++++++++++++++++++ crates/code/tests/integration.rs | 69 ++++++++++++++++++++++++++++---- 3 files changed, 120 insertions(+), 9 deletions(-) diff --git a/crates/code/src/graph.rs b/crates/code/src/graph.rs index c35f5e0..430b731 100644 --- a/crates/code/src/graph.rs +++ b/crates/code/src/graph.rs @@ -50,7 +50,11 @@ impl CallGraph { .resolve(&call.callee, &call.file) .ok() .flatten() - .and_then(|def| graph.find_node(&def.name)) + .map(|def| { + graph + .find_node_by_file_and_name(&def.file, &def.name) + .unwrap_or_else(|| graph.add_definition(def)) + }) }); if let (Some(caller), Some(callee)) = (caller_id, callee_id) { diff --git a/crates/code/src/resolve.rs b/crates/code/src/resolve.rs index dda37fe..066023d 100644 --- a/crates/code/src/resolve.rs +++ b/crates/code/src/resolve.rs @@ -435,6 +435,54 @@ pub fn resolve_by_index(callee: &str, index: &Index) -> Option { 
index.definitions().find(|d| d.name == callee).cloned() } +fn search_file_for_def(callee: &str, file: &Path) -> Option { + use grep::regex::RegexMatcher; + use grep::searcher::sinks::UTF8; + use grep::searcher::Searcher; + + let ext = file.extension().and_then(|e| e.to_str()).unwrap_or(""); + let escaped = regex::escape(callee); + + for pattern_def in DEFINITION_PATTERNS { + if !pattern_def.extensions.contains(&ext) { + continue; + } + + let pattern = pattern_def.pattern.replace("{NAME}", &escaped); + let matcher = match RegexMatcher::new(&pattern) { + Ok(m) => m, + Err(_) => continue, + }; + + let mut found: Option = None; + + let _ = Searcher::new().search_path( + &matcher, + file, + UTF8(|line_num, _line| { + found = Some(line_num); + Ok(false) + }), + ); + + if let Some(line_num) = found { + return Some(Definition { + name: callee.to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 0, + start_line: line_num as usize, + end_line: line_num as usize, + }, + file: file.to_path_buf(), + }); + } + } + + None +} + pub fn resolve_by_search(callee: &str, root: &Path) -> Result> { use grep::regex::RegexMatcher; use grep::searcher::sinks::UTF8; @@ -551,6 +599,9 @@ impl<'a> Resolver<'a> { if let Some(def) = self.find_def_in_file(&resolved, callee) { return Some(def); } + if let Some(def) = search_file_for_def(callee, &resolved) { + return Some(def); + } } let patterns = normalize_to_patterns(&import.module_path, ext); @@ -559,6 +610,9 @@ impl<'a> Resolver<'a> { if let Some(def) = self.find_def_in_file(&resolved, callee) { return Some(def); } + if let Some(def) = search_file_for_def(callee, &resolved) { + return Some(def); + } } } diff --git a/crates/code/tests/integration.rs b/crates/code/tests/integration.rs index 780f201..04ff653 100644 --- a/crates/code/tests/integration.rs +++ b/crates/code/tests/integration.rs @@ -320,6 +320,45 @@ mod resolver_tests { assert!(found.is_some()); assert_eq!(found.unwrap().name, "imported_helper"); } 
+ + #[test] + fn test_import_discovery_tracks_files_for_reindexing() { + let dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + let utils_dir = src.join("utils"); + fs::create_dir_all(&utils_dir).unwrap(); + + fs::write(utils_dir.join("helper.rs"), "pub fn helper() {}").unwrap(); + + let mut index = Index::new(); + let main_file = src.join("main.rs"); + + index.update(FileRecord { + path: main_file.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("main", &main_file)], + calls: vec![], + imports: vec![Import { + module_path: "crate::utils::helper".to_string(), + alias: None, + span: make_span(), + file: main_file.clone(), + }], + }); + + let resolver = Resolver::new(&index, dir.path().to_path_buf()); + + let found = resolver.resolve("helper", &main_file).unwrap(); + assert!(found.is_some(), "grep fallback should find unindexed definition"); + assert_eq!(found.unwrap().name, "helper"); + + let discovered = resolver.files_to_index(); + assert!( + discovered.iter().any(|p| p.ends_with("helper.rs")), + "should track helper.rs for re-indexing" + ); + } } mod call_graph_resolution { @@ -390,7 +429,7 @@ mod call_graph_resolution { } #[test] - fn test_resolver_finds_unindexed_via_grep() { + fn test_graph_uses_grep_fallback_for_unindexed() { let dir = TempDir::new().unwrap(); let file_caller = dir.path().join("caller.rs"); let file_hidden = dir.path().join("hidden.rs"); @@ -398,15 +437,29 @@ mod call_graph_resolution { fs::write(&file_caller, "fn caller() { hidden_func(); }").unwrap(); fs::write(&file_hidden, "fn hidden_func() {}").unwrap(); - let index = Index::new(); - let resolver = Resolver::new(&index, dir.path().to_path_buf()); + let mut index = Index::new(); - let found = resolver.resolve("hidden_func", &file_caller).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "hidden_func"); + index.update(FileRecord { + path: file_caller.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("caller", &file_caller)], + 
calls: vec![Call { + callee: "hidden_func".to_string(), + caller: Some("caller".to_string()), + span: make_span(), + file: file_caller.clone(), + }], + imports: vec![], + }); - let discovered = resolver.files_to_index(); - assert!(!discovered.is_empty()); + let graph = CallGraph::build(&index, dir.path()); + + let caller_id = graph.find_node("caller").unwrap(); + let callees = graph.get_callees(caller_id); + + assert_eq!(callees.len(), 1); + assert_eq!(callees[0].definition.name, "hidden_func"); } #[test] From 30247fa01ce62a3b0c472b8772a547f6a3e5f1a0 Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 01:28:32 -0800 Subject: [PATCH 24/35] refactor: flatten workspace into single crate - remove crates/ directory, consolidate into src/ - core, fetch, tui, code become submodules of the library - single Cargo.toml with all dependencies - remove BackwardsCompatOutputFormat, use serde rename_all - update imports: binary uses glimpse::, lib uses super:: --- AGENTS.md | 32 +- Cargo.lock | 340 +++++++++++++----- Cargo.toml | 82 ++--- crates/core/build.rs => build.rs | 7 +- crates/cli/Cargo.toml | 29 -- crates/code/Cargo.toml | 29 -- crates/core/Cargo.toml | 22 -- crates/fetch/Cargo.toml | 17 - crates/tui/Cargo.toml | 17 - {crates/cli/src => src}/analyzer.rs | 4 +- {crates/cli/src => src}/cli.rs | 12 +- {crates/code/src => src/code}/extract.rs | 0 {crates/code/src => src/code}/grammar.rs | 2 +- {crates/code/src => src/code}/graph.rs | 2 +- {crates/code/src => src/code}/index.rs | 0 crates/code/src/lib.rs => src/code/mod.rs | 0 {crates/code/src => src/code}/resolve.rs | 0 {crates/core/src => src/core}/config.rs | 63 +--- crates/core/src/lib.rs => src/core/mod.rs | 4 +- .../core/src => src/core}/source_detection.rs | 0 {crates/core/src => src/core}/tokenizer.rs | 2 +- {crates/core/src => src/core}/types.rs | 1 + {crates/fetch/src => src/fetch}/git.rs | 0 crates/fetch/src/lib.rs => src/fetch/mod.rs | 0 {crates/fetch/src => src/fetch}/url.rs | 0 src/lib.rs | 12 + 
{crates/cli/src => src}/main.rs | 17 +- {crates/cli/src => src}/output.rs | 2 +- {crates/tui/src => src/tui}/file_picker.rs | 0 crates/tui/src/lib.rs => src/tui/mod.rs | 0 {crates/code/tests => tests}/extraction.rs | 10 +- {crates/code/tests => tests}/integration.rs | 14 +- 32 files changed, 350 insertions(+), 370 deletions(-) rename crates/core/build.rs => build.rs (94%) delete mode 100644 crates/cli/Cargo.toml delete mode 100644 crates/code/Cargo.toml delete mode 100644 crates/core/Cargo.toml delete mode 100644 crates/fetch/Cargo.toml delete mode 100644 crates/tui/Cargo.toml rename {crates/cli/src => src}/analyzer.rs (99%) rename {crates/cli/src => src}/cli.rs (91%) rename {crates/code/src => src/code}/extract.rs (100%) rename {crates/code/src => src/code}/grammar.rs (99%) rename {crates/code/src => src/code}/graph.rs (99%) rename {crates/code/src => src/code}/index.rs (100%) rename crates/code/src/lib.rs => src/code/mod.rs (100%) rename {crates/code/src => src/code}/resolve.rs (100%) rename {crates/core/src => src/core}/config.rs (69%) rename crates/core/src/lib.rs => src/core/mod.rs (82%) rename {crates/core/src => src/core}/source_detection.rs (100%) rename {crates/core/src => src/core}/tokenizer.rs (99%) rename {crates/core/src => src/core}/types.rs (93%) rename {crates/fetch/src => src/fetch}/git.rs (100%) rename crates/fetch/src/lib.rs => src/fetch/mod.rs (100%) rename {crates/fetch/src => src/fetch}/url.rs (100%) create mode 100644 src/lib.rs rename {crates/cli/src => src}/main.rs (95%) rename {crates/cli/src => src}/output.rs (99%) rename {crates/tui/src => src/tui}/file_picker.rs (100%) rename crates/tui/src/lib.rs => src/tui/mod.rs (100%) rename {crates/code/tests => tests}/extraction.rs (98%) rename {crates/code/tests => tests}/integration.rs (98%) diff --git a/AGENTS.md b/AGENTS.md index 3395b9a..b7f00b1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,7 +39,6 @@ cargo run -- --help # show help ```bash cargo test # run all tests -cargo test --package 
glimpse-core # run tests for specific crate cargo test test_name # run single test by name cargo test test_name -- --nocapture # run test with stdout cargo test -- --test-threads=1 # run tests sequentially @@ -58,12 +57,20 @@ cargo clippy -- -D warnings # fail on warnings (CI) ``` glimpse/ -├── crates/ -│ ├── core/ # shared types, config, tokenizer, source detection -│ ├── fetch/ # git clone + url/html processing -│ ├── tui/ # file picker, output formatting, pdf generation -│ └── cli/ # binary crate, arg parsing, directory analyzer -└── languages.yml # language definitions for source detection +├── src/ +│ ├── main.rs # binary entry point +│ ├── lib.rs # library root +│ ├── cli.rs # CLI arg parsing +│ ├── analyzer.rs # directory processing +│ ├── output.rs # output formatting +│ ├── core/ # config, tokenizer, types, source detection +│ ├── fetch/ # git clone, url/html processing +│ ├── tui/ # file picker +│ └── code/ # code analysis (extract, graph, index, resolve) +├── tests/ # integration tests +├── languages.yml # language definitions for source detection +├── registry.toml # tree-sitter grammar registry +└── build.rs # generates language data from languages.yml ``` ## Code Style @@ -153,17 +160,6 @@ pub struct FileEntry { - Use `tempfile` for filesystem tests - Group related assertions -### Workspace Dependencies - -Always use workspace dependencies in crate Cargo.toml: - -```toml -[dependencies] -anyhow.workspace = true -serde.workspace = true -glimpse-core.workspace = true -``` - ### Patterns to Follow - Use `Option` combinators: `.map()`, `.and_then()`, `.unwrap_or()` diff --git a/Cargo.lock b/Cargo.lock index 9ef9f72..2c371fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -126,6 +126,22 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.4.0" @@ -344,6 +360,15 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "colored" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "compact_str" version = "0.8.1" @@ -455,7 +480,7 @@ dependencies = [ "crossterm_winapi", "mio", "parking_lot", - "rustix", + "rustix 0.38.44", "signal-hook", "signal-hook-mio", "winapi", @@ -679,6 +704,12 @@ dependencies = [ "encoding_rs", ] +[[package]] +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + [[package]] name = "equivalent" version = "1.0.2" @@ -952,86 +983,37 @@ dependencies = [ "anyhow", "arboard", "base64 0.22.1", - "clap", - "glimpse-core", - "glimpse-fetch", - "glimpse-tui", - "ignore", - "indicatif", - "num-format", - "printpdf", - "rayon", - "serde", - "tempfile", -] - -[[package]] -name = "glimpse-code" -version = "0.7.8" -dependencies = [ - "anyhow", "bincode", "cc", + "clap", + "crossterm", "dirs", "git2", - "glimpse-core", - "glimpse-fetch", "glob", "grep", + "ignore", + "indicatif", "libloading", + "mockito", + "num-format", "once_cell", + "printpdf", + "ratatui", + "rayon", "regex", + "reqwest", + "scraper", "serde", "serde_json", - "tempfile", - 
"toml", - "tree-sitter", - "walkdir", -] - -[[package]] -name = "glimpse-core" -version = "0.7.8" -dependencies = [ - "anyhow", - "dirs", - "once_cell", - "serde", "serde_yaml", "tempfile", "tiktoken-rs", "tokenizers", "toml", -] - -[[package]] -name = "glimpse-fetch" -version = "0.7.8" -dependencies = [ - "anyhow", - "arboard", - "git2", - "glimpse-core", - "indicatif", - "reqwest", - "scraper", - "tempfile", + "tree-sitter", "url", -] - -[[package]] -name = "glimpse-tui" -version = "0.7.8" -dependencies = [ - "anyhow", - "arboard", - "base64 0.22.1", - "crossterm", - "glimpse-core", - "ignore", - "num-format", - "printpdf", - "ratatui", + "walkdir", + "which", ] [[package]] @@ -1143,7 +1125,26 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", "indexmap", "slab", "tokio", @@ -1178,7 +1179,7 @@ dependencies = [ "indicatif", "log", "native-tls", - "rand", + "rand 0.8.5", "serde", "serde_json", "thiserror", @@ -1210,6 +1211,16 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + [[package]] name = "http-body" version = "0.4.6" @@ -1217,7 +1228,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -1243,9 +1277,9 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", "httparse", "httpdate", "itoa", @@ -1257,6 +1291,28 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "h2 0.4.12", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", +] + [[package]] name = "hyper-tls" version = "0.5.0" @@ -1264,12 +1320,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.32", "native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-util" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", + "pin-project-lite", + "tokio", +] + [[package]] name = "icu_collections" version = "1.5.0" @@ -1640,6 +1711,12 @@ version = "0.4.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "litemap" version = "0.7.4" @@ -1779,6 +1856,31 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "mockito" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e0603425789b4a70fcc4ac4f5a46a566c116ee3e2a6b768dc623f7719c611de" +dependencies = [ + "assert-json-diff", + "bytes", + "colored", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "log", + "pin-project-lite", + "rand 0.9.2", + "regex", + "serde_json", + "serde_urlencoded", + "similar", + "tokio", +] + [[package]] name = "monostate" version = "0.1.13" @@ -2162,7 +2264,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" dependencies = [ "phf_shared 0.10.0", - "rand", + "rand 0.8.5", ] [[package]] @@ -2172,7 +2274,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared 0.11.3", - "rand", + "rand 0.8.5", ] [[package]] @@ -2319,8 +2421,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 
0.9.3", ] [[package]] @@ -2330,7 +2442,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", ] [[package]] @@ -2342,6 +2464,15 @@ dependencies = [ "getrandom 0.2.15", ] +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.1", +] + [[package]] name = "ratatui" version = "0.29.0" @@ -2454,10 +2585,10 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2", - "http", - "http-body", - "hyper", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", "hyper-tls", "ipnet", "js-sys", @@ -2518,7 +2649,20 @@ dependencies = [ "bitflags 2.8.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags 2.8.0", + "errno", + "libc", + "linux-raw-sys 0.11.0", "windows-sys 0.59.0", ] @@ -2775,6 +2919,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "siphasher" version = "0.3.11" 
@@ -2971,7 +3121,7 @@ dependencies = [ "fastrand", "getrandom 0.3.1", "once_cell", - "rustix", + "rustix 0.38.44", "windows-sys 0.59.0", ] @@ -3102,7 +3252,7 @@ dependencies = [ "monostate", "onig", "paste", - "rand", + "rand 0.8.5", "rayon", "rayon-cond", "regex", @@ -3126,6 +3276,7 @@ dependencies = [ "bytes", "libc", "mio", + "parking_lot", "pin-project-lite", "socket2", "windows-sys 0.52.0", @@ -3499,7 +3650,7 @@ checksum = "b7208998eaa3870dad37ec8836979581506e0c5c64c20c9e79e9d2a10d6f47bf" dependencies = [ "cc", "downcast-rs", - "rustix", + "rustix 0.38.44", "smallvec", "wayland-sys", ] @@ -3511,7 +3662,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2120de3d33638aaef5b9f4472bff75f07c56379cf76ea320bd3a3d65ecaf73f" dependencies = [ "bitflags 2.8.0", - "rustix", + "rustix 0.38.44", "wayland-backend", "wayland-scanner", ] @@ -3596,6 +3747,17 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "which" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d" +dependencies = [ + "env_home", + "rustix 1.1.3", + "winsafe", +] + [[package]] name = "winapi" version = "0.3.9" @@ -3800,6 +3962,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + [[package]] name = "wit-bindgen-rt" version = "0.33.0" @@ -3848,7 +4016,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d91ffca73ee7f68ce055750bf9f6eca0780b8c85eff9bc046a3b0da41755e12" dependencies = [ "gethostname", - "rustix", + "rustix 0.38.44", "x11rb-protocol", ] diff --git a/Cargo.toml b/Cargo.toml index 1fb96f2..3dc82bb 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -1,68 +1,56 @@ -[workspace] -resolver = "2" -members = [ - "crates/cli", - "crates/code", - "crates/core", - "crates/fetch", - "crates/tui" -] - -[workspace.package] +[package] +name = "glimpse" version = "0.7.8" edition = "2021" license = "MIT" +description = "A blazingly fast tool for peeking at codebases" -[workspace.dependencies] -# Internal crates -glimpse-code = { path = "crates/code" } -glimpse-core = { path = "crates/core" } -glimpse-fetch = { path = "crates/fetch" } -glimpse-tui = { path = "crates/tui" } +[[bin]] +name = "glimpse" +path = "src/main.rs" -# Common dependencies -anyhow = "1.0.95" -serde = { version = "1.0.217", features = ["derive"] } -rayon = "1.10.0" +[lib] +name = "glimpse" +path = "src/lib.rs" -# Code analysis dependencies +[dependencies] +anyhow = "1.0.95" +arboard = { version = "3.4.1", features = ["wayland-data-control"] } +base64 = "0.22.1" bincode = "1.3" cc = "1.2" -grep = "0.4" -libloading = "0.8" -tree-sitter = "0.25" - -# Core dependencies +clap = { version = "4.5.23", features = ["derive"] } +crossterm = "0.28.1" dirs = "5.0.1" +git2 = "0.18" glob = "0.3" +grep = "0.4" +ignore = "0.4.23" +indicatif = "0.17.9" +libloading = "0.8" +num-format = "0.4.4" +once_cell = "1.20.2" +printpdf = "0.7.0" +ratatui = "0.29.0" +rayon = "1.10.0" regex = "1.11" +reqwest = { version = "0.11", features = ["blocking"] } +scraper = "0.18" +serde = { version = "1.0.217", features = ["derive"] } serde_json = "1.0" -once_cell = "1.20.2" tempfile = "3.14.0" tiktoken-rs = "0.6.0" tokenizers = { version = "0.21.0", features = ["http"] } toml = "0.8.19" - -# Fetch dependencies -arboard = { version = "3.4.1", features = ["wayland-data-control"] } -git2 = "0.18" -indicatif = "0.17.9" -reqwest = { version = "0.11", features = ["blocking"] } -scraper = "0.18" +tree-sitter = "0.25" url = "2.5" +walkdir = "2.5.0" -# TUI dependencies -base64 = "0.22.1" -crossterm = "0.28.1" -ignore = "0.4.23" -num-format = { version = "0.4.4" } 
-printpdf = "0.7.0" -ratatui = "0.29.0" - -# CLI dependencies -clap = { version = "4.5.23", features = ["derive"] } +[build-dependencies] +serde = { version = "1.0.217", features = ["derive"] } +serde_yaml = "0.9" -# Test dependencies +[dev-dependencies] +tempfile = "3.14.0" mockito = "1.4" -walkdir = "2.5.0" which = "8.0.0" diff --git a/crates/core/build.rs b/build.rs similarity index 94% rename from crates/core/build.rs rename to build.rs index a37c73c..5b12af4 100644 --- a/crates/core/build.rs +++ b/build.rs @@ -20,12 +20,7 @@ struct Language { fn main() { let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); - let languages_path = Path::new(&manifest_dir) - .parent() - .unwrap() - .parent() - .unwrap() - .join("languages.yml"); + let languages_path = Path::new(&manifest_dir).join("languages.yml"); println!( "cargo:rerun-if-changed={}", diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml deleted file mode 100644 index 6d58158..0000000 --- a/crates/cli/Cargo.toml +++ /dev/null @@ -1,29 +0,0 @@ -[package] -name = "glimpse" -version.workspace = true -edition.workspace = true -license.workspace = true -description = "A blazingly fast tool for peeking at codebases" - -[[bin]] -name = "glimpse" -path = "src/main.rs" - -[dependencies] -glimpse-core.workspace = true -glimpse-fetch.workspace = true -glimpse-tui.workspace = true - -anyhow.workspace = true -arboard.workspace = true -base64.workspace = true -clap.workspace = true -ignore.workspace = true -indicatif.workspace = true -num-format.workspace = true -printpdf.workspace = true -rayon.workspace = true -serde.workspace = true - -[dev-dependencies] -tempfile.workspace = true diff --git a/crates/code/Cargo.toml b/crates/code/Cargo.toml deleted file mode 100644 index 11d39ea..0000000 --- a/crates/code/Cargo.toml +++ /dev/null @@ -1,29 +0,0 @@ -[package] -name = "glimpse-code" -version.workspace = true -edition.workspace = true -license.workspace = true - -[dependencies] -anyhow.workspace = true 
-bincode.workspace = true -cc.workspace = true -dirs.workspace = true -git2.workspace = true -glob.workspace = true -grep.workspace = true -libloading.workspace = true -once_cell.workspace = true -regex.workspace = true -serde.workspace = true -serde_json.workspace = true -tempfile.workspace = true -toml.workspace = true -tree-sitter.workspace = true -walkdir.workspace = true - -glimpse-core.workspace = true -glimpse-fetch.workspace = true - -[dev-dependencies] -tempfile.workspace = true diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml deleted file mode 100644 index a1e75aa..0000000 --- a/crates/core/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "glimpse-core" -version.workspace = true -edition.workspace = true -license.workspace = true - -[dependencies] -anyhow.workspace = true -dirs.workspace = true -once_cell.workspace = true -serde.workspace = true -tempfile.workspace = true -tiktoken-rs.workspace = true -tokenizers.workspace = true -toml.workspace = true - -[build-dependencies] -serde = { version = "1.0.217", features = ["derive"] } -serde_yaml = "0.9" - -[dev-dependencies] -tempfile.workspace = true diff --git a/crates/fetch/Cargo.toml b/crates/fetch/Cargo.toml deleted file mode 100644 index e6968e0..0000000 --- a/crates/fetch/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = "glimpse-fetch" -version.workspace = true -edition.workspace = true -license.workspace = true - -[dependencies] -glimpse-core.workspace = true - -anyhow.workspace = true -arboard.workspace = true -git2.workspace = true -indicatif.workspace = true -reqwest.workspace = true -scraper.workspace = true -tempfile.workspace = true -url.workspace = true diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml deleted file mode 100644 index eb67d11..0000000 --- a/crates/tui/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] -name = "glimpse-tui" -version.workspace = true -edition.workspace = true -license.workspace = true - -[dependencies] 
-glimpse-core.workspace = true - -anyhow.workspace = true -arboard.workspace = true -base64.workspace = true -crossterm.workspace = true -ignore.workspace = true -num-format.workspace = true -printpdf.workspace = true -ratatui.workspace = true diff --git a/crates/cli/src/analyzer.rs b/src/analyzer.rs similarity index 99% rename from crates/cli/src/analyzer.rs rename to src/analyzer.rs index 150afde..eb84abb 100644 --- a/crates/cli/src/analyzer.rs +++ b/src/analyzer.rs @@ -6,8 +6,8 @@ use ignore::{overrides::OverrideBuilder, WalkBuilder}; use indicatif::{ProgressBar, ProgressStyle}; use rayon::prelude::*; -use glimpse_core::{is_source_file, Exclude, FileEntry, OutputFormat, TokenCounter, TokenizerType}; -use glimpse_tui::FilePicker; +use glimpse::tui::FilePicker; +use glimpse::{is_source_file, Exclude, FileEntry, OutputFormat, TokenCounter, TokenizerType}; use crate::cli::Cli; use crate::output::{display_token_counts, generate_output, generate_pdf, handle_output}; diff --git a/crates/cli/src/cli.rs b/src/cli.rs similarity index 91% rename from crates/cli/src/cli.rs rename to src/cli.rs index f1ed9ab..fd5a5ed 100644 --- a/crates/cli/src/cli.rs +++ b/src/cli.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use clap::{Parser, ValueEnum}; use serde::{Deserialize, Serialize}; -use glimpse_core::{BackwardsCompatOutputFormat, Config, Exclude, OutputFormat, TokenizerType}; +use glimpse::{Config, Exclude, OutputFormat, TokenizerType}; #[derive(Debug, Clone, ValueEnum, Serialize, Deserialize)] pub enum CliOutputFormat { @@ -32,13 +32,6 @@ impl From for CliOutputFormat { } } -impl From for CliOutputFormat { - fn from(format: BackwardsCompatOutputFormat) -> Self { - let output_format: OutputFormat = format.into(); - output_format.into() - } -} - #[derive(Debug, Clone, ValueEnum)] pub enum CliTokenizerType { Tiktoken, @@ -138,8 +131,7 @@ impl Cli { cli.max_size = cli.max_size.or(Some(config.max_size)); cli.max_depth = cli.max_depth.or(Some(config.max_depth)); - let output_format: 
OutputFormat = config.default_output_format.clone().into(); - cli.output = cli.output.or(Some(CliOutputFormat::from(output_format))); + cli.output = cli.output.or(Some(config.default_output_format.clone().into())); if let Some(mut excludes) = cli.exclude.take() { excludes.extend(config.default_excludes.clone()); diff --git a/crates/code/src/extract.rs b/src/code/extract.rs similarity index 100% rename from crates/code/src/extract.rs rename to src/code/extract.rs diff --git a/crates/code/src/grammar.rs b/src/code/grammar.rs similarity index 99% rename from crates/code/src/grammar.rs rename to src/code/grammar.rs index e8481a0..f336bb3 100644 --- a/crates/code/src/grammar.rs +++ b/src/code/grammar.rs @@ -45,7 +45,7 @@ pub struct Registry { impl Registry { pub fn load() -> Result { - let registry_toml = include_str!("../../../registry.toml"); + let registry_toml = include_str!("../../registry.toml"); Self::from_str(registry_toml) } diff --git a/crates/code/src/graph.rs b/src/code/graph.rs similarity index 99% rename from crates/code/src/graph.rs rename to src/code/graph.rs index 430b731..80dfe94 100644 --- a/crates/code/src/graph.rs +++ b/src/code/graph.rs @@ -262,8 +262,8 @@ impl CallGraph { #[cfg(test)] mod tests { + use super::super::index::{Call, DefinitionKind, FileRecord, Span}; use super::*; - use crate::index::{Call, DefinitionKind, FileRecord, Span}; use std::path::PathBuf; fn make_span() -> Span { diff --git a/crates/code/src/index.rs b/src/code/index.rs similarity index 100% rename from crates/code/src/index.rs rename to src/code/index.rs diff --git a/crates/code/src/lib.rs b/src/code/mod.rs similarity index 100% rename from crates/code/src/lib.rs rename to src/code/mod.rs diff --git a/crates/code/src/resolve.rs b/src/code/resolve.rs similarity index 100% rename from crates/code/src/resolve.rs rename to src/code/resolve.rs diff --git a/crates/core/src/config.rs b/src/core/config.rs similarity index 69% rename from crates/core/src/config.rs rename to 
src/core/config.rs index facd3ab..6c51781 100644 --- a/crates/core/src/config.rs +++ b/src/core/config.rs @@ -2,60 +2,7 @@ use std::path::{Path, PathBuf}; use serde::{Deserialize, Serialize}; -use crate::types::{Exclude, OutputFormat}; - -#[derive(Debug, Serialize, Clone)] -#[serde(into = "String")] -pub struct BackwardsCompatOutputFormat(OutputFormat); - -impl From for String { - fn from(format: BackwardsCompatOutputFormat) -> Self { - match format.0 { - OutputFormat::Tree => "tree".to_string(), - OutputFormat::Files => "files".to_string(), - OutputFormat::Both => "both".to_string(), - } - } -} - -impl<'de> Deserialize<'de> for BackwardsCompatOutputFormat { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - #[derive(Deserialize)] - #[serde(untagged)] - enum FormatOrString { - Format(OutputFormat), - String(String), - } - - match FormatOrString::deserialize(deserializer)? { - FormatOrString::Format(format) => Ok(BackwardsCompatOutputFormat(format)), - FormatOrString::String(s) => { - let format = match s.to_lowercase().as_str() { - "tree" => OutputFormat::Tree, - "files" => OutputFormat::Files, - "both" => OutputFormat::Both, - _ => OutputFormat::Both, - }; - Ok(BackwardsCompatOutputFormat(format)) - } - } - } -} - -impl From for BackwardsCompatOutputFormat { - fn from(format: OutputFormat) -> Self { - BackwardsCompatOutputFormat(format) - } -} - -impl From for OutputFormat { - fn from(format: BackwardsCompatOutputFormat) -> Self { - format.0 - } -} +use super::types::{Exclude, OutputFormat}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Config { @@ -66,7 +13,7 @@ pub struct Config { pub max_depth: usize, #[serde(default = "default_output_format")] - pub default_output_format: BackwardsCompatOutputFormat, + pub default_output_format: OutputFormat, #[serde(default)] pub default_excludes: Vec, @@ -119,8 +66,8 @@ fn default_max_depth() -> usize { 20 } -fn default_output_format() -> BackwardsCompatOutputFormat { - 
BackwardsCompatOutputFormat(OutputFormat::Both) +fn default_output_format() -> OutputFormat { + OutputFormat::Both } fn default_excludes() -> Vec { @@ -182,7 +129,7 @@ pub struct RepoConfig { pub exclude: Option>, pub max_size: Option, pub max_depth: Option, - pub output: Option, + pub output: Option, pub file: Option, pub hidden: Option, pub no_ignore: Option, diff --git a/crates/core/src/lib.rs b/src/core/mod.rs similarity index 82% rename from crates/core/src/lib.rs rename to src/core/mod.rs index 9b01bdd..62fc8ee 100644 --- a/crates/core/src/lib.rs +++ b/src/core/mod.rs @@ -4,8 +4,8 @@ pub mod tokenizer; pub mod types; pub use config::{ - get_config_path, load_config, load_repo_config, save_config, save_repo_config, - BackwardsCompatOutputFormat, Config, RepoConfig, + get_config_path, load_config, load_repo_config, save_config, save_repo_config, Config, + RepoConfig, }; pub use source_detection::is_source_file; pub use tokenizer::{TokenCount, TokenCounter, TokenizerBackend}; diff --git a/crates/core/src/source_detection.rs b/src/core/source_detection.rs similarity index 100% rename from crates/core/src/source_detection.rs rename to src/core/source_detection.rs diff --git a/crates/core/src/tokenizer.rs b/src/core/tokenizer.rs similarity index 99% rename from crates/core/src/tokenizer.rs rename to src/core/tokenizer.rs index 8fee8f7..e70572a 100644 --- a/crates/core/src/tokenizer.rs +++ b/src/core/tokenizer.rs @@ -4,7 +4,7 @@ use anyhow::{anyhow, Result}; use tiktoken_rs::get_bpe_from_model; use tokenizers::Tokenizer as HfTokenizer; -use crate::types::FileEntry; +use super::types::FileEntry; pub enum TokenizerBackend { Tiktoken(tiktoken_rs::CoreBPE), diff --git a/crates/core/src/types.rs b/src/core/types.rs similarity index 93% rename from crates/core/src/types.rs rename to src/core/types.rs index d0584cf..24e7da8 100644 --- a/crates/core/src/types.rs +++ b/src/core/types.rs @@ -3,6 +3,7 @@ use std::path::PathBuf; use serde::{Deserialize, Serialize}; 
#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] pub enum OutputFormat { Tree, Files, diff --git a/crates/fetch/src/git.rs b/src/fetch/git.rs similarity index 100% rename from crates/fetch/src/git.rs rename to src/fetch/git.rs diff --git a/crates/fetch/src/lib.rs b/src/fetch/mod.rs similarity index 100% rename from crates/fetch/src/lib.rs rename to src/fetch/mod.rs diff --git a/crates/fetch/src/url.rs b/src/fetch/url.rs similarity index 100% rename from crates/fetch/src/url.rs rename to src/fetch/url.rs diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..e133234 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,12 @@ +pub mod code; +pub mod core; +pub mod fetch; +pub mod tui; + +pub use core::{ + get_config_path, is_source_file, load_config, load_repo_config, save_config, save_repo_config, + Config, Exclude, FileEntry, OutputFormat, RepoConfig, TokenCount, TokenCounter, + TokenizerBackend, TokenizerType, +}; +pub use fetch::{GitProcessor, UrlProcessor}; +pub use tui::FilePicker; diff --git a/crates/cli/src/main.rs b/src/main.rs similarity index 95% rename from crates/cli/src/main.rs rename to src/main.rs index bb5c3f5..ba6c80b 100644 --- a/crates/cli/src/main.rs +++ b/src/main.rs @@ -6,14 +6,12 @@ use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; -use glimpse_core::{ - get_config_path, load_config, load_repo_config, save_config, save_repo_config, - BackwardsCompatOutputFormat, RepoConfig, -}; -use glimpse_fetch::{GitProcessor, UrlProcessor}; - use crate::analyzer::process_directory; use crate::cli::Cli; +use glimpse::fetch::{GitProcessor, UrlProcessor}; +use glimpse::{ + get_config_path, load_config, load_repo_config, save_config, save_repo_config, RepoConfig, +}; fn is_url_or_git(path: &str) -> bool { GitProcessor::is_git_url(path) || path.starts_with("http://") || path.starts_with("https://") @@ -205,9 +203,7 @@ fn create_repo_config_from_args(args: &Cli) -> RepoConfig { exclude: 
args.exclude.clone(), max_size: args.max_size, max_depth: args.max_depth, - output: args - .get_output_format() - .map(BackwardsCompatOutputFormat::from), + output: args.get_output_format(), file: args.file.clone(), hidden: Some(args.hidden), no_ignore: Some(args.no_ignore), @@ -232,8 +228,7 @@ fn apply_repo_config(args: &mut Cli, repo_config: &RepoConfig) { } if let Some(ref output) = repo_config.output { - let output_format: glimpse_core::OutputFormat = (*output).clone().into(); - args.output = Some(output_format.into()); + args.output = Some(output.clone().into()); } if let Some(ref file) = repo_config.file { diff --git a/crates/cli/src/output.rs b/src/output.rs similarity index 99% rename from crates/cli/src/output.rs rename to src/output.rs index 5b0ddce..e4af9dc 100644 --- a/crates/cli/src/output.rs +++ b/src/output.rs @@ -6,7 +6,7 @@ use base64::Engine; use num_format::{Buffer, Locale}; use printpdf::*; -use glimpse_core::{FileEntry, OutputFormat, TokenCounter}; +use glimpse::{FileEntry, OutputFormat, TokenCounter}; use crate::cli::Cli; diff --git a/crates/tui/src/file_picker.rs b/src/tui/file_picker.rs similarity index 100% rename from crates/tui/src/file_picker.rs rename to src/tui/file_picker.rs diff --git a/crates/tui/src/lib.rs b/src/tui/mod.rs similarity index 100% rename from crates/tui/src/lib.rs rename to src/tui/mod.rs diff --git a/crates/code/tests/extraction.rs b/tests/extraction.rs similarity index 98% rename from crates/code/tests/extraction.rs rename to tests/extraction.rs index f1e19fe..066bfd7 100644 --- a/crates/code/tests/extraction.rs +++ b/tests/extraction.rs @@ -1,7 +1,7 @@ use std::path::Path; -use glimpse_code::extract::Extractor; -use glimpse_code::index::DefinitionKind; +use glimpse::code::extract::Extractor; +use glimpse::code::index::DefinitionKind; use tree_sitter::Parser; fn parse_and_extract(lang: &str, source: &str) -> ExtractResult { @@ -21,9 +21,9 @@ fn parse_and_extract(lang: &str, source: &str) -> ExtractResult { } struct 
ExtractResult { - definitions: Vec, - calls: Vec, - imports: Vec, + definitions: Vec, + calls: Vec, + imports: Vec, } mod rust { diff --git a/crates/code/tests/integration.rs b/tests/integration.rs similarity index 98% rename from crates/code/tests/integration.rs rename to tests/integration.rs index 04ff653..999d434 100644 --- a/crates/code/tests/integration.rs +++ b/tests/integration.rs @@ -2,10 +2,10 @@ use std::collections::HashSet; use std::fs; use std::path::Path; -use glimpse_code::extract::Extractor; -use glimpse_code::graph::CallGraph; -use glimpse_code::index::{file_fingerprint, FileRecord, Index}; -use glimpse_code::resolve::{resolve_by_index, resolve_by_search, resolve_same_file, Resolver}; +use glimpse::code::extract::Extractor; +use glimpse::code::graph::CallGraph; +use glimpse::code::index::{file_fingerprint, FileRecord, Index}; +use glimpse::code::resolve::{resolve_by_index, resolve_by_search, resolve_same_file, Resolver}; use tree_sitter::Parser; fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &str) { @@ -29,7 +29,7 @@ fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &st mod resolver_tests { use super::*; - use glimpse_code::index::{Call, Definition, DefinitionKind, FileRecord, Import, Span}; + use glimpse::code::index::{Call, Definition, DefinitionKind, FileRecord, Import, Span}; use std::path::PathBuf; use tempfile::TempDir; @@ -363,7 +363,7 @@ mod resolver_tests { mod call_graph_resolution { use super::*; - use glimpse_code::index::{Call, Definition, DefinitionKind, FileRecord, Span}; + use glimpse::code::index::{Call, Definition, DefinitionKind, FileRecord, Span}; use tempfile::TempDir; fn make_span() -> Span { @@ -849,7 +849,7 @@ type Config struct { mod index_persistence { use super::*; - use glimpse_code::index::{clear_index, load_index, save_index}; + use glimpse::code::index::{clear_index, load_index, save_index}; use tempfile::TempDir; #[test] From 
ca13923b22c9517d0b3abe3f6b7424a1e07fc3ed Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 01:37:08 -0800 Subject: [PATCH 25/35] feat: add code and index subcommands with progress indicators --- src/analyzer.rs | 1 + src/cli.rs | 142 +++++++++++++++++++++++++- src/main.rs | 261 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 400 insertions(+), 4 deletions(-) diff --git a/src/analyzer.rs b/src/analyzer.rs index eb84abb..ba79d78 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -347,6 +347,7 @@ mod tests { fn create_test_cli(dir_path: &Path) -> Cli { Cli { + command: None, config: false, paths: vec![dir_path.to_string_lossy().to_string()], config_path: false, diff --git a/src/cli.rs b/src/cli.rs index fd5a5ed..3ca1f94 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,6 +1,6 @@ use std::path::PathBuf; -use clap::{Parser, ValueEnum}; +use clap::{Parser, Subcommand, ValueEnum}; use serde::{Deserialize, Serialize}; use glimpse::{Config, Exclude, OutputFormat, TokenizerType}; @@ -48,6 +48,103 @@ impl From for TokenizerType { } } +#[derive(Subcommand, Debug, Clone)] +pub enum Commands { + /// Generate call graph for a function + #[command(name = "code")] + Code(CodeArgs), + + /// Manage the code index + #[command(name = "index")] + Index(IndexArgs), +} + +#[derive(Parser, Debug, Clone)] +pub struct CodeArgs { + /// Target function in file:function format (e.g., src/main.rs:main or :main) + #[arg(required = true)] + pub target: String, + + /// Project root directory + #[arg(short, long, default_value = ".")] + pub root: PathBuf, + + /// Include callers (reverse call graph) + #[arg(long)] + pub callers: bool, + + /// Maximum depth to traverse + #[arg(short, long)] + pub depth: Option, + + /// Output file (default: stdout) + #[arg(short = 'f', long)] + pub file: Option, +} + +#[derive(Parser, Debug, Clone)] +pub struct IndexArgs { + #[command(subcommand)] + pub command: IndexCommand, +} + +#[derive(Subcommand, Debug, Clone)] +pub enum IndexCommand { 
+ /// Build or update the index for a project + Build { + /// Project root directory + #[arg(default_value = ".")] + path: PathBuf, + + /// Force rebuild (ignore existing index) + #[arg(short, long)] + force: bool, + }, + + /// Clear the index for a project + Clear { + /// Project root directory + #[arg(default_value = ".")] + path: PathBuf, + }, + + /// Show index status and stats + Status { + /// Project root directory + #[arg(default_value = ".")] + path: PathBuf, + }, +} + +#[derive(Debug, Clone)] +pub struct FunctionTarget { + pub file: Option, + pub function: String, +} + +impl FunctionTarget { + pub fn parse(target: &str) -> anyhow::Result { + if let Some((file, func)) = target.rsplit_once(':') { + if file.is_empty() { + Ok(Self { + file: None, + function: func.to_string(), + }) + } else { + Ok(Self { + file: Some(PathBuf::from(file)), + function: func.to_string(), + }) + } + } else { + Ok(Self { + file: None, + function: target.to_string(), + }) + } + } +} + #[derive(Parser, Debug, Clone)] #[command( name = "glimpse", @@ -55,6 +152,9 @@ impl From for TokenizerType { version )] pub struct Cli { + #[command(subcommand)] + pub command: Option, + #[arg(default_value = ".")] pub paths: Vec, @@ -207,3 +307,43 @@ fn parse_exclude(value: &str) -> Result { Ok(Exclude::Pattern(value.to_string())) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_function_target_parse_with_file() { + let target = FunctionTarget::parse("src/main.rs:main").unwrap(); + assert_eq!(target.file, Some(PathBuf::from("src/main.rs"))); + assert_eq!(target.function, "main"); + } + + #[test] + fn test_function_target_parse_without_file() { + let target = FunctionTarget::parse(":main").unwrap(); + assert_eq!(target.file, None); + assert_eq!(target.function, "main"); + } + + #[test] + fn test_function_target_parse_function_only() { + let target = FunctionTarget::parse("main").unwrap(); + assert_eq!(target.file, None); + assert_eq!(target.function, "main"); + } + + #[test] + fn 
test_function_target_parse_nested_path() { + let target = FunctionTarget::parse("src/code/graph.rs:build").unwrap(); + assert_eq!(target.file, Some(PathBuf::from("src/code/graph.rs"))); + assert_eq!(target.function, "build"); + } + + #[test] + fn test_function_target_parse_windows_path() { + let target = FunctionTarget::parse("C:\\src\\main.rs:main").unwrap(); + assert_eq!(target.file, Some(PathBuf::from("C:\\src\\main.rs"))); + assert_eq!(target.function, "main"); + } +} diff --git a/src/main.rs b/src/main.rs index ba6c80b..6a77202 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,11 +6,20 @@ use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; +use anyhow::{bail, Context, Result}; +use indicatif::{ProgressBar, ProgressStyle}; + use crate::analyzer::process_directory; -use crate::cli::Cli; +use crate::cli::{Cli, CodeArgs, Commands, FunctionTarget, IndexCommand}; +use glimpse::code::extract::Extractor; +use glimpse::code::graph::CallGraph; +use glimpse::code::index::{ + clear_index, file_fingerprint, load_index, save_index, FileRecord, Index, +}; use glimpse::fetch::{GitProcessor, UrlProcessor}; use glimpse::{ - get_config_path, load_config, load_repo_config, save_config, save_repo_config, RepoConfig, + get_config_path, is_source_file, load_config, load_repo_config, save_config, save_repo_config, + RepoConfig, }; fn is_url_or_git(path: &str) -> bool { @@ -28,10 +37,17 @@ fn has_custom_options(args: &Cli) -> bool { || args.no_ignore } -fn main() -> anyhow::Result<()> { +fn main() -> Result<()> { let mut config = load_config()?; let mut args = Cli::parse_with_config(&config)?; + if let Some(ref cmd) = args.command { + return match cmd { + Commands::Code(code_args) => handle_code_command(code_args), + Commands::Index(index_args) => handle_index_command(&index_args.command), + }; + } + if args.config_path { let path = get_config_path()?; println!("{}", path.display()); @@ -243,3 +259,242 @@ fn apply_repo_config(args: &mut Cli, repo_config: 
&RepoConfig) { args.no_ignore = no_ignore; } } + +fn handle_code_command(args: &CodeArgs) -> Result<()> { + let root = args.root.canonicalize().unwrap_or_else(|_| args.root.clone()); + let target = FunctionTarget::parse(&args.target)?; + + let mut index = load_index(&root)?.unwrap_or_else(Index::new); + let needs_update = index_directory(&root, &mut index)?; + + if needs_update > 0 { + save_index(&index, &root)?; + } + + let graph = CallGraph::build(&index, &root); + + let node_id = if let Some(ref file) = target.file { + let file_path = root.join(file); + let rel_path = file_path + .strip_prefix(&root) + .unwrap_or(&file_path) + .to_path_buf(); + graph + .find_node_by_file_and_name(&rel_path, &target.function) + .or_else(|| graph.find_node_by_file_and_name(&file_path, &target.function)) + } else { + graph.find_node(&target.function) + }; + + let Some(node_id) = node_id else { + bail!( + "function '{}' not found in index", + target.function + ); + }; + + let definitions = if args.callers { + let callers = graph.get_transitive_callers(node_id); + let mut defs: Vec<_> = callers.iter().map(|n| &n.definition).collect(); + if let Some(node) = graph.get_node(node_id) { + defs.push(&node.definition); + } + defs + } else { + graph.post_order_definitions(node_id) + }; + + let output = format_definitions(&definitions, &root)?; + + if let Some(ref file) = args.file { + fs::write(file, &output)?; + eprintln!("Output written to: {}", file.display()); + } else { + print!("{}", output); + } + + Ok(()) +} + +fn handle_index_command(cmd: &IndexCommand) -> Result<()> { + match cmd { + IndexCommand::Build { path, force } => { + let root = path.canonicalize().unwrap_or_else(|_| path.clone()); + + let mut index = if *force { + Index::new() + } else { + load_index(&root)?.unwrap_or_else(Index::new) + }; + + let updated = index_directory(&root, &mut index)?; + save_index(&index, &root)?; + + let file_count = index.files.len(); + let def_count = index.definitions().count(); + let 
call_count = index.calls().count(); + + if updated > 0 { + eprintln!( + "Index updated: {} files ({} updated), {} definitions, {} calls", + file_count, updated, def_count, call_count + ); + } else { + eprintln!( + "Index up to date: {} files, {} definitions, {} calls", + file_count, def_count, call_count + ); + } + } + IndexCommand::Clear { path } => { + let root = path.canonicalize().unwrap_or_else(|_| path.clone()); + clear_index(&root)?; + eprintln!("Index cleared for: {}", root.display()); + } + IndexCommand::Status { path } => { + let root = path.canonicalize().unwrap_or_else(|_| path.clone()); + + match load_index(&root)? { + Some(index) => { + let file_count = index.files.len(); + let def_count = index.definitions().count(); + let call_count = index.calls().count(); + let import_count = index.imports().count(); + + println!("Index status for: {}", root.display()); + println!(" Files: {}", file_count); + println!(" Definitions: {}", def_count); + println!(" Calls: {}", call_count); + println!(" Imports: {}", import_count); + } + None => { + println!("No index found for: {}", root.display()); + } + } + } + } + + Ok(()) +} + +fn index_directory(root: &Path, index: &mut Index) -> Result { + let pb = ProgressBar::new_spinner(); + pb.set_style( + ProgressStyle::default_spinner() + .template("{spinner:.green} {msg}") + .expect("valid template"), + ); + pb.set_message("Scanning files..."); + + let source_files: Vec<_> = walkdir::WalkDir::new(root) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + .filter(|e| is_source_file(e.path())) + .filter(|e| { + e.path() + .extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| !ext.is_empty()) + }) + .collect(); + + let total = source_files.len(); + pb.finish_and_clear(); + + let pb = ProgressBar::new(total as u64); + pb.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") + .expect("valid template") + .progress_chars("#>-"), 
+ ); + + let mut updated = 0; + + for entry in source_files { + let path = entry.path(); + let rel_path = path.strip_prefix(root).unwrap_or(path); + + pb.set_message(format!("{}", rel_path.display())); + + let ext = path + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + + let (mtime, size) = file_fingerprint(path)?; + + if !index.is_stale(rel_path, mtime, size) { + pb.inc(1); + continue; + } + + let extractor = match Extractor::from_extension(ext) { + Ok(e) => e, + Err(_) => { + pb.inc(1); + continue; + } + }; + + let source = fs::read(path).with_context(|| format!("failed to read: {}", path.display()))?; + + let mut parser = tree_sitter::Parser::new(); + parser.set_language(extractor.language())?; + + let Some(tree) = parser.parse(&source, None) else { + pb.inc(1); + continue; + }; + + let definitions = extractor.extract_definitions(&tree, &source, rel_path); + let calls = extractor.extract_calls(&tree, &source, rel_path); + let imports = extractor.extract_imports(&tree, &source, rel_path); + + index.update(FileRecord { + path: rel_path.to_path_buf(), + mtime, + size, + definitions, + calls, + imports, + }); + + updated += 1; + pb.inc(1); + } + + pb.finish_and_clear(); + Ok(updated) +} + +fn format_definitions( + definitions: &[&glimpse::code::index::Definition], + root: &Path, +) -> Result { + use std::fmt::Write; + + let mut output = String::new(); + + for def in definitions { + let file_path = root.join(&def.file); + let content = fs::read_to_string(&file_path) + .with_context(|| format!("failed to read: {}", file_path.display()))?; + + let lines: Vec<&str> = content.lines().collect(); + let start = def.span.start_line.saturating_sub(1); + let end = def.span.end_line.min(lines.len()); + + writeln!(output, "## {}:{}", def.file.display(), def.name)?; + writeln!(output)?; + writeln!(output, "```")?; + for line in &lines[start..end] { + writeln!(output, "{}", line)?; + } + writeln!(output, "```")?; + writeln!(output)?; + } + + Ok(output) +} From 
0ab007cecfcc153bd25704dafdacd2e2f63b34ac Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 01:38:43 -0800 Subject: [PATCH 26/35] fix: use ignore crate to respect gitignore when indexing --- src/main.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index 6a77202..4f5a8f7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -386,10 +386,11 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { ); pb.set_message("Scanning files..."); - let source_files: Vec<_> = walkdir::WalkDir::new(root) - .into_iter() + let source_files: Vec<_> = ignore::WalkBuilder::new(root) + .hidden(false) + .build() .filter_map(|e| e.ok()) - .filter(|e| e.file_type().is_file()) + .filter(|e| e.file_type().map(|ft| ft.is_file()).unwrap_or(false)) .filter(|e| is_source_file(e.path())) .filter(|e| { e.path() From 32537222bdd310617f39eb5d1bf9563e0c3e4702 Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 02:19:30 -0800 Subject: [PATCH 27/35] perf: remove slow resolve_by_search, use index-only lookups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - remove resolve.rs entirely (was doing O(files × calls) filesystem walks) - simplify CallGraph::build to use index-only O(1) lookups - add depth-limited traversal (default depth=1) - remove debug logging from code subcommand result: 23s → 24ms (~1000x faster) --- src/code/graph.rs | 121 ++++-- src/code/mod.rs | 1 - src/code/resolve.rs | 964 ------------------------------------------- src/main.rs | 118 +++--- tests/integration.rs | 387 +---------------- 5 files changed, 157 insertions(+), 1434 deletions(-) delete mode 100644 src/code/resolve.rs diff --git a/src/code/graph.rs b/src/code/graph.rs index 80dfe94..d1a11cc 100644 --- a/src/code/graph.rs +++ b/src/code/graph.rs @@ -1,8 +1,7 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::path::Path; -use super::index::{Definition, Index}; -use super::resolve::Resolver; +use 
super::index::{Call, Definition, Index}; pub type NodeId = usize; @@ -31,8 +30,7 @@ impl CallGraph { } } - pub fn build(index: &Index, root: &Path) -> Self { - let resolver = Resolver::new(index, root.to_path_buf()); + pub fn build(index: &Index) -> Self { let mut graph = CallGraph::new(); for def in index.definitions() { @@ -40,31 +38,23 @@ impl CallGraph { } for call in index.calls() { - let caller_id = call - .caller - .as_ref() - .and_then(|name| graph.find_node_by_file_and_name(&call.file, name)); - - let callee_id = graph.find_node(&call.callee).or_else(|| { - resolver - .resolve(&call.callee, &call.file) - .ok() - .flatten() - .map(|def| { - graph - .find_node_by_file_and_name(&def.file, &def.name) - .unwrap_or_else(|| graph.add_definition(def)) - }) - }); - - if let (Some(caller), Some(callee)) = (caller_id, callee_id) { - graph.add_edge(caller, callee); + if let Some((caller_id, callee_id)) = Self::link_call(&graph, call) { + graph.add_edge(caller_id, callee_id); } } graph } + fn link_call(graph: &CallGraph, call: &Call) -> Option<(NodeId, NodeId)> { + let caller_id = call + .caller + .as_ref() + .and_then(|name| graph.find_node_by_file_and_name(&call.file, name))?; + let callee_id = graph.find_node(&call.callee)?; + Some((caller_id, callee_id)) + } + fn add_definition(&mut self, definition: Definition) -> NodeId { let file_key = definition.file.to_string_lossy().to_string(); let composite_key = (file_key, definition.name.clone()); @@ -235,6 +225,67 @@ impl CallGraph { .collect() } + pub fn get_callees_to_depth(&self, node_id: NodeId, max_depth: usize) -> Vec { + let mut visited = HashSet::new(); + let mut result = Vec::new(); + let mut queue = VecDeque::new(); + + queue.push_back((node_id, 0)); + visited.insert(node_id); + + while let Some((current_id, depth)) = queue.pop_front() { + result.push(current_id); + + if depth >= max_depth { + continue; + } + + if let Some(node) = self.nodes.get(¤t_id) { + for &callee_id in &node.callees { + if 
visited.insert(callee_id) { + queue.push_back((callee_id, depth + 1)); + } + } + } + } + + result + } + + pub fn get_callers_to_depth(&self, node_id: NodeId, max_depth: usize) -> Vec { + let mut visited = HashSet::new(); + let mut result = Vec::new(); + let mut queue = VecDeque::new(); + + queue.push_back((node_id, 0)); + visited.insert(node_id); + + while let Some((current_id, depth)) = queue.pop_front() { + result.push(current_id); + + if depth >= max_depth { + continue; + } + + if let Some(node) = self.nodes.get(¤t_id) { + for &caller_id in &node.callers { + if visited.insert(caller_id) { + queue.push_back((caller_id, depth + 1)); + } + } + } + } + + result + } + + pub fn definitions_to_depth(&self, node_id: NodeId, max_depth: usize) -> Vec<&Definition> { + self.get_callees_to_depth(node_id, max_depth) + .into_iter() + .filter_map(|id| self.nodes.get(&id).map(|n| &n.definition)) + .collect() + } + pub fn node_count(&self) -> usize { self.nodes.len() } @@ -296,7 +347,7 @@ mod tests { #[test] fn test_build_empty_index() { let index = Index::new(); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); assert_eq!(graph.node_count(), 0); assert_eq!(graph.edge_count(), 0); } @@ -316,7 +367,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); assert_eq!(graph.node_count(), 2); assert_eq!(graph.edge_count(), 0); } @@ -336,7 +387,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); assert_eq!(graph.node_count(), 2); assert_eq!(graph.edge_count(), 1); @@ -366,7 +417,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); let a_id = graph.find_node("a").unwrap(); let c_id = graph.find_node("c").unwrap(); @@ -408,7 +459,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, 
Path::new(".")); + let graph = CallGraph::build(&index); let a_id = graph.find_node("a").unwrap(); let transitive = graph.get_transitive_callees(a_id); @@ -443,7 +494,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); let a_id = graph.find_node("a").unwrap(); let transitive = graph.get_transitive_callees(a_id); @@ -469,7 +520,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); let a_id = graph.find_node("a").unwrap(); let b_id = graph.find_node("b").unwrap(); let c_id = graph.find_node("c").unwrap(); @@ -502,7 +553,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); let a_id = graph.find_node("a").unwrap(); let order = graph.post_order(a_id); @@ -524,7 +575,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); let main_id = graph.find_node("main").unwrap(); let defs = graph.post_order_definitions(main_id); @@ -545,7 +596,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); let id = graph.find_node("recursive").unwrap(); let node = graph.get_node(id).unwrap(); @@ -575,7 +626,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); let main_id = graph.find_node("main").unwrap(); let callees = graph.get_callees(main_id); @@ -607,7 +658,7 @@ mod tests { imports: vec![], }); - let graph = CallGraph::build(&index, Path::new(".")); + let graph = CallGraph::build(&index); let a_id = graph.find_node_by_file_and_name(Path::new("src/a.rs"), "foo"); let b_id = graph.find_node_by_file_and_name(Path::new("src/b.rs"), "foo"); diff --git a/src/code/mod.rs b/src/code/mod.rs index 3bc6f18..2b3e76f 100644 
--- a/src/code/mod.rs +++ b/src/code/mod.rs @@ -2,4 +2,3 @@ pub mod extract; pub mod grammar; pub mod graph; pub mod index; -pub mod resolve; diff --git a/src/code/resolve.rs b/src/code/resolve.rs deleted file mode 100644 index 066023d..0000000 --- a/src/code/resolve.rs +++ /dev/null @@ -1,964 +0,0 @@ -use std::cell::RefCell; -use std::collections::HashSet; -use std::path::{Path, PathBuf}; - -use anyhow::Result; - -use super::index::{Definition, DefinitionKind, Index, Span}; - -const SYSTEM_HEADERS: &[&str] = &[ - "stdio", - "stdlib", - "string", - "math", - "time", - "errno", - "assert", - "ctype", - "signal", - "stdarg", - "stddef", - "setjmp", - "locale", - "limits", - "float", - "iso646", - "stdbool", - "stdint", - "inttypes", - "wchar", - "wctype", - "fenv", - "complex", - "tgmath", - "stdalign", - "stdnoreturn", - "stdatomic", - "threads", - "uchar", - "iostream", - "vector", - "string", - "map", - "set", - "unordered_map", - "unordered_set", - "algorithm", - "memory", - "functional", - "utility", - "tuple", - "array", - "deque", - "list", - "forward_list", - "stack", - "queue", - "priority_queue", - "bitset", - "valarray", - "regex", - "random", - "chrono", - "ratio", - "thread", - "mutex", - "condition_variable", - "future", - "atomic", - "filesystem", - "optional", - "variant", - "any", - "string_view", - "charconv", - "execution", - "span", - "ranges", - "numbers", - "concepts", - "coroutine", - "compare", - "version", - "source_location", - "format", - "bit", - "numbers", - "typeinfo", - "typeindex", - "type_traits", - "initializer_list", - "new", - "exception", - "stdexcept", - "system_error", - "cerrno", - "cassert", - "cctype", - "cfenv", - "cfloat", - "cinttypes", - "climits", - "clocale", - "cmath", - "csetjmp", - "csignal", - "cstdarg", - "cstddef", - "cstdint", - "cstdio", - "cstdlib", - "cstring", - "ctime", - "cuchar", - "cwchar", - "cwctype", - "codecvt", - "fstream", - "iomanip", - "ios", - "iosfwd", - "istream", - "ostream", - "sstream", - 
"streambuf", - "syncstream", - "iterator", - "locale", - "numeric", - "limits", - "unistd", - "fcntl", - "sys/", - "pthread", - "netinet/", - "arpa/", - "dirent", - "dlfcn", - "poll", - "sched", - "semaphore", - "spawn", - "termios", -]; - -fn is_system_header(path: &str) -> bool { - let clean = path.trim_matches(|c| c == '<' || c == '>' || c == '"'); - SYSTEM_HEADERS.iter().any(|s| clean.starts_with(s)) -} - -fn normalize_to_patterns(import_path: &str, lang: &str) -> Vec { - let clean = import_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); - - match lang { - "rust" | "rs" => normalize_rust_import(clean), - "python" | "py" => normalize_python_import(clean), - "go" => normalize_go_import(clean), - "typescript" | "ts" | "tsx" | "javascript" | "js" | "mjs" | "cjs" | "jsx" => { - normalize_js_import(clean) - } - "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => normalize_c_import(clean), - "java" => normalize_java_import(clean), - "scala" | "sc" => normalize_scala_import(clean), - "zig" => normalize_zig_import(clean), - _ => vec![format!("**/{}", clean)], - } -} - -fn normalize_rust_import(path: &str) -> Vec { - let stripped = path - .trim_start_matches("crate::") - .trim_start_matches("self::") - .trim_start_matches("super::"); - - let parts: Vec<&str> = stripped.split("::").filter(|p| !p.is_empty()).collect(); - if parts.is_empty() { - return vec![]; - } - - let file_path = parts.join("/"); - vec![ - format!("**/{}.rs", file_path), - format!("**/{}/mod.rs", file_path), - format!("**/src/{}.rs", file_path), - format!("**/src/{}/mod.rs", file_path), - ] -} - -fn normalize_python_import(path: &str) -> Vec { - if path.starts_with('.') { - return vec![]; - } - - let parts: Vec<&str> = path.split('.').collect(); - if parts.is_empty() { - return vec![]; - } - - let file_path = parts.join("/"); - vec![ - format!("**/{}.py", file_path), - format!("**/{}/__init__.py", file_path), - format!("**/src/{}.py", file_path), - format!("**/src/{}/__init__.py", 
file_path), - ] -} - -fn normalize_go_import(path: &str) -> Vec { - let parts: Vec<&str> = path.split('/').collect(); - - let local_parts: Vec<&str> = if parts.len() >= 3 - && (parts[0].contains('.') || parts[0] == "github" || parts[0] == "golang") - { - parts[3..].to_vec() - } else { - parts - }; - - if local_parts.is_empty() { - return vec![]; - } - - let dir_path = local_parts.join("/"); - vec![ - format!("{}/*.go", dir_path), - format!("**/{}/*.go", dir_path), - ] -} - -fn normalize_js_import(path: &str) -> Vec { - if path.starts_with('.') { - let clean = path.trim_start_matches("./").trim_start_matches("../"); - return vec![ - format!("**/{}.ts", clean), - format!("**/{}.tsx", clean), - format!("**/{}.js", clean), - format!("**/{}.jsx", clean), - format!("**/{}/index.ts", clean), - format!("**/{}/index.tsx", clean), - format!("**/{}/index.js", clean), - ]; - } - - let clean = path.trim_start_matches("@/").trim_start_matches('@'); - let parts: Vec<&str> = clean.split('/').collect(); - - if parts.is_empty() { - return vec![]; - } - - let file_path = parts.join("/"); - vec![ - format!("**/{}.ts", file_path), - format!("**/{}.tsx", file_path), - format!("**/{}.js", file_path), - format!("**/{}.jsx", file_path), - format!("**/{}/index.ts", file_path), - format!("**/{}/index.tsx", file_path), - format!("**/{}/index.js", file_path), - format!("**/src/{}.ts", file_path), - format!("**/src/{}.tsx", file_path), - ] -} - -fn normalize_c_import(path: &str) -> Vec { - if is_system_header(path) { - return vec![]; - } - - let clean = path.trim_matches(|c| c == '"' || c == '<' || c == '>'); - vec![ - format!("**/{}", clean), - format!("**/include/{}", clean), - format!("**/src/{}", clean), - ] -} - -fn normalize_java_import(path: &str) -> Vec { - if path.starts_with("java.") || path.starts_with("javax.") || path.starts_with("sun.") { - return vec![]; - } - - let file_path = path.replace('.', "/"); - vec![ - format!("**/{}.java", file_path), - format!("**/src/{}.java", 
file_path), - format!("**/src/main/java/{}.java", file_path), - ] -} - -fn normalize_scala_import(path: &str) -> Vec { - if path.starts_with("scala.") || path.starts_with("java.") { - return vec![]; - } - - let clean = path.trim_end_matches("._").trim_end_matches(".*"); - let file_path = clean.replace('.', "/"); - vec![ - format!("**/{}.scala", file_path), - format!("**/{}.sc", file_path), - format!("**/src/{}.scala", file_path), - format!("**/src/main/scala/{}.scala", file_path), - ] -} - -fn normalize_zig_import(path: &str) -> Vec { - if path == "std" { - return vec![]; - } - - if path.ends_with(".zig") || path.contains('/') { - return vec![format!("**/{}", path), format!("**/src/{}", path)]; - } - - vec![format!("**/{}.zig", path), format!("**/src/{}.zig", path)] -} - -fn search_patterns(patterns: &[String], root: &Path) -> Option { - for pattern in patterns { - let full_pattern = root.join(pattern); - let pattern_str = full_pattern.to_string_lossy(); - - if let Ok(paths) = glob::glob(&pattern_str) { - for entry in paths.flatten() { - if entry.is_file() { - return Some(entry); - } - } - } - } - None -} - -fn resolve_relative(import_path: &str, from_file: &Path, lang: &str) -> Option { - let from_dir = from_file.parent()?; - let clean = import_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); - - match lang { - "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => { - if is_system_header(import_path) { - return None; - } - let candidate = from_dir.join(clean); - if candidate.exists() { - return Some(candidate); - } - let parent = from_dir.parent()?; - let candidate = parent.join(clean); - if candidate.exists() { - return Some(candidate); - } - } - "typescript" | "ts" | "tsx" | "javascript" | "js" | "mjs" | "cjs" | "jsx" => { - if !clean.starts_with('.') { - return None; - } - let base = from_dir.join(clean.trim_start_matches("./")); - for ext in &["ts", "tsx", "js", "jsx"] { - let candidate = base.with_extension(ext); - if candidate.exists() { - 
return Some(candidate); - } - } - let index = base.join("index"); - for ext in &["ts", "tsx", "js", "jsx"] { - let candidate = index.with_extension(ext); - if candidate.exists() { - return Some(candidate); - } - } - } - "zig" => { - if clean.ends_with(".zig") || clean.contains('/') { - let candidate = from_dir.join(clean); - if candidate.exists() { - return Some(candidate); - } - } - } - _ => {} - } - - None -} - -struct DefinitionPattern { - extensions: &'static [&'static str], - pattern: &'static str, -} - -const DEFINITION_PATTERNS: &[DefinitionPattern] = &[ - DefinitionPattern { - extensions: &["rs"], - pattern: r"fn\s+{NAME}\s*[<(]", - }, - DefinitionPattern { - extensions: &["go"], - pattern: r"func\s+(\([^)]*\)\s*)?{NAME}\s*[\[<(]", - }, - DefinitionPattern { - extensions: &["py"], - pattern: r"def\s+{NAME}\s*\(", - }, - DefinitionPattern { - extensions: &["ts", "tsx", "js", "jsx", "mjs", "cjs"], - pattern: r"(function\s+{NAME}|const\s+{NAME}\s*=|let\s+{NAME}\s*=|{NAME}\s*\([^)]*\)\s*\{)", - }, - DefinitionPattern { - extensions: &["java", "scala"], - pattern: r"(void|int|String|boolean|public|private|protected|static|def)\s+{NAME}\s*[<(]", - }, - DefinitionPattern { - extensions: &["c", "cpp", "cc", "cxx", "h", "hpp"], - pattern: r"\b\w+[\s*]+{NAME}\s*\(", - }, - DefinitionPattern { - extensions: &["zig"], - pattern: r"(fn|pub fn)\s+{NAME}\s*\(", - }, - DefinitionPattern { - extensions: &["sh", "bash"], - pattern: r"(function\s+{NAME}|{NAME}\s*\(\s*\))", - }, -]; - -pub fn resolve_same_file(callee: &str, file: &Path, index: &Index) -> Option { - let record = index.get(file)?; - record - .definitions - .iter() - .find(|d| d.name == callee) - .cloned() -} - -pub fn resolve_by_index(callee: &str, index: &Index) -> Option { - index.definitions().find(|d| d.name == callee).cloned() -} - -fn search_file_for_def(callee: &str, file: &Path) -> Option { - use grep::regex::RegexMatcher; - use grep::searcher::sinks::UTF8; - use grep::searcher::Searcher; - - let ext = 
file.extension().and_then(|e| e.to_str()).unwrap_or(""); - let escaped = regex::escape(callee); - - for pattern_def in DEFINITION_PATTERNS { - if !pattern_def.extensions.contains(&ext) { - continue; - } - - let pattern = pattern_def.pattern.replace("{NAME}", &escaped); - let matcher = match RegexMatcher::new(&pattern) { - Ok(m) => m, - Err(_) => continue, - }; - - let mut found: Option = None; - - let _ = Searcher::new().search_path( - &matcher, - file, - UTF8(|line_num, _line| { - found = Some(line_num); - Ok(false) - }), - ); - - if let Some(line_num) = found { - return Some(Definition { - name: callee.to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 0, - start_line: line_num as usize, - end_line: line_num as usize, - }, - file: file.to_path_buf(), - }); - } - } - - None -} - -pub fn resolve_by_search(callee: &str, root: &Path) -> Result> { - use grep::regex::RegexMatcher; - use grep::searcher::sinks::UTF8; - use grep::searcher::Searcher; - - let escaped = regex::escape(callee); - - for pattern_def in DEFINITION_PATTERNS { - let pattern = pattern_def.pattern.replace("{NAME}", &escaped); - - let matcher = match RegexMatcher::new(&pattern) { - Ok(m) => m, - Err(_) => continue, - }; - - for entry in walkdir::WalkDir::new(root) - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| e.file_type().is_file()) - { - let path = entry.path(); - - let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); - - if !pattern_def.extensions.contains(&ext) { - continue; - } - - let mut found: Option<(u64, PathBuf)> = None; - - let _ = Searcher::new().search_path( - &matcher, - path, - UTF8(|line_num, _line| { - found = Some((line_num, path.to_path_buf())); - Ok(false) - }), - ); - - if let Some((line_num, file_path)) = found { - return Ok(Some(Definition { - name: callee.to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 0, - start_line: line_num as usize, - end_line: line_num as usize, - }, 
- file: file_path, - })); - } - } - } - - Ok(None) -} - -pub struct Resolver<'a> { - index: &'a Index, - root: PathBuf, - discovered_files: RefCell>, -} - -impl<'a> Resolver<'a> { - pub fn new(index: &'a Index, root: PathBuf) -> Self { - Self { - index, - root, - discovered_files: RefCell::new(HashSet::new()), - } - } - - pub fn resolve(&self, callee: &str, from_file: &Path) -> Result> { - if let Some(def) = resolve_same_file(callee, from_file, self.index) { - return Ok(Some(def)); - } - - if let Some(def) = resolve_by_index(callee, self.index) { - return Ok(Some(def)); - } - - if let Some(def) = self.resolve_via_imports(callee, from_file) { - return Ok(Some(def)); - } - - if let Some(def) = resolve_by_search(callee, &self.root)? { - self.discovered_files.borrow_mut().insert(def.file.clone()); - return Ok(Some(def)); - } - - Ok(None) - } - - pub fn files_to_index(&self) -> Vec { - self.discovered_files.borrow().iter().cloned().collect() - } - - pub fn clear_discovered(&self) { - self.discovered_files.borrow_mut().clear(); - } - - fn resolve_via_imports(&self, callee: &str, from_file: &Path) -> Option { - let record = self.index.get(from_file)?; - let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or(""); - - for import in &record.imports { - if !self.import_matches_callee(&import.module_path, callee, ext) { - continue; - } - - if let Some(resolved) = resolve_relative(&import.module_path, from_file, ext) { - self.discovered_files.borrow_mut().insert(resolved.clone()); - if let Some(def) = self.find_def_in_file(&resolved, callee) { - return Some(def); - } - if let Some(def) = search_file_for_def(callee, &resolved) { - return Some(def); - } - } - - let patterns = normalize_to_patterns(&import.module_path, ext); - if let Some(resolved) = search_patterns(&patterns, &self.root) { - self.discovered_files.borrow_mut().insert(resolved.clone()); - if let Some(def) = self.find_def_in_file(&resolved, callee) { - return Some(def); - } - if let Some(def) = 
search_file_for_def(callee, &resolved) { - return Some(def); - } - } - } - - None - } - - fn import_matches_callee(&self, module_path: &str, callee: &str, lang: &str) -> bool { - let clean = module_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); - - match lang { - "rs" => { - let parts: Vec<&str> = clean.split("::").collect(); - parts.last().map(|s| *s == callee).unwrap_or(false) - } - "py" => { - let parts: Vec<&str> = clean.split('.').collect(); - parts.last().map(|s| *s == callee).unwrap_or(false) - } - "go" => { - let parts: Vec<&str> = clean.split('/').collect(); - parts.last().map(|s| *s == callee).unwrap_or(false) - } - "java" | "scala" | "sc" => { - let parts: Vec<&str> = clean.split('.').collect(); - parts - .last() - .map(|s| *s == callee || *s == "*" || *s == "_") - .unwrap_or(false) - } - "ts" | "tsx" | "js" | "jsx" | "mjs" | "cjs" => true, - "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => true, - "zig" => true, - _ => true, - } - } - - fn find_def_in_file(&self, file: &Path, name: &str) -> Option { - let record = self.index.get(file)?; - record.definitions.iter().find(|d| d.name == name).cloned() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::fs; - use tempfile::TempDir; - - #[test] - fn test_normalize_rust_import() { - let patterns = normalize_rust_import("crate::foo::bar"); - assert!(patterns.iter().any(|p| p.contains("foo/bar.rs"))); - assert!(patterns.iter().any(|p| p.contains("foo/bar/mod.rs"))); - } - - #[test] - fn test_normalize_python_import() { - let patterns = normalize_python_import("mypackage.utils.helper"); - assert!(patterns - .iter() - .any(|p| p.contains("mypackage/utils/helper.py"))); - assert!(patterns - .iter() - .any(|p| p.contains("mypackage/utils/helper/__init__.py"))); - } - - #[test] - fn test_normalize_go_import() { - let patterns = normalize_go_import("github.com/user/repo/pkg/utils"); - assert!(patterns.iter().any(|p| p.contains("pkg/utils"))); - } - - #[test] - fn 
test_normalize_js_import_relative() { - let patterns = normalize_js_import("./components/Button"); - assert!(patterns.iter().any(|p| p.contains("components/Button.ts"))); - assert!(patterns - .iter() - .any(|p| p.contains("components/Button/index.ts"))); - } - - #[test] - fn test_normalize_js_import_alias() { - let patterns = normalize_js_import("@/components/Button"); - assert!(patterns.iter().any(|p| p.contains("components/Button.ts"))); - } - - #[test] - fn test_normalize_c_import() { - let patterns = normalize_c_import("utils/helper.h"); - assert!(patterns.iter().any(|p| p.contains("utils/helper.h"))); - } - - #[test] - fn test_normalize_c_import_system_skipped() { - let patterns = normalize_c_import(""); - assert!(patterns.is_empty()); - } - - #[test] - fn test_normalize_java_import() { - let patterns = normalize_java_import("com.example.utils.Helper"); - assert!(patterns - .iter() - .any(|p| p.contains("com/example/utils/Helper.java"))); - } - - #[test] - fn test_normalize_java_import_stdlib_skipped() { - let patterns = normalize_java_import("java.util.List"); - assert!(patterns.is_empty()); - } - - #[test] - fn test_is_system_header() { - assert!(is_system_header("")); - assert!(is_system_header("")); - assert!(is_system_header("")); - assert!(is_system_header("")); - assert!(!is_system_header("\"myheader.h\"")); - assert!(!is_system_header("\"utils/helper.h\"")); - } - - #[test] - fn test_search_patterns() { - let dir = TempDir::new().unwrap(); - let src = dir.path().join("src"); - fs::create_dir_all(&src).unwrap(); - fs::write(src.join("helper.rs"), "fn helper() {}").unwrap(); - - let patterns = vec!["**/helper.rs".to_string()]; - let found = search_patterns(&patterns, dir.path()); - assert!(found.is_some()); - assert!(found.unwrap().ends_with("helper.rs")); - } - - #[test] - fn test_resolve_same_file() { - use super::super::index::{Definition, DefinitionKind, FileRecord, Span}; - - let mut index = Index::new(); - let file = PathBuf::from("src/main.rs"); - 
- index.update(FileRecord { - path: file.clone(), - mtime: 0, - size: 0, - definitions: vec![Definition { - name: "foo".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 10, - start_line: 1, - end_line: 3, - }, - file: file.clone(), - }], - calls: vec![], - imports: vec![], - }); - - let found = resolve_same_file("foo", &file, &index); - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "foo"); - - let not_found = resolve_same_file("bar", &file, &index); - assert!(not_found.is_none()); - } - - #[test] - fn test_resolve_by_index() { - use super::super::index::{Definition, DefinitionKind, FileRecord, Span}; - - let mut index = Index::new(); - - index.update(FileRecord { - path: PathBuf::from("src/a.rs"), - mtime: 0, - size: 0, - definitions: vec![Definition { - name: "alpha".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 10, - start_line: 1, - end_line: 3, - }, - file: PathBuf::from("src/a.rs"), - }], - calls: vec![], - imports: vec![], - }); - - let found = resolve_by_index("alpha", &index); - assert!(found.is_some()); - - let not_found = resolve_by_index("gamma", &index); - assert!(not_found.is_none()); - } - - #[test] - fn test_resolver_prefers_same_file() { - use super::super::index::{Definition, DefinitionKind, FileRecord, Span}; - - let mut index = Index::new(); - let file_a = PathBuf::from("src/a.rs"); - let file_b = PathBuf::from("src/b.rs"); - - index.update(FileRecord { - path: file_a.clone(), - mtime: 0, - size: 0, - definitions: vec![Definition { - name: "foo".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 10, - start_line: 1, - end_line: 3, - }, - file: file_a.clone(), - }], - calls: vec![], - imports: vec![], - }); - - index.update(FileRecord { - path: file_b.clone(), - mtime: 0, - size: 0, - definitions: vec![Definition { - name: "foo".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - 
end_byte: 10, - start_line: 10, - end_line: 12, - }, - file: file_b.clone(), - }], - calls: vec![], - imports: vec![], - }); - - let resolver = Resolver::new(&index, PathBuf::from(".")); - - let found = resolver.resolve("foo", &file_a).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().file, file_a); - - let found = resolver.resolve("foo", &file_b).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().file, file_b); - } - - #[test] - fn test_resolve_by_search_rust() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("lib.rs"), - "pub fn my_function() {\n println!(\"hello\");\n}\n", - ) - .unwrap(); - - let found = resolve_by_search("my_function", dir.path()).unwrap(); - assert!(found.is_some()); - let def = found.unwrap(); - assert_eq!(def.name, "my_function"); - assert_eq!(def.span.start_line, 1); - } - - #[test] - fn test_resolve_by_search_python() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("utils.py"), - "def helper_func():\n pass\n", - ) - .unwrap(); - - let found = resolve_by_search("helper_func", dir.path()).unwrap(); - assert!(found.is_some()); - let def = found.unwrap(); - assert_eq!(def.name, "helper_func"); - } - - #[test] - fn test_resolve_via_imports_with_glob() { - use super::super::index::{Definition, DefinitionKind, FileRecord, Import, Span}; - - let dir = TempDir::new().unwrap(); - let utils_dir = dir.path().join("src/utils"); - fs::create_dir_all(&utils_dir).unwrap(); - fs::write(utils_dir.join("helper.rs"), "pub fn helper() {}").unwrap(); - - let mut index = Index::new(); - let main_file = dir.path().join("src/main.rs"); - - index.update(FileRecord { - path: utils_dir.join("helper.rs"), - mtime: 0, - size: 0, - definitions: vec![Definition { - name: "helper".to_string(), - kind: DefinitionKind::Function, - span: Span { - start_byte: 0, - end_byte: 20, - start_line: 1, - end_line: 1, - }, - file: utils_dir.join("helper.rs"), - }], - calls: vec![], - imports: vec![], - }); 
- - index.update(FileRecord { - path: main_file.clone(), - mtime: 0, - size: 0, - definitions: vec![], - calls: vec![], - imports: vec![Import { - module_path: "crate::utils::helper".to_string(), - alias: None, - span: Span { - start_byte: 0, - end_byte: 25, - start_line: 1, - end_line: 1, - }, - file: main_file.clone(), - }], - }); - - let resolver = Resolver::new(&index, dir.path().to_path_buf()); - let found = resolver.resolve("helper", &main_file).unwrap(); - - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "helper"); - } -} diff --git a/src/main.rs b/src/main.rs index 4f5a8f7..34294ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ use std::path::{Path, PathBuf}; use anyhow::{bail, Context, Result}; use indicatif::{ProgressBar, ProgressStyle}; +use rayon::prelude::*; use crate::analyzer::process_directory; use crate::cli::{Cli, CodeArgs, Commands, FunctionTarget, IndexCommand}; @@ -271,7 +272,7 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { save_index(&index, &root)?; } - let graph = CallGraph::build(&index, &root); + let graph = CallGraph::build(&index); let node_id = if let Some(ref file) = target.file { let file_path = root.join(file); @@ -293,15 +294,15 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { ); }; + let depth = args.depth.unwrap_or(1); + let definitions = if args.callers { - let callers = graph.get_transitive_callers(node_id); - let mut defs: Vec<_> = callers.iter().map(|n| &n.definition).collect(); - if let Some(node) = graph.get_node(node_id) { - defs.push(&node.definition); - } - defs + graph.get_callers_to_depth(node_id, depth) + .into_iter() + .filter_map(|id| graph.get_node(id).map(|n| &n.definition)) + .collect() } else { - graph.post_order_definitions(node_id) + graph.definitions_to_depth(node_id, depth) }; let output = format_definitions(&definitions, &root)?; @@ -377,6 +378,8 @@ fn handle_index_command(cmd: &IndexCommand) -> Result<()> { Ok(()) } +const INDEX_CHUNK_SIZE: usize = 256; + fn 
index_directory(root: &Path, index: &mut Index) -> Result { let pb = ProgressBar::new_spinner(); pb.set_style( @@ -400,70 +403,73 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { }) .collect(); - let total = source_files.len(); pb.finish_and_clear(); + let stale_files: Vec<_> = source_files + .into_iter() + .filter_map(|entry| { + let path = entry.path(); + let rel_path = path.strip_prefix(root).unwrap_or(path); + let ext = path.extension().and_then(|e| e.to_str())?; + if ext.is_empty() { + return None; + } + let (mtime, size) = file_fingerprint(path).ok()?; + if index.is_stale(rel_path, mtime, size) { + Some((path.to_path_buf(), rel_path.to_path_buf(), ext.to_string(), mtime, size)) + } else { + None + } + }) + .collect(); + + let total = stale_files.len(); + if total == 0 { + return Ok(0); + } + let pb = ProgressBar::new(total as u64); pb.set_style( ProgressStyle::default_bar() - .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len}") .expect("valid template") .progress_chars("#>-"), ); let mut updated = 0; - for entry in source_files { - let path = entry.path(); - let rel_path = path.strip_prefix(root).unwrap_or(path); + for chunk in stale_files.chunks(INDEX_CHUNK_SIZE) { + let records: Vec = chunk + .par_iter() + .filter_map(|(path, rel_path, ext, mtime, size)| { + let extractor = Extractor::from_extension(ext).ok()?; + let source = fs::read(path).ok()?; - pb.set_message(format!("{}", rel_path.display())); + let mut parser = tree_sitter::Parser::new(); + parser.set_language(extractor.language()).ok()?; + let tree = parser.parse(&source, None)?; - let ext = path - .extension() - .and_then(|e| e.to_str()) - .unwrap_or(""); + let definitions = extractor.extract_definitions(&tree, &source, rel_path); + let calls = extractor.extract_calls(&tree, &source, rel_path); + let imports = extractor.extract_imports(&tree, &source, rel_path); - let (mtime, size) = 
file_fingerprint(path)?; - - if !index.is_stale(rel_path, mtime, size) { - pb.inc(1); - continue; - } - - let extractor = match Extractor::from_extension(ext) { - Ok(e) => e, - Err(_) => { pb.inc(1); - continue; - } - }; - let source = fs::read(path).with_context(|| format!("failed to read: {}", path.display()))?; - - let mut parser = tree_sitter::Parser::new(); - parser.set_language(extractor.language())?; - - let Some(tree) = parser.parse(&source, None) else { - pb.inc(1); - continue; - }; - - let definitions = extractor.extract_definitions(&tree, &source, rel_path); - let calls = extractor.extract_calls(&tree, &source, rel_path); - let imports = extractor.extract_imports(&tree, &source, rel_path); - - index.update(FileRecord { - path: rel_path.to_path_buf(), - mtime, - size, - definitions, - calls, - imports, - }); - - updated += 1; - pb.inc(1); + Some(FileRecord { + path: rel_path.to_path_buf(), + mtime: *mtime, + size: *size, + definitions, + calls, + imports, + }) + }) + .collect(); + + updated += records.len(); + for record in records { + index.update(record); + } } pb.finish_and_clear(); diff --git a/tests/integration.rs b/tests/integration.rs index 999d434..b0a64f3 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -5,7 +5,6 @@ use std::path::Path; use glimpse::code::extract::Extractor; use glimpse::code::graph::CallGraph; use glimpse::code::index::{file_fingerprint, FileRecord, Index}; -use glimpse::code::resolve::{resolve_by_index, resolve_by_search, resolve_same_file, Resolver}; use tree_sitter::Parser; fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &str) { @@ -27,341 +26,7 @@ fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &st index.update(record); } -mod resolver_tests { - use super::*; - use glimpse::code::index::{Call, Definition, DefinitionKind, FileRecord, Import, Span}; - use std::path::PathBuf; - use tempfile::TempDir; - - fn make_span() -> Span { - Span { - start_byte: 0, - 
end_byte: 10, - start_line: 1, - end_line: 1, - } - } - - fn make_def(name: &str, file: &Path) -> Definition { - Definition { - name: name.to_string(), - kind: DefinitionKind::Function, - span: make_span(), - file: file.to_path_buf(), - } - } - - #[test] - fn test_resolve_same_file_priority() { - let mut index = Index::new(); - let file_a = PathBuf::from("src/a.rs"); - let file_b = PathBuf::from("src/b.rs"); - - index.update(FileRecord { - path: file_a.clone(), - mtime: 0, - size: 0, - definitions: vec![make_def("helper", &file_a)], - calls: vec![], - imports: vec![], - }); - - index.update(FileRecord { - path: file_b.clone(), - mtime: 0, - size: 0, - definitions: vec![make_def("helper", &file_b)], - calls: vec![], - imports: vec![], - }); - - let from_a = resolve_same_file("helper", &file_a, &index); - assert!(from_a.is_some()); - assert_eq!(from_a.unwrap().file, file_a); - - let from_b = resolve_same_file("helper", &file_b, &index); - assert!(from_b.is_some()); - assert_eq!(from_b.unwrap().file, file_b); - - let not_found = resolve_same_file("nonexistent", &file_a, &index); - assert!(not_found.is_none()); - } - - #[test] - fn test_resolve_by_index_cross_file() { - let mut index = Index::new(); - let file_a = PathBuf::from("src/a.rs"); - let file_b = PathBuf::from("src/b.rs"); - - index.update(FileRecord { - path: file_a.clone(), - mtime: 0, - size: 0, - definitions: vec![make_def("func_a", &file_a)], - calls: vec![], - imports: vec![], - }); - - index.update(FileRecord { - path: file_b.clone(), - mtime: 0, - size: 0, - definitions: vec![make_def("func_b", &file_b)], - calls: vec![], - imports: vec![], - }); - - let found_a = resolve_by_index("func_a", &index); - assert!(found_a.is_some()); - assert_eq!(found_a.unwrap().file, file_a); - - let found_b = resolve_by_index("func_b", &index); - assert!(found_b.is_some()); - assert_eq!(found_b.unwrap().file, file_b); - } - - #[test] - fn test_resolve_by_search_rust() { - let dir = TempDir::new().unwrap(); - - fs::write( 
- dir.path().join("lib.rs"), - "pub fn my_searched_function() {\n println!(\"found\");\n}\n", - ) - .unwrap(); - - let found = resolve_by_search("my_searched_function", dir.path()).unwrap(); - assert!(found.is_some()); - let def = found.unwrap(); - assert_eq!(def.name, "my_searched_function"); - assert!(def.file.ends_with("lib.rs")); - } - - #[test] - fn test_resolve_by_search_python() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("utils.py"), - "def searched_python_func():\n pass\n", - ) - .unwrap(); - - let found = resolve_by_search("searched_python_func", dir.path()).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "searched_python_func"); - } - - #[test] - fn test_resolve_by_search_go() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("main.go"), - "package main\n\nfunc searchedGoFunc() {\n}\n", - ) - .unwrap(); - - let found = resolve_by_search("searchedGoFunc", dir.path()).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "searchedGoFunc"); - } - - #[test] - fn test_resolve_by_search_typescript() { - let dir = TempDir::new().unwrap(); - - fs::write( - dir.path().join("index.ts"), - "function searchedTsFunc() {\n return 42;\n}\n", - ) - .unwrap(); - - let found = resolve_by_search("searchedTsFunc", dir.path()).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "searchedTsFunc"); - } - - #[test] - fn test_resolve_by_search_not_found() { - let dir = TempDir::new().unwrap(); - - fs::write(dir.path().join("empty.rs"), "// no functions here\n").unwrap(); - - let found = resolve_by_search("nonexistent_function", dir.path()).unwrap(); - assert!(found.is_none()); - } - - #[test] - fn test_resolver_resolution_chain() { - let dir = TempDir::new().unwrap(); - let mut index = Index::new(); - - let file_main = dir.path().join("main.rs"); - let file_utils = dir.path().join("utils.rs"); - - fs::write(&file_main, "fn main() { helper(); }").unwrap(); - 
fs::write(&file_utils, "pub fn helper() { nested(); }\npub fn nested() {}").unwrap(); - - index.update(FileRecord { - path: file_main.clone(), - mtime: 0, - size: 0, - definitions: vec![make_def("main", &file_main)], - calls: vec![Call { - callee: "helper".to_string(), - caller: Some("main".to_string()), - span: make_span(), - file: file_main.clone(), - }], - imports: vec![], - }); - - index.update(FileRecord { - path: file_utils.clone(), - mtime: 0, - size: 0, - definitions: vec![make_def("helper", &file_utils), make_def("nested", &file_utils)], - calls: vec![], - imports: vec![], - }); - - let resolver = Resolver::new(&index, dir.path().to_path_buf()); - - let found = resolver.resolve("helper", &file_main).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().file, file_utils); - - let same_file = resolver.resolve("nested", &file_utils).unwrap(); - assert!(same_file.is_some()); - assert_eq!(same_file.unwrap().file, file_utils); - } - - #[test] - fn test_resolver_grep_fallback() { - let dir = TempDir::new().unwrap(); - let index = Index::new(); - - fs::write( - dir.path().join("hidden.rs"), - "fn not_indexed_function() {\n println!(\"hidden\");\n}\n", - ) - .unwrap(); - - let resolver = Resolver::new(&index, dir.path().to_path_buf()); - let from_file = dir.path().join("caller.rs"); - - let found = resolver.resolve("not_indexed_function", &from_file).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "not_indexed_function"); - - let discovered = resolver.files_to_index(); - assert!(!discovered.is_empty()); - } - - #[test] - fn test_resolver_tracks_discovered_files() { - let dir = TempDir::new().unwrap(); - let index = Index::new(); - - fs::write(dir.path().join("a.rs"), "fn discovered_a() {}").unwrap(); - fs::write(dir.path().join("b.rs"), "fn discovered_b() {}").unwrap(); - - let resolver = Resolver::new(&index, dir.path().to_path_buf()); - let from_file = dir.path().join("main.rs"); - - resolver.resolve("discovered_a", 
&from_file).unwrap(); - resolver.resolve("discovered_b", &from_file).unwrap(); - - let discovered = resolver.files_to_index(); - assert_eq!(discovered.len(), 2); - - resolver.clear_discovered(); - assert!(resolver.files_to_index().is_empty()); - } - - #[test] - fn test_resolver_with_imports() { - let dir = TempDir::new().unwrap(); - let src = dir.path().join("src"); - fs::create_dir_all(&src).unwrap(); - - fs::write(src.join("utils.rs"), "pub fn imported_helper() {}").unwrap(); - - let mut index = Index::new(); - let main_file = dir.path().join("src/main.rs"); - - index.update(FileRecord { - path: src.join("utils.rs"), - mtime: 0, - size: 0, - definitions: vec![make_def("imported_helper", &src.join("utils.rs"))], - calls: vec![], - imports: vec![], - }); - - index.update(FileRecord { - path: main_file.clone(), - mtime: 0, - size: 0, - definitions: vec![], - calls: vec![], - imports: vec![Import { - module_path: "crate::utils::imported_helper".to_string(), - alias: None, - span: make_span(), - file: main_file.clone(), - }], - }); - - let resolver = Resolver::new(&index, dir.path().to_path_buf()); - - let found = resolver.resolve("imported_helper", &main_file).unwrap(); - assert!(found.is_some()); - assert_eq!(found.unwrap().name, "imported_helper"); - } - - #[test] - fn test_import_discovery_tracks_files_for_reindexing() { - let dir = TempDir::new().unwrap(); - let src = dir.path().join("src"); - let utils_dir = src.join("utils"); - fs::create_dir_all(&utils_dir).unwrap(); - - fs::write(utils_dir.join("helper.rs"), "pub fn helper() {}").unwrap(); - - let mut index = Index::new(); - let main_file = src.join("main.rs"); - - index.update(FileRecord { - path: main_file.clone(), - mtime: 0, - size: 0, - definitions: vec![make_def("main", &main_file)], - calls: vec![], - imports: vec![Import { - module_path: "crate::utils::helper".to_string(), - alias: None, - span: make_span(), - file: main_file.clone(), - }], - }); - - let resolver = Resolver::new(&index, 
dir.path().to_path_buf()); - - let found = resolver.resolve("helper", &main_file).unwrap(); - assert!(found.is_some(), "grep fallback should find unindexed definition"); - assert_eq!(found.unwrap().name, "helper"); - - let discovered = resolver.files_to_index(); - assert!( - discovered.iter().any(|p| p.ends_with("helper.rs")), - "should track helper.rs for re-indexing" - ); - } -} - -mod call_graph_resolution { +mod call_graph_tests { use super::*; use glimpse::code::index::{Call, Definition, DefinitionKind, FileRecord, Span}; use tempfile::TempDir; @@ -418,7 +83,7 @@ mod call_graph_resolution { imports: vec![], }); - let graph = CallGraph::build(&index, dir.path()); + let graph = CallGraph::build(&index); let caller_id = graph.find_node("caller").unwrap(); let callees = graph.get_callees(caller_id); @@ -428,40 +93,6 @@ mod call_graph_resolution { assert_eq!(callees[0].definition.file, file_b); } - #[test] - fn test_graph_uses_grep_fallback_for_unindexed() { - let dir = TempDir::new().unwrap(); - let file_caller = dir.path().join("caller.rs"); - let file_hidden = dir.path().join("hidden.rs"); - - fs::write(&file_caller, "fn caller() { hidden_func(); }").unwrap(); - fs::write(&file_hidden, "fn hidden_func() {}").unwrap(); - - let mut index = Index::new(); - - index.update(FileRecord { - path: file_caller.clone(), - mtime: 0, - size: 0, - definitions: vec![make_def("caller", &file_caller)], - calls: vec![Call { - callee: "hidden_func".to_string(), - caller: Some("caller".to_string()), - span: make_span(), - file: file_caller.clone(), - }], - imports: vec![], - }); - - let graph = CallGraph::build(&index, dir.path()); - - let caller_id = graph.find_node("caller").unwrap(); - let callees = graph.get_callees(caller_id); - - assert_eq!(callees.len(), 1); - assert_eq!(callees[0].definition.name, "hidden_func"); - } - #[test] fn test_graph_same_name_different_files() { let dir = TempDir::new().unwrap(); @@ -507,7 +138,7 @@ mod call_graph_resolution { imports: vec![], }); - 
let graph = CallGraph::build(&index, dir.path()); + let graph = CallGraph::build(&index); assert_eq!(graph.node_count(), 3); @@ -568,7 +199,7 @@ mod call_graph_resolution { imports: vec![], }); - let graph = CallGraph::build(&index, dir.path()); + let graph = CallGraph::build(&index); let entry_id = graph.find_node("entry").unwrap(); let transitive = graph.get_transitive_callees(entry_id); @@ -609,7 +240,7 @@ mod call_graph_resolution { imports: vec![], }); - let graph = CallGraph::build(&index, dir.path()); + let graph = CallGraph::build(&index); let caller_id = graph.find_node("caller").unwrap(); let callees = graph.get_callees(caller_id); @@ -682,7 +313,7 @@ fn write_file(_data: &str) {} index_file(&mut index, &extractor, &src.join("main.rs"), main_rs); index_file(&mut index, &extractor, &src.join("utils.rs"), utils_rs); - let graph = CallGraph::build(&index, dir.path()); + let graph = CallGraph::build(&index); assert!(graph.node_count() >= 5); @@ -740,7 +371,7 @@ def format_output(s): index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); index_file(&mut index, &extractor, &dir.path().join("utils.py"), utils_py); - let graph = CallGraph::build(&index, dir.path()); + let graph = CallGraph::build(&index); if let Some(main_id) = graph.find_node("main") { let transitive = graph.get_transitive_callees(main_id); @@ -792,7 +423,7 @@ function format(s: string): string { index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts); index_file(&mut index, &extractor, &dir.path().join("utils.ts"), utils_ts); - let graph = CallGraph::build(&index, dir.path()); + let graph = CallGraph::build(&index); if let Some(main_id) = graph.find_node("main") { let callees = graph.get_callees(main_id); @@ -838,7 +469,7 @@ type Config struct { index_file(&mut index, &extractor, &dir.path().join("main.go"), main_go); - let graph = CallGraph::build(&index, dir.path()); + let graph = CallGraph::build(&index); if let Some(main_id) = graph.find_node("main") { 
let transitive = graph.get_transitive_callees(main_id); From b63f09662daf6ffc1464b30c2e41571247ab35c8 Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 02:34:04 -0800 Subject: [PATCH 28/35] feat: add import-aware call resolution with --strict mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add Resolver with prioritized resolution: same-file → imports → global fallback - capture call qualifiers in tree-sitter queries for future type-aware resolution - add --strict flag to disable global name matching (reduces false positives) - global fallback enabled by default for broader coverage --- registry.toml | 49 +++-- src/cli.rs | 8 +- src/code/extract.rs | 8 + src/code/graph.rs | 41 +++-- src/code/index.rs | 4 +- src/code/mod.rs | 1 + src/code/resolve.rs | 426 +++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 25 ++- tests/integration.rs | 24 ++- 9 files changed, 547 insertions(+), 39 deletions(-) create mode 100644 src/code/resolve.rs diff --git a/registry.toml b/registry.toml index 3fe04dd..7a1bc01 100644 --- a/registry.toml +++ b/registry.toml @@ -25,8 +25,12 @@ call_query = """ function: [ (identifier) @name (parenthesized_expression (identifier) @name) - (selector_expression field: (field_identifier) @name) - (parenthesized_expression (selector_expression field: (field_identifier) @name)) + (selector_expression + operand: (_) @qualifier + field: (field_identifier) @name) + (parenthesized_expression (selector_expression + operand: (_) @qualifier + field: (field_identifier) @name)) ]) @reference.call """ import_query = """ @@ -61,7 +65,9 @@ call_query = """ (call_expression function: [ (identifier) @name - (field_expression member: (identifier) @name) + (field_expression + operand: (_) @qualifier + member: (identifier) @name) ]) @reference.call """ import_query = """ @@ -99,7 +105,9 @@ call_query = """ (call_expression function: [ (identifier) @name - (field_expression field: (field_identifier) @name) + 
(field_expression + argument: (_) @qualifier + field: (field_identifier) @name) (parenthesized_expression (identifier) @name) ]) @reference.call """ @@ -178,9 +186,13 @@ call_query = """ (call_expression function: [ (identifier) @name - (qualified_identifier name: (identifier) @name) + (qualified_identifier + scope: (_) @qualifier + name: (identifier) @name) (template_function name: (identifier) @name) - (field_expression field: (field_identifier) @name) + (field_expression + argument: (_) @qualifier + field: (field_identifier) @name) ]) @reference.call """ import_query = """ @@ -247,7 +259,9 @@ call_query = """ (call function: [ (identifier) @name - (attribute attribute: (identifier) @name) + (attribute + object: (_) @qualifier + attribute: (identifier) @name) ]) @reference.call """ import_query = """ @@ -309,7 +323,9 @@ call_query = """ (call_expression function: [ (identifier) @name - (member_expression property: (property_identifier) @name) + (member_expression + object: (_) @qualifier + property: (property_identifier) @name) ]) @reference.call """ import_query = """ @@ -345,8 +361,12 @@ call_query = """ (call_expression function: [ (identifier) @name - (scoped_identifier name: (identifier) @name) - (field_expression field: (field_identifier) @name) + (scoped_identifier + path: (_) @qualifier + name: (identifier) @name) + (field_expression + value: (_) @qualifier + field: (field_identifier) @name) ]) @reference.call """ import_query = """ @@ -408,7 +428,9 @@ call_query = """ (call_expression function: [ (identifier) @name - (member_expression property: (property_identifier) @name) + (member_expression + object: (_) @qualifier + property: (property_identifier) @name) ]) @reference.call """ import_query = """ @@ -460,6 +482,7 @@ definition_query = """ """ call_query = """ (method_invocation + object: (_)? 
@qualifier name: (identifier) @name) @reference.call (object_creation_expression type: (type_identifier) @name) @reference.call @@ -519,7 +542,9 @@ call_query = """ (call_expression function: [ (identifier) @name - (field_expression field: (identifier) @name) + (field_expression + value: (_) @qualifier + field: (identifier) @name) ]) @reference.call """ import_query = """ diff --git a/src/cli.rs b/src/cli.rs index 3ca1f94..e2e4d10 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -80,6 +80,10 @@ pub struct CodeArgs { /// Output file (default: stdout) #[arg(short = 'f', long)] pub file: Option, + + /// Strict mode: only resolve calls via imports (no global name matching) + #[arg(long)] + pub strict: bool, } #[derive(Parser, Debug, Clone)] @@ -231,7 +235,9 @@ impl Cli { cli.max_size = cli.max_size.or(Some(config.max_size)); cli.max_depth = cli.max_depth.or(Some(config.max_depth)); - cli.output = cli.output.or(Some(config.default_output_format.clone().into())); + cli.output = cli + .output + .or(Some(config.default_output_format.clone().into())); if let Some(mut excludes) = cli.exclude.take() { excludes.extend(config.default_excludes.clone()); diff --git a/src/code/extract.rs b/src/code/extract.rs index 09e830a..31995da 100644 --- a/src/code/extract.rs +++ b/src/code/extract.rs @@ -13,6 +13,7 @@ pub struct QuerySet { def_name_idx: u32, def_kind_indices: Vec<(u32, DefinitionKind)>, call_name_idx: u32, + call_qualifier_idx: Option, import_path_indices: Vec, import_alias_idx: Option, } @@ -41,6 +42,7 @@ impl QuerySet { let def_kind_indices = Self::build_definition_kind_indices(&definitions); let call_name_idx = calls.capture_index_for_name("name").unwrap_or(u32::MAX); + let call_qualifier_idx = calls.capture_index_for_name("qualifier"); let (import_path_indices, import_alias_idx) = if let Some(ref q) = imports { let path_indices = ["path", "source", "system_path", "local_path", "module"] @@ -60,6 +62,7 @@ impl QuerySet { def_name_idx, def_kind_indices, call_name_idx, + 
call_qualifier_idx, import_path_indices, import_alias_idx, }) @@ -169,6 +172,7 @@ impl Extractor { while let Some(m) = matches.next() { let mut callee: Option<&str> = None; + let mut qualifier: Option<&str> = None; let mut call_node: Option = None; for capture in m.captures { @@ -176,6 +180,9 @@ impl Extractor { callee = capture.node.utf8_text(source).ok(); call_node = Some(capture.node); } + if Some(capture.index) == self.queries.call_qualifier_idx { + qualifier = capture.node.utf8_text(source).ok(); + } } if let (Some(callee), Some(node)) = (callee, call_node) { @@ -183,6 +190,7 @@ impl Extractor { calls.push(Call { callee: callee.to_string(), + qualifier: qualifier.map(|s| s.to_string()), span: node_to_span(&node), file: path.to_path_buf(), caller, diff --git a/src/code/graph.rs b/src/code/graph.rs index d1a11cc..21ae7f9 100644 --- a/src/code/graph.rs +++ b/src/code/graph.rs @@ -1,7 +1,8 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::path::Path; -use super::index::{Call, Definition, Index}; +use super::index::{Definition, Index}; +use super::resolve::Resolver; pub type NodeId = usize; @@ -31,6 +32,11 @@ impl CallGraph { } pub fn build(index: &Index) -> Self { + Self::build_with_options(index, false) + } + + pub fn build_with_options(index: &Index, strict: bool) -> Self { + let resolver = Resolver::with_strict(index, strict); let mut graph = CallGraph::new(); for def in index.definitions() { @@ -38,23 +44,31 @@ impl CallGraph { } for call in index.calls() { - if let Some((caller_id, callee_id)) = Self::link_call(&graph, call) { - graph.add_edge(caller_id, callee_id); - } + let caller_id = call + .caller + .as_ref() + .and_then(|name| graph.find_node_by_file_and_name(&call.file, name)); + + let Some(caller_id) = caller_id else { + continue; + }; + + let callee_def = resolver.resolve(&call.callee, call.qualifier.as_deref(), &call.file); + + let callee_id = if let Some(def) = callee_def { + graph + .find_node_by_file_and_name(&def.file, &def.name) + 
.unwrap_or_else(|| graph.add_definition(def)) + } else { + continue; + }; + + graph.add_edge(caller_id, callee_id); } graph } - fn link_call(graph: &CallGraph, call: &Call) -> Option<(NodeId, NodeId)> { - let caller_id = call - .caller - .as_ref() - .and_then(|name| graph.find_node_by_file_and_name(&call.file, name))?; - let callee_id = graph.find_node(&call.callee)?; - Some((caller_id, callee_id)) - } - fn add_definition(&mut self, definition: Definition) -> NodeId { let file_key = definition.file.to_string_lossy().to_string(); let composite_key = (file_key, definition.name.clone()); @@ -338,6 +352,7 @@ mod tests { fn make_call(callee: &str, caller: Option<&str>, file: &str) -> Call { Call { callee: callee.to_string(), + qualifier: None, span: make_span(), file: PathBuf::from(file), caller: caller.map(|s| s.to_string()), diff --git a/src/code/index.rs b/src/code/index.rs index 33fa59d..9762ad9 100644 --- a/src/code/index.rs +++ b/src/code/index.rs @@ -10,7 +10,7 @@ use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; pub const INDEX_FILE: &str = "index.bin"; -pub const INDEX_VERSION: u32 = 1; +pub const INDEX_VERSION: u32 = 2; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Span { @@ -44,6 +44,7 @@ pub enum DefinitionKind { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Call { pub callee: String, + pub qualifier: Option, pub span: Span, pub file: PathBuf, pub caller: Option, @@ -210,6 +211,7 @@ mod tests { }], calls: vec![Call { callee: "other_fn".to_string(), + qualifier: None, span: Span { start_byte: 50, end_byte: 60, diff --git a/src/code/mod.rs b/src/code/mod.rs index 2b3e76f..3bc6f18 100644 --- a/src/code/mod.rs +++ b/src/code/mod.rs @@ -2,3 +2,4 @@ pub mod extract; pub mod grammar; pub mod graph; pub mod index; +pub mod resolve; diff --git a/src/code/resolve.rs b/src/code/resolve.rs new file mode 100644 index 0000000..84a8baf --- /dev/null +++ b/src/code/resolve.rs @@ -0,0 +1,426 @@ +use std::path::Path; + +use 
super::index::{Definition, Index}; + +fn import_to_file_patterns(module_path: &str, lang: &str) -> Vec { + let clean = module_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); + + match lang { + "rs" => { + let stripped = clean + .trim_start_matches("crate::") + .trim_start_matches("self::") + .trim_start_matches("super::"); + let parts: Vec<&str> = stripped.split("::").filter(|p| !p.is_empty()).collect(); + if parts.is_empty() { + return vec![]; + } + let file_path = parts.join("/"); + vec![ + format!("{}.rs", file_path), + format!("{}/mod.rs", file_path), + format!("src/{}.rs", file_path), + format!("src/{}/mod.rs", file_path), + ] + } + "py" => { + if clean.starts_with('.') { + return vec![]; + } + let parts: Vec<&str> = clean.split('.').collect(); + if parts.is_empty() { + return vec![]; + } + let file_path = parts.join("/"); + vec![ + format!("{}.py", file_path), + format!("{}/__init__.py", file_path), + format!("src/{}.py", file_path), + ] + } + "go" => { + let parts: Vec<&str> = clean.split('/').collect(); + let local_parts: Vec<&str> = if parts.len() >= 3 && parts[0].contains('.') { + parts[3..].to_vec() + } else { + parts + }; + if local_parts.is_empty() { + return vec![]; + } + let dir_path = local_parts.join("/"); + vec![dir_path] + } + "ts" | "tsx" | "js" | "jsx" | "mjs" | "cjs" => { + let base = clean + .trim_start_matches("./") + .trim_start_matches("../") + .trim_start_matches("@/") + .trim_start_matches('@'); + vec![ + format!("{}.ts", base), + format!("{}.tsx", base), + format!("{}.js", base), + format!("{}/index.ts", base), + format!("{}/index.tsx", base), + format!("{}/index.js", base), + ] + } + "java" => { + let file_path = clean.replace('.', "/"); + vec![ + format!("{}.java", file_path), + format!("src/{}.java", file_path), + format!("src/main/java/{}.java", file_path), + ] + } + "scala" | "sc" => { + let trimmed = clean.trim_end_matches("._").trim_end_matches(".*"); + let file_path = trimmed.replace('.', "/"); + 
vec![format!("{}.scala", file_path), format!("{}.sc", file_path)] + } + "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => { + vec![ + clean.to_string(), + format!("include/{}", clean), + format!("src/{}", clean), + ] + } + "zig" => { + if clean.ends_with(".zig") || clean.contains('/') { + vec![clean.to_string(), format!("src/{}", clean)] + } else { + vec![format!("{}.zig", clean), format!("src/{}.zig", clean)] + } + } + _ => vec![clean.to_string()], + } +} + +fn import_matches_callee(module_path: &str, callee: &str, lang: &str) -> bool { + let clean = module_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); + + match lang { + "rs" => { + let parts: Vec<&str> = clean.split("::").collect(); + parts.last().map(|s| *s == callee).unwrap_or(false) + } + "py" => { + let parts: Vec<&str> = clean.split('.').collect(); + parts.last().map(|s| *s == callee).unwrap_or(false) + } + "go" => { + let parts: Vec<&str> = clean.split('/').collect(); + parts.last().map(|s| *s == callee).unwrap_or(false) + } + "java" | "scala" | "sc" => { + let parts: Vec<&str> = clean.split('.').collect(); + parts + .last() + .map(|s| *s == callee || *s == "*" || *s == "_") + .unwrap_or(false) + } + _ => true, + } +} + +fn file_matches_pattern(file_path: &Path, pattern: &str) -> bool { + let file_str = file_path.to_string_lossy(); + + if pattern.contains('/') { + file_str.ends_with(pattern) || file_str.contains(&format!("/{}", pattern)) + } else { + file_path + .file_name() + .and_then(|n| n.to_str()) + .map(|n| n == pattern) + .unwrap_or(false) + } +} + +pub struct Resolver<'a> { + index: &'a Index, + strict: bool, +} + +impl<'a> Resolver<'a> { + pub fn new(index: &'a Index) -> Self { + Self { + index, + strict: false, + } + } + + pub fn with_strict(index: &'a Index, strict: bool) -> Self { + Self { index, strict } + } + + /// Resolve a callee to its definition. + /// + /// Resolution order: + /// 1. Same file - check if callee is defined in the calling file + /// 2. 
Via imports - use import statements to find the defining file + /// 3. Global fallback (unless strict mode) - search entire index by name + /// + /// Note: Global fallback can produce false positives when multiple functions + /// share the same name (e.g., `parse`). Use strict mode to disable it. + pub fn resolve( + &self, + callee: &str, + _qualifier: Option<&str>, + from_file: &Path, + ) -> Option { + if let Some(def) = self.resolve_same_file(callee, from_file) { + return Some(def); + } + + if let Some(def) = self.resolve_via_imports(callee, from_file) { + return Some(def); + } + + if !self.strict { + return self.resolve_by_index(callee); + } + + None + } + + fn resolve_same_file(&self, callee: &str, file: &Path) -> Option { + let record = self.index.get(file)?; + record + .definitions + .iter() + .find(|d| d.name == callee) + .cloned() + } + + fn resolve_by_index(&self, callee: &str) -> Option { + self.index.definitions().find(|d| d.name == callee).cloned() + } + + fn resolve_via_imports(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or(""); + + for import in &record.imports { + if !import_matches_callee(&import.module_path, callee, ext) { + continue; + } + + let patterns = import_to_file_patterns(&import.module_path, ext); + + for indexed_file in self.index.files.keys() { + for pattern in &patterns { + if file_matches_pattern(indexed_file, pattern) { + if let Some(def) = self.find_def_in_file(indexed_file, callee) { + return Some(def); + } + } + } + } + } + + None + } + + fn find_def_in_file(&self, file: &Path, name: &str) -> Option { + let record = self.index.get(file)?; + record.definitions.iter().find(|d| d.name == name).cloned() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::code::index::{Definition, DefinitionKind, FileRecord, Import, Span}; + use std::path::PathBuf; + + fn make_def(name: &str, file: &str) -> Definition { + 
Definition { + name: name.to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 3, + }, + file: PathBuf::from(file), + } + } + + fn make_import(module_path: &str, file: &str) -> Import { + Import { + module_path: module_path.to_string(), + alias: None, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 1, + }, + file: PathBuf::from(file), + } + } + + #[test] + fn test_resolve_same_file() { + let mut index = Index::new(); + let file = PathBuf::from("src/main.rs"); + + index.update(FileRecord { + path: file.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("foo", "src/main.rs")], + calls: vec![], + imports: vec![], + }); + + let resolver = Resolver::new(&index); + let found = resolver.resolve("foo", None, &file); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "foo"); + + let not_found = resolver.resolve("bar", None, &file); + assert!(not_found.is_none()); + } + + #[test] + fn test_resolve_prefers_same_file() { + let mut index = Index::new(); + let file_a = PathBuf::from("src/a.rs"); + let file_b = PathBuf::from("src/b.rs"); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("foo", "src/a.rs")], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("foo", "src/b.rs")], + calls: vec![], + imports: vec![], + }); + + let resolver = Resolver::new(&index); + + let found = resolver.resolve("foo", None, &file_a); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, file_a); + + let found = resolver.resolve("foo", None, &file_b); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, file_b); + } + + #[test] + fn test_resolve_via_imports() { + let mut index = Index::new(); + let main_file = PathBuf::from("src/main.rs"); + let helper_file = PathBuf::from("src/utils/helper.rs"); + + 
index.update(FileRecord { + path: helper_file.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("helper", "src/utils/helper.rs")], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: main_file.clone(), + mtime: 0, + size: 0, + definitions: vec![], + calls: vec![], + imports: vec![make_import("crate::utils::helper", "src/main.rs")], + }); + + let resolver = Resolver::new(&index); + let found = resolver.resolve("helper", None, &main_file); + + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "helper"); + } + + #[test] + fn test_resolve_falls_back_to_index() { + let mut index = Index::new(); + let main_file = PathBuf::from("src/main.rs"); + + index.update(FileRecord { + path: PathBuf::from("src/parse.rs"), + mtime: 0, + size: 0, + definitions: vec![make_def("parse", "src/parse.rs")], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: main_file.clone(), + mtime: 0, + size: 0, + definitions: vec![], + calls: vec![], + imports: vec![], + }); + + let resolver = Resolver::new(&index); + + // Should find via global index lookup + let found = resolver.resolve("parse", None, &main_file); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, PathBuf::from("src/parse.rs")); + } + + #[test] + fn test_file_matches_pattern() { + assert!(file_matches_pattern( + Path::new("src/utils/helper.rs"), + "utils/helper.rs" + )); + assert!(file_matches_pattern( + Path::new("src/utils/helper.rs"), + "helper.rs" + )); + assert!(file_matches_pattern(Path::new("helper.rs"), "helper.rs")); + assert!(!file_matches_pattern( + Path::new("src/other.rs"), + "helper.rs" + )); + } + + #[test] + fn test_import_to_file_patterns_rust() { + let patterns = import_to_file_patterns("crate::utils::helper", "rs"); + assert!(patterns.iter().any(|p| p.contains("utils/helper.rs"))); + assert!(patterns.iter().any(|p| p.contains("utils/helper/mod.rs"))); + } + + #[test] + fn test_import_to_file_patterns_python() { + let patterns = 
import_to_file_patterns("mypackage.utils.helper", "py"); + assert!(patterns + .iter() + .any(|p| p.contains("mypackage/utils/helper.py"))); + } + + #[test] + fn test_import_to_file_patterns_js() { + let patterns = import_to_file_patterns("./components/Button", "ts"); + assert!(patterns.iter().any(|p| p.contains("components/Button.ts"))); + assert!(patterns + .iter() + .any(|p| p.contains("components/Button/index.ts"))); + } +} diff --git a/src/main.rs b/src/main.rs index 34294ea..11d04c4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -262,7 +262,10 @@ fn apply_repo_config(args: &mut Cli, repo_config: &RepoConfig) { } fn handle_code_command(args: &CodeArgs) -> Result<()> { - let root = args.root.canonicalize().unwrap_or_else(|_| args.root.clone()); + let root = args + .root + .canonicalize() + .unwrap_or_else(|_| args.root.clone()); let target = FunctionTarget::parse(&args.target)?; let mut index = load_index(&root)?.unwrap_or_else(Index::new); @@ -272,7 +275,7 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { save_index(&index, &root)?; } - let graph = CallGraph::build(&index); + let graph = CallGraph::build_with_options(&index, args.strict); let node_id = if let Some(ref file) = target.file { let file_path = root.join(file); @@ -288,16 +291,14 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { }; let Some(node_id) = node_id else { - bail!( - "function '{}' not found in index", - target.function - ); + bail!("function '{}' not found in index", target.function); }; let depth = args.depth.unwrap_or(1); - + let definitions = if args.callers { - graph.get_callers_to_depth(node_id, depth) + graph + .get_callers_to_depth(node_id, depth) .into_iter() .filter_map(|id| graph.get_node(id).map(|n| &n.definition)) .collect() @@ -416,7 +417,13 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { } let (mtime, size) = file_fingerprint(path).ok()?; if index.is_stale(rel_path, mtime, size) { - Some((path.to_path_buf(), rel_path.to_path_buf(), 
ext.to_string(), mtime, size)) + Some(( + path.to_path_buf(), + rel_path.to_path_buf(), + ext.to_string(), + mtime, + size, + )) } else { None } diff --git a/tests/integration.rs b/tests/integration.rs index b0a64f3..f3d643e 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -66,6 +66,7 @@ mod call_graph_tests { size: 0, definitions: vec![make_def("caller", &file_a)], calls: vec![Call { + qualifier: None, callee: "callee".to_string(), caller: Some("caller".to_string()), span: make_span(), @@ -130,6 +131,7 @@ mod call_graph_tests { size: 0, definitions: vec![make_def("main", &file_main)], calls: vec![Call { + qualifier: None, callee: "helper".to_string(), caller: Some("main".to_string()), span: make_span(), @@ -168,6 +170,7 @@ mod call_graph_tests { size: 0, definitions: vec![make_def("entry", &file_a)], calls: vec![Call { + qualifier: None, callee: "middle".to_string(), caller: Some("entry".to_string()), span: make_span(), @@ -182,6 +185,7 @@ mod call_graph_tests { size: 0, definitions: vec![make_def("middle", &file_b)], calls: vec![Call { + qualifier: None, callee: "leaf".to_string(), caller: Some("middle".to_string()), span: make_span(), @@ -206,7 +210,10 @@ mod call_graph_tests { assert_eq!(transitive.len(), 2); - let names: HashSet<_> = transitive.iter().map(|n| n.definition.name.as_str()).collect(); + let names: HashSet<_> = transitive + .iter() + .map(|n| n.definition.name.as_str()) + .collect(); assert!(names.contains("middle")); assert!(names.contains("leaf")); @@ -232,6 +239,7 @@ mod call_graph_tests { size: 0, definitions: vec![make_def("caller", &file_a)], calls: vec![Call { + qualifier: None, callee: "nonexistent".to_string(), caller: Some("caller".to_string()), span: make_span(), @@ -369,7 +377,12 @@ def format_output(s): let extractor = Extractor::new("python").unwrap(); index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); - index_file(&mut index, &extractor, &dir.path().join("utils.py"), utils_py); + index_file( + 
&mut index, + &extractor, + &dir.path().join("utils.py"), + utils_py, + ); let graph = CallGraph::build(&index); @@ -421,7 +434,12 @@ function format(s: string): string { let extractor = Extractor::new("typescript").unwrap(); index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts); - index_file(&mut index, &extractor, &dir.path().join("utils.ts"), utils_ts); + index_file( + &mut index, + &extractor, + &dir.path().join("utils.ts"), + utils_ts, + ); let graph = CallGraph::build(&index); From 7071834374abb8ba4b03cb967b8662da29ab37ad Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 14:17:36 -0800 Subject: [PATCH 29/35] feat: add LSP-based type resolution with --precise flag - add LspClient for LSP server communication (stdin/stdout JSON-RPC) - add LspResolver for go-to-definition based call resolution - support auto-download for rust-analyzer and clangd binaries - support npm package installation (bun/npm) for pyright, tsserver - support go install for gopls with GOBIN set to lsp directory - install all LSP binaries locally in ~/.local/share/glimpse/lsp/ - add --precise flag to use LSP mode instead of heuristic resolution - add CallGraph::build_precise() integrating LSP with fallback - add comprehensive integration tests for all supported languages - update registry.toml with lsp configs (npm_package, go_package) --- Cargo.lock | 367 +++++++++++++++- Cargo.toml | 5 +- registry.toml | 63 +++ src/cli.rs | 4 + src/code/grammar.rs | 23 + src/code/graph.rs | 66 +++ src/code/lsp.rs | 891 ++++++++++++++++++++++++++++++++++++++ src/code/mod.rs | 1 + src/main.rs | 2 +- tests/lsp_integration.rs | 915 +++++++++++++++++++++++++++++++++++++++ 10 files changed, 2328 insertions(+), 9 deletions(-) create mode 100644 src/code/lsp.rs create mode 100644 tests/lsp_integration.rs diff --git a/Cargo.lock b/Cargo.lock index 2c371fc..1cf5629 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,17 @@ version = "2.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "ahash" version = "0.8.11" @@ -101,6 +112,15 @@ version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arboard" version = "3.4.1" @@ -217,6 +237,15 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "block2" version = "0.5.1" @@ -267,6 +296,25 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" 
+dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cassowary" version = "0.3.0" @@ -305,6 +353,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "clap" version = "4.5.30" @@ -396,6 +454,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "core-foundation" version = "0.9.4" @@ -436,6 +500,30 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.4.2" @@ -495,6 +583,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "cssparser" version 
= "0.31.2" @@ -553,6 +651,12 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "deflate64" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204" + [[package]] name = "deranged" version = "0.3.11" @@ -573,6 +677,17 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "derive_builder" version = "0.20.2" @@ -615,6 +730,17 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "dirs" version = "5.0.1" @@ -783,6 +909,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fluent-uri" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c704e9dbe1ddd863da1e6ff3567795087b1eb201ce80d8fa81162e1516500d" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "fnv" version = "1.0.7" @@ -913,6 +1048,16 @@ dependencies = [ "byteorder", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "gethostname" version = "0.4.3" @@ -950,8 +1095,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.13.3+wasi-0.2.2", + 
"wasm-bindgen", "windows-targets 0.52.6", ] @@ -988,12 +1135,14 @@ dependencies = [ "clap", "crossterm", "dirs", + "flate2", "git2", "glob", "grep", "ignore", "indicatif", "libloading", + "lsp-types", "mockito", "num-format", "once_cell", @@ -1014,6 +1163,7 @@ dependencies = [ "url", "walkdir", "which", + "zip", ] [[package]] @@ -1182,10 +1332,19 @@ dependencies = [ "rand 0.8.5", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "ureq", ] +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + [[package]] name = "html5ever" version = "0.26.0" @@ -1544,6 +1703,15 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + [[package]] name = "instability" version = "0.3.7" @@ -1735,9 +1903,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lopdf" @@ -1765,6 +1933,40 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lsp-types" +version = "0.97.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53353550a17c04ac46c585feb189c2db82154fc84b79c7a66c96c2c644f66071" +dependencies = [ + "bitflags 1.3.2", + "fluent-uri", + "serde", + "serde_json", + "serde_repr", +] + +[[package]] +name = "lzma-rs" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e" +dependencies = [ + "byteorder", + "crc", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "mac" version = "0.1.1" @@ -2212,6 +2414,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -2542,7 +2754,7 @@ checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom 0.2.15", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2834,6 +3046,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "serde_spanned" version = "0.6.8" @@ -2877,6 +3100,17 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -3151,7 +3385,16 @@ version = "1.0.69" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", ] [[package]] @@ -3165,6 +3408,17 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "tiff" version = "0.9.1" @@ -3260,7 +3514,7 @@ dependencies = [ "serde", "serde_json", "spm_precompiled", - "thiserror", + "thiserror 1.0.69", "unicode-normalization-alignments", "unicode-segmentation", "unicode_categories", @@ -3409,6 +3663,12 @@ version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49d64318d8311fc2668e48b63969f4343e0a85c4a109aa8460d6672e364b8bd1" +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + [[package]] name = "unicode-ident" version = "1.0.16" @@ -3989,7 +4249,7 @@ dependencies = [ "nix", "os_pipe", "tempfile", - "thiserror", + "thiserror 1.0.69", "tree_magic_mini", "wayland-backend", "wayland-client", @@ -4026,6 +4286,15 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d" +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.7.5" @@ -4097,6 +4366,20 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] [[package]] name = "zerovec" @@ -4119,3 +4402,73 @@ dependencies = [ "quote", "syn 2.0.98", ] + +[[package]] +name = "zip" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" +dependencies = [ + "aes", + "arbitrary", + "bzip2", + "constant_time_eq", + "crc32fast", + "crossbeam-utils", + "deflate64", + "displaydoc", + "flate2", + "getrandom 0.3.1", + "hmac", + "indexmap", + "lzma-rs", + "memchr", + "pbkdf2", + "sha1", + "thiserror 2.0.17", + "time", + "xz2", + "zeroize", + "zopfli", + "zstd", +] + +[[package]] +name = "zopfli" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfc5ee405f504cd4984ecc6f14d02d55cfda60fa4b689434ef4102aae150cd7" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + 
"zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index 3dc82bb..e5250a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,10 @@ toml = "0.8.19" tree-sitter = "0.25" url = "2.5" walkdir = "2.5.0" +lsp-types = "0.97" +which = "8.0" +flate2 = "1.0" +zip = "2.2" [build-dependencies] serde = { version = "1.0.217", features = ["derive"] } @@ -53,4 +57,3 @@ serde_yaml = "0.9" [dev-dependencies] tempfile = "3.14.0" mockito = "1.4" -which = "8.0.0" diff --git a/registry.toml b/registry.toml index 7a1bc01..fda9b2d 100644 --- a/registry.toml +++ b/registry.toml @@ -46,6 +46,11 @@ import_query = """ ]) """ +[language.lsp] +binary = "gopls" +args = ["serve"] +go_package = "golang.org/x/tools/gopls@latest" + [[language]] name = "zig" extensions = ["zig"] @@ -119,6 +124,20 @@ import_query = """ ]) @include """ +[language.lsp] +binary = "clangd" +args = [] +version = "19.1.0" +url_template = "https://github.com/clangd/clangd/releases/download/{version}/clangd-{target}.zip" +archive = "zip" +binary_path = "clangd_{version}/bin/clangd" + +[language.lsp.targets] +"x86_64-unknown-linux-gnu" = "linux-x86_64" +"x86_64-apple-darwin" = "mac-x86_64" +"aarch64-apple-darwin" = "mac-arm64" +"x86_64-pc-windows-msvc" = "windows-x86_64" + [[language]] name = "cpp" extensions = ["cpp", "cc", "cxx", "hpp", "hxx"] @@ -203,6 +222,20 @@ import_query = """ ]) @include """ +[language.lsp] +binary = "clangd" +args = [] +version = "19.1.0" +url_template = "https://github.com/clangd/clangd/releases/download/{version}/clangd-{target}.zip" +archive = "zip" +binary_path = "clangd_{version}/bin/clangd" + +[language.lsp.targets] +"x86_64-unknown-linux-gnu" = "linux-x86_64" +"x86_64-apple-darwin" = "mac-x86_64" +"aarch64-apple-darwin" = "mac-arm64" 
+"x86_64-pc-windows-msvc" = "windows-x86_64" + [[language]] name = "bash" extensions = ["sh", "bash"] @@ -287,6 +320,11 @@ import_query = """ (wildcard_import)? @wildcard) @import """ +[language.lsp] +binary = "pyright-langserver" +args = ["--stdio"] +npm_package = "pyright" + [[language]] name = "typescript" extensions = ["ts", "mts", "cts"] @@ -342,6 +380,11 @@ import_query = """ source: (string (string_fragment) @source)) @import """ +[language.lsp] +binary = "typescript-language-server" +args = ["--stdio"] +npm_package = "typescript-language-server typescript" + [[language]] name = "rust" extensions = ["rs"] @@ -393,6 +436,21 @@ import_query = """ name: (identifier) @mod_name) @mod_decl """ +[language.lsp] +binary = "rust-analyzer" +args = [] +version = "2024-12-23" +url_template = "https://github.com/rust-lang/rust-analyzer/releases/download/{version}/rust-analyzer-{target}.gz" +archive = "gz" + +[language.lsp.targets] +"x86_64-unknown-linux-gnu" = "x86_64-unknown-linux-gnu" +"x86_64-unknown-linux-musl" = "x86_64-unknown-linux-musl" +"aarch64-unknown-linux-gnu" = "aarch64-unknown-linux-gnu" +"x86_64-apple-darwin" = "x86_64-apple-darwin" +"aarch64-apple-darwin" = "aarch64-apple-darwin" +"x86_64-pc-windows-msvc" = "x86_64-pc-windows-msvc" + [[language]] name = "javascript" extensions = ["js", "mjs", "cjs"] @@ -452,6 +510,11 @@ import_query = """ (#eq? 
@_require "require")) @require """ +[language.lsp] +binary = "typescript-language-server" +args = ["--stdio"] +npm_package = "typescript-language-server typescript" + [[language]] name = "java" extensions = ["java"] diff --git a/src/cli.rs b/src/cli.rs index e2e4d10..182c7a9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -84,6 +84,10 @@ pub struct CodeArgs { /// Strict mode: only resolve calls via imports (no global name matching) #[arg(long)] pub strict: bool, + + /// Precise mode: use LSP for type-aware resolution (slower but more accurate) + #[arg(long)] + pub precise: bool, } #[derive(Parser, Debug, Clone)] diff --git a/src/code/grammar.rs b/src/code/grammar.rs index f336bb3..085175c 100644 --- a/src/code/grammar.rs +++ b/src/code/grammar.rs @@ -19,6 +19,21 @@ static LOADED_LANGUAGES: Lazy>> = Lazy::new(|| Mutex::new(HashMap::new())); static LOADED_LIBRARIES: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); +#[derive(Debug, Clone, Deserialize)] +pub struct LspConfig { + pub binary: String, + #[serde(default)] + pub args: Vec, + pub version: Option, + pub url_template: Option, + pub archive: Option, + pub binary_path: Option, + #[serde(default)] + pub targets: std::collections::HashMap, + pub npm_package: Option, + pub go_package: Option, +} + #[derive(Debug, Clone, Deserialize)] pub struct LanguageEntry { pub name: String, @@ -30,6 +45,7 @@ pub struct LanguageEntry { pub definition_query: String, pub call_query: String, pub import_query: String, + pub lsp: Option, } #[derive(Debug, Deserialize)] @@ -249,6 +265,13 @@ pub fn cache_dir() -> PathBuf { .join("grammars") } +pub fn lsp_dir() -> PathBuf { + dirs::data_local_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join("glimpse") + .join("lsp") +} + pub fn load_language(name: &str) -> Result { { let cache = LOADED_LANGUAGES.lock().unwrap(); diff --git a/src/code/graph.rs b/src/code/graph.rs index 21ae7f9..3daaa60 100644 --- a/src/code/graph.rs +++ b/src/code/graph.rs @@ -1,7 +1,10 @@ use std::collections::{HashMap, 
HashSet, VecDeque}; use std::path::Path; +use indicatif::{ProgressBar, ProgressStyle}; + use super::index::{Definition, Index}; +use super::lsp::LspResolver; use super::resolve::Resolver; pub type NodeId = usize; @@ -69,6 +72,69 @@ impl CallGraph { graph } + pub fn build_with_lsp(index: &Index, root: &Path) -> Self { + let mut lsp_resolver = LspResolver::new(root); + let heuristic_resolver = Resolver::with_strict(index, false); + let mut graph = CallGraph::new(); + + for def in index.definitions() { + graph.add_definition(def.clone()); + } + + let calls: Vec<_> = index.calls().collect(); + let total = calls.len(); + + if total == 0 { + return graph; + } + + let pb = ProgressBar::new(total as u64); + pb.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} resolving calls") + .expect("valid template") + .progress_chars("#>-"), + ); + + for call in &calls { + pb.inc(1); + + let caller_id = call + .caller + .as_ref() + .and_then(|name| graph.find_node_by_file_and_name(&call.file, name)); + + let Some(caller_id) = caller_id else { + continue; + }; + + let callee_def = lsp_resolver + .resolve_call(call, index) + .or_else(|| heuristic_resolver.resolve(&call.callee, call.qualifier.as_deref(), &call.file)); + + let callee_id = if let Some(def) = callee_def { + graph + .find_node_by_file_and_name(&def.file, &def.name) + .unwrap_or_else(|| graph.add_definition(def)) + } else { + continue; + }; + + graph.add_edge(caller_id, callee_id); + } + + pb.finish_and_clear(); + graph + } + + pub fn build_precise(index: &Index, root: &Path, strict: bool, precise: bool) -> Self { + if precise { + Self::build_with_lsp(index, root) + } else { + Self::build_with_options(index, strict) + } + } + fn add_definition(&mut self, definition: Definition) -> NodeId { let file_key = definition.file.to_string_lossy().to_string(); let composite_key = (file_key, definition.name.clone()); diff --git a/src/code/lsp.rs b/src/code/lsp.rs new file mode 
100644 index 0000000..f1210bb --- /dev/null +++ b/src/code/lsp.rs @@ -0,0 +1,891 @@ +use std::collections::HashMap; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; +use std::path::{Path, PathBuf}; +use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; +use std::sync::atomic::{AtomicI32, Ordering}; + +use anyhow::{bail, Context, Result}; +use flate2::read::GzDecoder; +use lsp_types::{ + ClientCapabilities, DidOpenTextDocumentParams, GotoDefinitionParams, GotoDefinitionResponse, + InitializeParams, InitializedParams, Position, TextDocumentIdentifier, + TextDocumentPositionParams, Uri, WorkspaceFolder, +}; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; + +use super::grammar::{lsp_dir, LspConfig, Registry}; +use super::index::{Call, Definition, Index}; + +fn current_target() -> &'static str { + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + { + "x86_64-unknown-linux-gnu" + } + #[cfg(all(target_os = "linux", target_arch = "aarch64"))] + { + "aarch64-unknown-linux-gnu" + } + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + { + "x86_64-apple-darwin" + } + #[cfg(all(target_os = "macos", target_arch = "aarch64"))] + { + "aarch64-apple-darwin" + } + #[cfg(all(target_os = "windows", target_arch = "x86_64"))] + { + "x86_64-pc-windows-msvc" + } + #[cfg(not(any( + all(target_os = "linux", target_arch = "x86_64"), + all(target_os = "linux", target_arch = "aarch64"), + all(target_os = "macos", target_arch = "x86_64"), + all(target_os = "macos", target_arch = "aarch64"), + all(target_os = "windows", target_arch = "x86_64"), + )))] + { + "unknown" + } +} + +fn binary_extension() -> &'static str { + if cfg!(target_os = "windows") { + ".exe" + } else { + "" + } +} + +fn lsp_binary_path(lsp: &LspConfig) -> PathBuf { + let dir = lsp_dir(); + dir.join(format!("{}{}", lsp.binary, binary_extension())) +} + +fn path_to_uri(path: &Path) -> Result { + let url = url::Url::from_file_path(path) + 
.map_err(|_| anyhow::anyhow!("invalid path: {}", path.display()))?; + url.as_str().parse().context("failed to convert URL to URI") +} + +fn uri_to_path(uri: &Uri) -> Option { + let url = url::Url::parse(uri.as_str()).ok()?; + url.to_file_path().ok() +} + +fn download_and_extract(lsp: &LspConfig) -> Result { + let Some(ref url_template) = lsp.url_template else { + bail!("no download URL configured for {}", lsp.binary); + }; + + let Some(ref version) = lsp.version else { + bail!("no version configured for {}", lsp.binary); + }; + + let target = current_target(); + let Some(target_name) = lsp.targets.get(target) else { + bail!( + "no pre-built binary available for {} on {}", + lsp.binary, + target + ); + }; + + let url = url_template + .replace("{version}", version) + .replace("{target}", target_name); + + eprintln!("Downloading {} from {}...", lsp.binary, url); + + let dir = lsp_dir(); + fs::create_dir_all(&dir)?; + + let response = + reqwest::blocking::get(&url).with_context(|| format!("failed to download {}", url))?; + + if !response.status().is_success() { + bail!("download failed with status: {}", response.status()); + } + + let bytes = response.bytes()?; + let archive_type = lsp.archive.as_deref().unwrap_or("gz"); + + let final_path = lsp_binary_path(lsp); + + match archive_type { + "gz" => { + let mut decoder = GzDecoder::new(&bytes[..]); + let mut output = File::create(&final_path)?; + std::io::copy(&mut decoder, &mut output)?; + } + "zip" => { + let cursor = std::io::Cursor::new(&bytes); + let mut archive = zip::ZipArchive::new(cursor)?; + + let binary_path = if let Some(ref path) = lsp.binary_path { + path.replace("{version}", version) + } else { + lsp.binary.clone() + }; + + let mut found = false; + for i in 0..archive.len() { + let mut file = archive.by_index(i)?; + let name = file.name().to_string(); + + if name.ends_with(&binary_path) + || name.ends_with(&format!("{}{}", binary_path, binary_extension())) + { + let mut output = File::create(&final_path)?; 
+ std::io::copy(&mut file, &mut output)?; + found = true; + break; + } + } + + if !found { + bail!("binary {} not found in archive", binary_path); + } + } + other => bail!("unsupported archive type: {}", other), + } + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(&final_path)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(&final_path, perms)?; + } + + eprintln!("Installed {} to {}", lsp.binary, final_path.display()); + Ok(final_path) +} + +fn install_npm_package(lsp: &LspConfig) -> Result { + let Some(ref package) = lsp.npm_package else { + bail!("no npm package configured for {}", lsp.binary); + }; + + let (pkg_manager, pkg_manager_path) = if let Ok(bun) = which::which("bun") { + ("bun", bun) + } else if let Ok(npm) = which::which("npm") { + ("npm", npm) + } else { + bail!("neither bun nor npm found. Install one of them or install the LSP manually"); + }; + + let pkg_dir = lsp_dir().join(&lsp.binary); + fs::create_dir_all(&pkg_dir)?; + + eprintln!("Installing {} via {} (local)...", package, pkg_manager); + + let init_status = Command::new(&pkg_manager_path) + .args(["init", "--yes"]) + .current_dir(&pkg_dir) + .status() + .with_context(|| format!("failed to run {} init", pkg_manager))?; + + if !init_status.success() { + bail!("{} init failed", pkg_manager); + } + + let packages: Vec<&str> = package.split_whitespace().collect(); + let mut install_args = vec!["install"]; + install_args.extend(packages.iter()); + + let install_status = Command::new(&pkg_manager_path) + .args(&install_args) + .current_dir(&pkg_dir) + .status() + .with_context(|| format!("failed to run {} install", pkg_manager))?; + + if !install_status.success() { + bail!("{} install failed for {}", pkg_manager, package); + } + + let bin_path = pkg_dir.join("node_modules").join(".bin").join(&lsp.binary); + if !bin_path.exists() { + bail!( + "installed {} but binary not found at {}", + package, + bin_path.display() + ); + } + + let 
wrapper_path = lsp_binary_path(lsp); + create_wrapper_script(&wrapper_path, &bin_path)?; + + eprintln!("Installed {} to {}", lsp.binary, wrapper_path.display()); + Ok(wrapper_path) +} + +fn create_wrapper_script(wrapper_path: &Path, target_path: &Path) -> Result<()> { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + + let script = format!( + "#!/bin/sh\nexec \"{}\" \"$@\"\n", + target_path.display() + ); + fs::write(wrapper_path, script)?; + + let mut perms = fs::metadata(wrapper_path)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(wrapper_path, perms)?; + } + + #[cfg(windows)] + { + let script = format!( + "@echo off\r\n\"{}\" %*\r\n", + target_path.display() + ); + let wrapper_cmd = wrapper_path.with_extension("cmd"); + fs::write(&wrapper_cmd, script)?; + } + + Ok(()) +} + +fn install_go_package(lsp: &LspConfig) -> Result { + let Some(ref package) = lsp.go_package else { + bail!("no go package configured for {}", lsp.binary); + }; + + let go_path = + which::which("go").context("go not found. 
Install Go or install the LSP manually")?; + + let install_dir = lsp_dir(); + fs::create_dir_all(&install_dir)?; + + eprintln!("Installing {} via go install...", package); + + let status = Command::new(&go_path) + .args(["install", package]) + .env("GOBIN", &install_dir) + .status() + .context("failed to run go install")?; + + if !status.success() { + bail!("go install failed for {}", package); + } + + let binary_path = install_dir.join(&lsp.binary); + if binary_path.exists() { + eprintln!("Installed {} to {}", lsp.binary, binary_path.display()); + return Ok(binary_path); + } + + bail!( + "go install succeeded but binary {} not found at {}", + lsp.binary, + binary_path.display() + ); +} + +fn find_lsp_binary(lsp: &LspConfig) -> Result { + let local_path = lsp_binary_path(lsp); + if local_path.exists() { + return Ok(local_path); + } + + if let Ok(system_path) = which::which(&lsp.binary) { + return Ok(system_path); + } + + if lsp.url_template.is_some() { + return download_and_extract(lsp); + } + + if lsp.npm_package.is_some() { + return install_npm_package(lsp); + } + + if lsp.go_package.is_some() { + return install_go_package(lsp); + } + + bail!( + "LSP server '{}' not found. 
Install it manually.", + lsp.binary + ); +} + +#[derive(Debug)] +struct LspClient { + process: Child, + stdin: BufWriter, + stdout: BufReader, + request_id: AtomicI32, + root_uri: Uri, + opened_files: HashMap, +} + +#[derive(Debug, Serialize, Deserialize)] +struct LspMessage { + jsonrpc: String, + #[serde(skip_serializing_if = "Option::is_none")] + id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + method: Option, + #[serde(skip_serializing_if = "Option::is_none")] + params: Option, + #[serde(skip_serializing_if = "Option::is_none")] + result: Option, + #[serde(skip_serializing_if = "Option::is_none")] + error: Option, +} + +impl LspClient { + fn new(lsp: &LspConfig, root: &Path) -> Result { + let binary_path = find_lsp_binary(lsp)?; + + let mut process = Command::new(&binary_path) + .args(&lsp.args) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + .with_context(|| format!("failed to spawn {}", binary_path.display()))?; + + let stdin = process.stdin.take().context("failed to get stdin")?; + let stdout = process.stdout.take().context("failed to get stdout")?; + + let root_uri = path_to_uri(&root.canonicalize().unwrap_or_else(|_| root.to_path_buf()))?; + + Ok(Self { + process, + stdin: BufWriter::new(stdin), + stdout: BufReader::new(stdout), + request_id: AtomicI32::new(1), + root_uri, + opened_files: HashMap::new(), + }) + } + + fn next_id(&self) -> i32 { + self.request_id.fetch_add(1, Ordering::SeqCst) + } + + fn send_message(&mut self, msg: &LspMessage) -> Result<()> { + let content = serde_json::to_string(msg)?; + let header = format!("Content-Length: {}\r\n\r\n", content.len()); + + self.stdin.write_all(header.as_bytes())?; + self.stdin.write_all(content.as_bytes())?; + self.stdin.flush()?; + + Ok(()) + } + + fn read_message(&mut self) -> Result { + let mut content_length: Option = None; + let mut header_line = String::new(); + + loop { + header_line.clear(); + self.stdout.read_line(&mut header_line)?; + + 
if header_line == "\r\n" || header_line.is_empty() { + break; + } + + if let Some(len_str) = header_line.strip_prefix("Content-Length: ") { + content_length = Some(len_str.trim().parse()?); + } + } + + let len = content_length.context("missing Content-Length header")?; + let mut body = vec![0u8; len]; + self.stdout.read_exact(&mut body)?; + + let msg: LspMessage = serde_json::from_slice(&body)?; + Ok(msg) + } + + fn send_request(&mut self, method: &str, params: Value) -> Result { + let id = self.next_id(); + let msg = LspMessage { + jsonrpc: "2.0".to_string(), + id: Some(id), + method: Some(method.to_string()), + params: Some(params), + result: None, + error: None, + }; + + self.send_message(&msg)?; + + loop { + let response = self.read_message()?; + + if response.id == Some(id) { + if let Some(error) = response.error { + bail!("LSP error: {}", error); + } + return Ok(response.result.unwrap_or(Value::Null)); + } + } + } + + fn send_notification(&mut self, method: &str, params: Value) -> Result<()> { + let msg = LspMessage { + jsonrpc: "2.0".to_string(), + id: None, + method: Some(method.to_string()), + params: Some(params), + result: None, + error: None, + }; + + self.send_message(&msg) + } + + fn wait_for_ready(&mut self, path: &Path, max_attempts: u32) -> Result { + use std::thread; + use std::time::Duration; + + let uri = path_to_uri(path)?; + + // First wait for basic syntax analysis (documentSymbol) + for _ in 0..10 { + let params = lsp_types::DocumentSymbolParams { + text_document: TextDocumentIdentifier { uri: uri.clone() }, + work_done_progress_params: Default::default(), + partial_result_params: Default::default(), + }; + + match self.send_request("textDocument/documentSymbol", serde_json::to_value(params)?) 
{ + Ok(Value::Array(arr)) if !arr.is_empty() => break, + _ => thread::sleep(Duration::from_millis(200)), + } + } + + // Then wait for semantic analysis (hover on a known symbol) + // This indicates rust-analyzer has finished loading the project + for attempt in 0..max_attempts { + let hover_params = json!({ + "textDocument": { "uri": uri.as_str() }, + "position": { "line": 0, "character": 4 } // "mod" keyword + }); + + match self.send_request("textDocument/hover", hover_params) { + Ok(result) if !result.is_null() => return Ok(true), + _ => {} + } + + if attempt < max_attempts - 1 { + thread::sleep(Duration::from_millis(500)); + } + } + + Ok(false) + } + + fn initialize(&mut self) -> Result<()> { + let text_document_caps = lsp_types::TextDocumentClientCapabilities { + definition: Some(lsp_types::GotoCapability { + dynamic_registration: Some(false), + link_support: Some(true), + }), + synchronization: Some(lsp_types::TextDocumentSyncClientCapabilities { + dynamic_registration: Some(false), + will_save: Some(false), + will_save_wait_until: Some(false), + did_save: Some(false), + }), + ..Default::default() + }; + + let capabilities = ClientCapabilities { + text_document: Some(text_document_caps), + ..Default::default() + }; + + let params = InitializeParams { + root_uri: Some(self.root_uri.clone()), + capabilities, + workspace_folders: Some(vec![WorkspaceFolder { + uri: self.root_uri.clone(), + name: "root".to_string(), + }]), + ..Default::default() + }; + + self.send_request("initialize", serde_json::to_value(params)?)?; + self.send_notification("initialized", serde_json::to_value(InitializedParams {})?)?; + + Ok(()) + } + + fn open_file(&mut self, path: &Path, content: &str, language_id: &str) -> Result<()> { + if self.opened_files.contains_key(path) { + return Ok(()); + } + + let uri = path_to_uri(path)?; + + let version = 1; + self.opened_files.insert(path.to_path_buf(), version); + + let params = DidOpenTextDocumentParams { + text_document: 
lsp_types::TextDocumentItem { + uri, + language_id: language_id.to_string(), + version, + text: content.to_string(), + }, + }; + + self.send_notification("textDocument/didOpen", serde_json::to_value(params)?) + } + + fn goto_definition( + &mut self, + path: &Path, + line: u32, + character: u32, + ) -> Result> { + let uri = path_to_uri(path)?; + + let params = GotoDefinitionParams { + text_document_position_params: TextDocumentPositionParams { + text_document: TextDocumentIdentifier { uri }, + position: Position { line, character }, + }, + work_done_progress_params: Default::default(), + partial_result_params: Default::default(), + }; + + let result = self.send_request("textDocument/definition", serde_json::to_value(params)?)?; + + if result.is_null() { + return Ok(None); + } + + let response: GotoDefinitionResponse = serde_json::from_value(result)?; + + match response { + GotoDefinitionResponse::Scalar(loc) => Ok(Some(loc)), + GotoDefinitionResponse::Array(locs) => Ok(locs.into_iter().next()), + GotoDefinitionResponse::Link(links) => { + Ok(links.into_iter().next().map(|l| lsp_types::Location { + uri: l.target_uri, + range: l.target_selection_range, + })) + } + } + } + + fn shutdown(&mut self) -> Result<()> { + self.send_request("shutdown", json!(null))?; + self.send_notification("exit", json!(null))?; + let _ = self.process.wait(); + Ok(()) + } +} + +impl Drop for LspClient { + fn drop(&mut self) { + let _ = self.shutdown(); + } +} + +pub struct LspResolver { + clients: HashMap, + root: PathBuf, + file_cache: HashMap, +} + +impl LspResolver { + pub fn new(root: &Path) -> Self { + Self { + clients: HashMap::new(), + root: root.to_path_buf(), + file_cache: HashMap::new(), + } + } + + fn get_or_create_client(&mut self, ext: &str) -> Result<&mut LspClient> { + let registry = Registry::global(); + let lang_entry = registry + .get_by_extension(ext) + .with_context(|| format!("no language for extension: {}", ext))?; + + let lsp_config = lang_entry + .lsp + .as_ref() + 
.with_context(|| format!("no LSP config for language: {}", lang_entry.name))?; + + let key = lsp_config.binary.clone(); + + if !self.clients.contains_key(&key) { + let mut client = LspClient::new(lsp_config, &self.root)?; + client.initialize()?; + self.clients.insert(key.clone(), client); + } + + Ok(self.clients.get_mut(&key).unwrap()) + } + + fn read_file(&mut self, path: &Path) -> Result { + if let Some(content) = self.file_cache.get(path) { + return Ok(content.clone()); + } + + let content = fs::read_to_string(path) + .with_context(|| format!("failed to read {}", path.display()))?; + + self.file_cache.insert(path.to_path_buf(), content.clone()); + Ok(content) + } + + fn language_id_for_ext(ext: &str) -> &'static str { + match ext { + "rs" => "rust", + "ts" | "tsx" | "mts" | "cts" => "typescript", + "js" | "jsx" | "mjs" | "cjs" => "javascript", + "py" | "pyi" => "python", + "go" => "go", + "c" | "h" => "c", + "cpp" | "cc" | "cxx" | "hpp" | "hxx" => "cpp", + "java" => "java", + _ => "text", + } + } + + pub fn resolve_call(&mut self, call: &Call, index: &Index) -> Option { + let ext = call.file.extension()?.to_str()?.to_string(); + let abs_path = self.root.join(&call.file); + let language_id = Self::language_id_for_ext(&ext); + let callee = call.callee.clone(); + let start_line_idx = call.span.start_line.saturating_sub(1); + + let content = self.read_file(&abs_path).ok()?; + + let lines: Vec<&str> = content.lines().collect(); + if start_line_idx >= lines.len() { + return None; + } + + let line_content = lines[start_line_idx]; + let col = line_content.find(&callee).unwrap_or(0) as u32; + + let client = self.get_or_create_client(&ext).ok()?; + + if client.open_file(&abs_path, &content, language_id).is_err() { + return None; + } + + let location = client + .goto_definition(&abs_path, start_line_idx as u32, col) + .ok()??; + + let def_path = uri_to_path(&location.uri)?; + let root = self.root.clone(); + let rel_path = def_path.strip_prefix(&root).ok()?.to_path_buf(); + 
+ let start_line = location.range.start.line as usize + 1; + let end_line = location.range.end.line as usize + 1; + + let record = index.get(&rel_path)?; + record + .definitions + .iter() + .find(|d| d.span.start_line <= start_line && d.span.end_line >= end_line) + .cloned() + } + + pub fn resolve_calls_batch( + &mut self, + calls: &[&Call], + index: &Index, + ) -> HashMap { + let mut results = HashMap::new(); + + for (i, call) in calls.iter().enumerate() { + if let Some(def) = self.resolve_call(call, index) { + results.insert(i, def); + } + } + + results + } +} + +#[derive(Debug, Clone)] +pub struct LspAvailability { + pub available: bool, + pub location: Option, + pub can_auto_install: bool, + pub install_method: Option, +} + +pub fn check_lsp_availability() -> HashMap { + let registry = Registry::global(); + let mut result = HashMap::new(); + + for lang in registry.languages() { + if let Some(ref lsp) = lang.lsp { + let local_path = lsp_binary_path(lsp); + let system_available = which::which(&lsp.binary).is_ok(); + let local_available = local_path.exists(); + let available = system_available || local_available; + + let location = if local_available { + Some(local_path.display().to_string()) + } else if system_available { + which::which(&lsp.binary) + .ok() + .map(|p| p.display().to_string()) + } else { + None + }; + + let (can_auto_install, install_method) = if lsp.url_template.is_some() { + (true, Some("download".to_string())) + } else if lsp.npm_package.is_some() { + let bun_available = which::which("bun").is_ok(); + let npm_available = which::which("npm").is_ok(); + if bun_available { + (true, Some("bun".to_string())) + } else if npm_available { + (true, Some("npm".to_string())) + } else { + (false, Some("npm/bun".to_string())) + } + } else if lsp.go_package.is_some() { + let go_available = which::which("go").is_ok(); + (go_available, Some("go".to_string())) + } else { + (false, None) + }; + + result.insert( + lang.name.clone(), + LspAvailability { + 
available, + location, + can_auto_install: can_auto_install && !available, + install_method, + }, + ); + } + } + + result +} + +pub fn ensure_lsp_for_extension(ext: &str) -> Result { + let registry = Registry::global(); + let lang_entry = registry + .get_by_extension(ext) + .with_context(|| format!("no language for extension: {}", ext))?; + + let lsp_config = lang_entry + .lsp + .as_ref() + .with_context(|| format!("no LSP config for language: {}", lang_entry.name))?; + + find_lsp_binary(lsp_config) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_current_target() { + let target = current_target(); + assert!(!target.is_empty()); + assert_ne!(target, "unknown"); + } + + #[test] + fn test_lsp_binary_path() { + let lsp = LspConfig { + binary: "rust-analyzer".to_string(), + args: vec![], + version: None, + url_template: None, + archive: None, + binary_path: None, + targets: HashMap::new(), + npm_package: None, + go_package: None, + }; + + let path = lsp_binary_path(&lsp); + assert!(path.to_string_lossy().contains("rust-analyzer")); + assert!(path.to_string_lossy().contains("lsp")); + } + + #[test] + fn test_language_id_for_ext() { + assert_eq!(LspResolver::language_id_for_ext("rs"), "rust"); + assert_eq!(LspResolver::language_id_for_ext("ts"), "typescript"); + assert_eq!(LspResolver::language_id_for_ext("py"), "python"); + assert_eq!(LspResolver::language_id_for_ext("go"), "go"); + assert_eq!(LspResolver::language_id_for_ext("c"), "c"); + assert_eq!(LspResolver::language_id_for_ext("cpp"), "cpp"); + } + + #[test] + fn test_check_lsp_availability() { + let availability = check_lsp_availability(); + assert!(!availability.is_empty()); + } +} + +#[cfg(test)] +mod integration_tests { + use super::*; + use std::env; + use std::thread; + use std::time::Duration; + + #[test] + #[ignore] // Run with: cargo test --release -- --ignored test_lsp_client_rust + fn test_lsp_client_rust() { + let root = env::current_dir().expect("failed to get current dir"); + let 
registry = Registry::global(); + let rust_entry = registry.get("rust").expect("rust not in registry"); + let lsp_config = rust_entry.lsp.as_ref().expect("rust has no LSP config"); + + let mut client = LspClient::new(lsp_config, &root).expect("failed to create LSP client"); + client.initialize().expect("failed to initialize LSP"); + + let test_file = root.join("src/main.rs"); + let content = fs::read_to_string(&test_file).expect("failed to read test file"); + + client + .open_file(&test_file, &content, "rust") + .expect("failed to open file"); + + client + .wait_for_ready(&test_file, 30) + .expect("wait_for_ready failed"); + + // Line 61: ".filter(|path| is_url_or_git(path))" + let line = content.lines().nth(60).unwrap(); + let col = line.find("is_url_or_git").unwrap_or(0); + + // Retry a few times in case of "content modified" errors + for _ in 0..5 { + match client.goto_definition(&test_file, 60, col as u32) { + Ok(Some(loc)) => { + let path = uri_to_path(&loc.uri).expect("invalid uri"); + assert!(path.ends_with("main.rs")); + assert_eq!(loc.range.start.line, 25); // fn is_url_or_git definition + return; + } + Ok(None) | Err(_) => thread::sleep(Duration::from_secs(2)), + } + } + panic!("Failed to resolve definition after all attempts"); + } +} diff --git a/src/code/mod.rs b/src/code/mod.rs index 3bc6f18..702c16d 100644 --- a/src/code/mod.rs +++ b/src/code/mod.rs @@ -2,4 +2,5 @@ pub mod extract; pub mod grammar; pub mod graph; pub mod index; +pub mod lsp; pub mod resolve; diff --git a/src/main.rs b/src/main.rs index 11d04c4..d54f2a3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -275,7 +275,7 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { save_index(&index, &root)?; } - let graph = CallGraph::build_with_options(&index, args.strict); + let graph = CallGraph::build_precise(&index, &root, args.strict, args.precise); let node_id = if let Some(ref file) = target.file { let file_path = root.join(file); diff --git a/tests/lsp_integration.rs 
b/tests/lsp_integration.rs new file mode 100644 index 0000000..51c226c --- /dev/null +++ b/tests/lsp_integration.rs @@ -0,0 +1,915 @@ +use std::fs; +use std::path::{Path, PathBuf}; +use std::thread; +use std::time::Duration; + +use glimpse::code::extract::Extractor; +use glimpse::code::index::{file_fingerprint, Call, FileRecord, Index}; +use glimpse::code::lsp::LspResolver; +use tempfile::TempDir; +use tree_sitter::Parser; + +fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &str) { + let mut parser = Parser::new(); + parser.set_language(extractor.language()).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let rel_path = path.file_name().map(PathBuf::from).unwrap_or(path.to_path_buf()); + let (mtime, size) = file_fingerprint(path).unwrap_or((0, source.len() as u64)); + + let record = FileRecord { + path: rel_path, + mtime, + size, + definitions: extractor.extract_definitions(&tree, source.as_bytes(), path), + calls: extractor.extract_calls(&tree, source.as_bytes(), path), + imports: extractor.extract_imports(&tree, source.as_bytes(), path), + }; + + index.update(record); +} + +fn lsp_available(binary: &str) -> bool { + which::which(binary).is_ok() || { + let lsp_dir = dirs::data_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join("glimpse") + .join("lsp"); + lsp_dir.join(binary).exists() + } +} + +fn wait_for_lsp_ready(resolver: &mut LspResolver, calls: &[Call], index: &Index) { + for _ in 0..30 { + if let Some(call) = calls.first() { + if resolver.resolve_call(call, index).is_some() { + return; + } + } + thread::sleep(Duration::from_millis(500)); + } +} + +fn collect_calls(index: &Index) -> Vec { + index.calls().cloned().collect() +} + +mod rust_lsp { + use super::*; + + fn rust_analyzer_available() -> bool { + lsp_available("rust-analyzer") + } + + #[test] + #[ignore] + fn test_rust_same_file_definition() { + if !rust_analyzer_available() { + eprintln!("Skipping: rust-analyzer not available"); + return; + } + + let 
dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + fs::create_dir_all(&src).unwrap(); + + let main_rs = r#"fn main() { + helper(); +} + +fn helper() { + println!("hello"); +} +"#; + + let cargo_toml = r#"[package] +name = "test_project" +version = "0.1.0" +edition = "2021" +"#; + + fs::write(src.join("main.rs"), main_rs).unwrap(); + fs::write(dir.path().join("Cargo.toml"), cargo_toml).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("rust").unwrap(); + + let main_rs_path = src.join("main.rs"); + let rel_path = main_rs_path.strip_prefix(dir.path()).unwrap(); + + let mut parser = tree_sitter::Parser::new(); + parser.set_language(extractor.language()).unwrap(); + let tree = parser.parse(main_rs, None).unwrap(); + + let record = FileRecord { + path: rel_path.to_path_buf(), + mtime: 0, + size: main_rs.len() as u64, + definitions: extractor.extract_definitions(&tree, main_rs.as_bytes(), rel_path), + calls: extractor.extract_calls(&tree, main_rs.as_bytes(), rel_path), + imports: extractor.extract_imports(&tree, main_rs.as_bytes(), rel_path), + }; + + eprintln!("Index record path: {:?}", record.path); + eprintln!("Definitions: {:?}", record.definitions.iter().map(|d| (&d.name, &d.file)).collect::>()); + eprintln!("Calls: {:?}", record.calls.iter().map(|c| (&c.callee, &c.file, c.span.start_line)).collect::>()); + + index.update(record); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from Rust code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + eprintln!("Resolving call: callee={}, file={:?}, line={}", call.callee, call.file, call.span.start_line); + let def = resolver.resolve_call(call, &index); + if def.is_none() { + eprintln!("Resolution failed! 
Check LSP logs."); + } + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + assert_eq!(def.span.start_line, 5); + } + } + + #[test] + #[ignore] + fn test_rust_cross_module_definition() { + if !rust_analyzer_available() { + eprintln!("Skipping: rust-analyzer not available"); + return; + } + + let dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + fs::create_dir_all(&src).unwrap(); + + let main_rs = r#"mod utils; + +fn main() { + utils::process(); +} +"#; + + let utils_rs = r#"pub fn process() { + println!("processing"); +} +"#; + + let cargo_toml = r#"[package] +name = "test_project" +version = "0.1.0" +edition = "2021" +"#; + + fs::write(src.join("main.rs"), main_rs).unwrap(); + fs::write(src.join("utils.rs"), utils_rs).unwrap(); + fs::write(dir.path().join("Cargo.toml"), cargo_toml).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("rust").unwrap(); + index_file(&mut index, &extractor, &src.join("main.rs"), main_rs); + index_file(&mut index, &extractor, &src.join("utils.rs"), utils_rs); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "process"); + assert!(process_call.is_some(), "Should find call to process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve utils::process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "process"); + } + } +} + +mod go_lsp { + use super::*; + + fn gopls_available() -> bool { + lsp_available("gopls") + } + + #[test] + #[ignore] + fn test_go_same_file_definition() { + if !gopls_available() { + eprintln!("Skipping: gopls not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_go = r#"package main + +func main() { + helper() +} + +func helper() { + 
println("hello") +} +"#; + + let go_mod = "module test_project\n\ngo 1.21\n"; + + fs::write(dir.path().join("main.go"), main_go).unwrap(); + fs::write(dir.path().join("go.mod"), go_mod).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("go").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.go"), main_go); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from Go code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + } + } + + #[test] + #[ignore] + fn test_go_cross_package_definition() { + if !gopls_available() { + eprintln!("Skipping: gopls not available"); + return; + } + + let dir = TempDir::new().unwrap(); + let utils_dir = dir.path().join("utils"); + fs::create_dir_all(&utils_dir).unwrap(); + + let main_go = r#"package main + +import "test_project/utils" + +func main() { + utils.Process() +} +"#; + + let utils_go = r#"package utils + +func Process() { + println("processing") +} +"#; + + let go_mod = "module test_project\n\ngo 1.21\n"; + + fs::write(dir.path().join("main.go"), main_go).unwrap(); + fs::write(utils_dir.join("utils.go"), utils_go).unwrap(); + fs::write(dir.path().join("go.mod"), go_mod).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("go").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.go"), main_go); + index_file(&mut index, &extractor, &utils_dir.join("utils.go"), utils_go); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "Process"); + assert!(process_call.is_some(), 
"Should find call to Process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve utils.Process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "Process"); + } + } +} + +mod python_lsp { + use super::*; + + fn pyright_available() -> bool { + lsp_available("pyright-langserver") || lsp_available("pyright") + } + + #[test] + #[ignore] + fn test_python_same_file_definition() { + if !pyright_available() { + eprintln!("Skipping: pyright not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_py = r#"def main(): + helper() + +def helper(): + print("hello") + +if __name__ == "__main__": + main() +"#; + + fs::write(dir.path().join("main.py"), main_py).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("python").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from Python code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + } + } + + #[test] + #[ignore] + fn test_python_cross_module_definition() { + if !pyright_available() { + eprintln!("Skipping: pyright not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_py = r#"from utils import process + +def main(): + process() + +if __name__ == "__main__": + main() +"#; + + let utils_py = r#"def process(): + print("processing") +"#; 
+ + fs::write(dir.path().join("main.py"), main_py).unwrap(); + fs::write(dir.path().join("utils.py"), utils_py).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("python").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); + index_file(&mut index, &extractor, &dir.path().join("utils.py"), utils_py); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "process"); + assert!(process_call.is_some(), "Should find call to process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "process"); + } + } +} + +mod typescript_lsp { + use super::*; + + fn tsserver_available() -> bool { + lsp_available("typescript-language-server") || lsp_available("tsserver") + } + + #[test] + #[ignore] + fn test_typescript_same_file_definition() { + if !tsserver_available() { + eprintln!("Skipping: typescript-language-server not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_ts = r#"function main() { + helper(); +} + +function helper() { + console.log("hello"); +} + +main(); +"#; + + let tsconfig = r#"{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "strict": true + } +} +"#; + + fs::write(dir.path().join("main.ts"), main_ts).unwrap(); + fs::write(dir.path().join("tsconfig.json"), tsconfig).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("typescript").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from TypeScript code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should 
find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + } + } + + #[test] + #[ignore] + fn test_typescript_cross_module_definition() { + if !tsserver_available() { + eprintln!("Skipping: typescript-language-server not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_ts = r#"import { process } from "./utils"; + +function main() { + process(); +} + +main(); +"#; + + let utils_ts = r#"export function process() { + console.log("processing"); +} +"#; + + let tsconfig = r#"{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "strict": true + } +} +"#; + + fs::write(dir.path().join("main.ts"), main_ts).unwrap(); + fs::write(dir.path().join("utils.ts"), utils_ts).unwrap(); + fs::write(dir.path().join("tsconfig.json"), tsconfig).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("typescript").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts); + index_file(&mut index, &extractor, &dir.path().join("utils.ts"), utils_ts); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "process"); + assert!(process_call.is_some(), "Should find call to process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "process"); + } + } +} + +mod javascript_lsp { + use super::*; + + fn tsserver_available() -> bool { + lsp_available("typescript-language-server") || lsp_available("tsserver") + } + + #[test] + 
#[ignore] + fn test_javascript_same_file_definition() { + if !tsserver_available() { + eprintln!("Skipping: typescript-language-server not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_js = r#"function main() { + helper(); +} + +function helper() { + console.log("hello"); +} + +main(); +"#; + + let jsconfig = r#"{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs" + } +} +"#; + + fs::write(dir.path().join("main.js"), main_js).unwrap(); + fs::write(dir.path().join("jsconfig.json"), jsconfig).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("javascript").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.js"), main_js); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from JavaScript code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + } + } +} + +mod c_lsp { + use super::*; + + fn clangd_available() -> bool { + lsp_available("clangd") + } + + #[test] + #[ignore] + fn test_c_same_file_definition() { + if !clangd_available() { + eprintln!("Skipping: clangd not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_c = r#"#include + +void helper(void); + +int main(void) { + helper(); + return 0; +} + +void helper(void) { + printf("hello\n"); +} +"#; + + let compile_commands = r#"[ + { + "directory": ".", + "command": "cc -c main.c", + "file": "main.c" + } +] +"#; + + fs::write(dir.path().join("main.c"), main_c).unwrap(); + fs::write(dir.path().join("compile_commands.json"), compile_commands).unwrap(); + + let mut 
index = Index::new(); + let extractor = Extractor::new("c").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.c"), main_c); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from C code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + } + } + + #[test] + #[ignore] + fn test_c_cross_file_definition() { + if !clangd_available() { + eprintln!("Skipping: clangd not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_c = r#"#include "utils.h" + +int main(void) { + process(); + return 0; +} +"#; + + let utils_h = r#"#ifndef UTILS_H +#define UTILS_H + +void process(void); + +#endif +"#; + + let utils_c = r#"#include "utils.h" +#include + +void process(void) { + printf("processing\n"); +} +"#; + + let compile_commands = r#"[ + { + "directory": ".", + "command": "cc -c main.c", + "file": "main.c" + }, + { + "directory": ".", + "command": "cc -c utils.c", + "file": "utils.c" + } +] +"#; + + fs::write(dir.path().join("main.c"), main_c).unwrap(); + fs::write(dir.path().join("utils.h"), utils_h).unwrap(); + fs::write(dir.path().join("utils.c"), utils_c).unwrap(); + fs::write(dir.path().join("compile_commands.json"), compile_commands).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("c").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.c"), main_c); + index_file(&mut index, &extractor, &dir.path().join("utils.c"), utils_c); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "process"); + 
assert!(process_call.is_some(), "Should find call to process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "process"); + } + } +} + +mod cpp_lsp { + use super::*; + + fn clangd_available() -> bool { + lsp_available("clangd") + } + + #[test] + #[ignore] + fn test_cpp_same_file_definition() { + if !clangd_available() { + eprintln!("Skipping: clangd not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_cpp = r#"#include + +void helper(); + +int main() { + helper(); + return 0; +} + +void helper() { + std::cout << "hello" << std::endl; +} +"#; + + let compile_commands = r#"[ + { + "directory": ".", + "command": "c++ -std=c++17 -c main.cpp", + "file": "main.cpp" + } +] +"#; + + fs::write(dir.path().join("main.cpp"), main_cpp).unwrap(); + fs::write(dir.path().join("compile_commands.json"), compile_commands).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("cpp").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.cpp"), main_cpp); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from C++ code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + } + } + + #[test] + #[ignore] + fn test_cpp_method_definition() { + if !clangd_available() { + eprintln!("Skipping: clangd not available"); + return; + } + + let 
dir = TempDir::new().unwrap(); + + let main_cpp = r#"#include + +class Processor { +public: + void process(); +}; + +void Processor::process() { + std::cout << "processing" << std::endl; +} + +int main() { + Processor p; + p.process(); + return 0; +} +"#; + + let compile_commands = r#"[ + { + "directory": ".", + "command": "c++ -std=c++17 -c main.cpp", + "file": "main.cpp" + } +] +"#; + + fs::write(dir.path().join("main.cpp"), main_cpp).unwrap(); + fs::write(dir.path().join("compile_commands.json"), compile_commands).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("cpp").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.cpp"), main_cpp); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "process"); + assert!(process_call.is_some(), "Should find call to process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve p.process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "process"); + } + } +} + +mod lsp_availability { + use glimpse::code::lsp::check_lsp_availability; + + #[test] + fn test_check_lsp_availability_returns_results() { + let availability = check_lsp_availability(); + + assert!(!availability.is_empty(), "Should return availability for at least one language"); + + for (lang, info) in &availability { + println!( + " {}: available={}, location={:?}, can_install={}, method={:?}", + lang, info.available, info.location, info.can_auto_install, info.install_method + ); + } + } + + #[test] + fn test_rust_analyzer_detection() { + let availability = check_lsp_availability(); + + if let Some(info) = availability.get("rust") { + println!( + "rust-analyzer: available={}, location={:?}, can_install={}", + info.available, info.location, info.can_auto_install + ); + if 
info.available { + assert!(info.location.is_some(), "If available, should have location"); + } + } + } + + #[test] + fn test_npm_packages_can_be_installed() { + let availability = check_lsp_availability(); + + if let Some(info) = availability.get("typescript") { + println!( + "typescript-language-server: available={}, can_install={}, method={:?}", + info.available, info.can_auto_install, info.install_method + ); + if !info.available { + assert_eq!(info.install_method.as_deref(), Some("bun")); + } + } + + if let Some(info) = availability.get("python") { + println!( + "pyright: available={}, can_install={}, method={:?}", + info.available, info.can_auto_install, info.install_method + ); + if !info.available { + assert_eq!(info.install_method.as_deref(), Some("bun")); + } + } + } + + #[test] + fn test_go_package_can_be_installed() { + let availability = check_lsp_availability(); + + if let Some(info) = availability.get("go") { + println!( + "gopls: available={}, can_install={}, method={:?}", + info.available, info.can_auto_install, info.install_method + ); + if !info.available { + assert_eq!(info.install_method.as_deref(), Some("go")); + } + } + } +} From 28195cecbb923ec39a77dea372f8051d674d2df9 Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 14:27:08 -0800 Subject: [PATCH 30/35] feat: add LSP support for zig, bash, java, and scala --- Cargo.lock | 112 +++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 2 + registry.toml | 27 +++++++++++- src/code/lsp.rs | 75 +++++++++++++++++++++++++++----- 4 files changed, 204 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1cf5629..d793cc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -893,6 +893,18 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + 
"windows-sys 0.60.2", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -1155,6 +1167,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", + "tar", "tempfile", "tiktoken-rs", "tokenizers", @@ -1163,6 +1176,7 @@ dependencies = [ "url", "walkdir", "which", + "xz2", "zip", ] @@ -1839,6 +1853,7 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.8.0", "libc", + "redox_syscall", ] [[package]] @@ -3345,6 +3360,17 @@ dependencies = [ "libc", ] +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.17.1" @@ -4082,6 +4108,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -4106,13 +4141,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 
0.53.1", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -4125,6 +4177,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -4137,6 +4195,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -4149,12 +4213,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -4167,6 +4243,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -4179,6 +4261,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -4191,6 +4279,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -4203,6 +4297,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.7.2" @@ -4286,6 +4386,16 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d" +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix 1.1.3", +] + [[package]] name = "xz2" version = "0.1.7" diff --git a/Cargo.toml b/Cargo.toml index e5250a4..dedbd78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,8 @@ lsp-types = "0.97" which = "8.0" flate2 = "1.0" zip = "2.2" +tar = "0.4" +xz2 = "0.1" [build-dependencies] serde = { version = "1.0.217", features = ["derive"] } diff --git a/registry.toml b/registry.toml index fda9b2d..a9f40a6 100644 --- a/registry.toml +++ b/registry.toml @@ -85,6 +85,18 @@ import_query = """ (#eq? @_builtin "@import")) @import """ +[language.lsp] +binary = "zls" +args = [] +url_template = "https://github.com/zigtools/zls/releases/download/{version}/zls-{target}.tar.xz" +archive = "tar.xz" + +[language.lsp.targets] +"x86_64-unknown-linux-gnu" = "x86_64-linux" +"aarch64-unknown-linux-gnu" = "aarch64-linux" +"x86_64-apple-darwin" = "x86_64-macos" +"aarch64-apple-darwin" = "aarch64-macos" + [[language]] name = "c" extensions = ["c", "h"] @@ -266,6 +278,11 @@ import_query = """ (#any-of? @_cmd "source" ".")) @import """ +[language.lsp] +binary = "bash-language-server" +args = ["start"] +npm_package = "bash-language-server" + [[language]] name = "python" extensions = ["py"] @@ -559,6 +576,10 @@ import_query = """ (asterisk)? @wildcard) @import """ +[language.lsp] +binary = "jdtls" +args = [] + [[language]] name = "scala" extensions = ["scala", "sc"] @@ -618,4 +639,8 @@ import_query = """ (namespace_selectors) @selectors (as_renamed_identifier) @alias ]?) 
@import -""" \ No newline at end of file +""" + +[language.lsp] +binary = "metals" +args = [] \ No newline at end of file diff --git a/src/code/lsp.rs b/src/code/lsp.rs index f1210bb..2b0f442 100644 --- a/src/code/lsp.rs +++ b/src/code/lsp.rs @@ -75,13 +75,41 @@ fn uri_to_path(uri: &Uri) -> Option { url.to_file_path().ok() } -fn download_and_extract(lsp: &LspConfig) -> Result { +fn detect_zig_version_from_zon(root: &Path) -> Option { + let zon_path = root.join("build.zig.zon"); + let content = fs::read_to_string(zon_path).ok()?; + let re = regex::Regex::new(r#"\.minimum_zig_version\s*=\s*"([^"]+)""#).ok()?; + let caps = re.captures(&content)?; + Some(caps.get(1)?.as_str().to_string()) +} + +fn detect_zig_version(root: &Path) -> Option { + if let Ok(output) = Command::new("zig").arg("version").output() { + if output.status.success() { + let version_str = String::from_utf8_lossy(&output.stdout); + let version = version_str.trim(); + if let Some(base) = version.split('-').next() { + return Some(base.to_string()); + } + } + } + + detect_zig_version_from_zon(root) +} + +fn download_and_extract(lsp: &LspConfig, root: &Path) -> Result { let Some(ref url_template) = lsp.url_template else { bail!("no download URL configured for {}", lsp.binary); }; - let Some(ref version) = lsp.version else { - bail!("no version configured for {}", lsp.binary); + let version = if lsp.binary == "zls" { + detect_zig_version(root).with_context(|| { + "failed to detect zig version. Install zig or install zls manually" + })? + } else { + lsp.version + .clone() + .with_context(|| format!("no version configured for {}", lsp.binary))? 
}; let target = current_target(); @@ -94,7 +122,7 @@ fn download_and_extract(lsp: &LspConfig) -> Result { }; let url = url_template - .replace("{version}", version) + .replace("{version}", &version) .replace("{target}", target_name); eprintln!("Downloading {} from {}...", lsp.binary, url); @@ -120,12 +148,36 @@ fn download_and_extract(lsp: &LspConfig) -> Result { let mut output = File::create(&final_path)?; std::io::copy(&mut decoder, &mut output)?; } + "tar.xz" => { + let decoder = xz2::read::XzDecoder::new(&bytes[..]); + let mut archive = tar::Archive::new(decoder); + + let binary_name = format!("{}{}", lsp.binary, binary_extension()); + let mut found = false; + + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + if let Some(name) = path.file_name() { + if name == binary_name.as_str() { + let mut output = File::create(&final_path)?; + std::io::copy(&mut entry, &mut output)?; + found = true; + break; + } + } + } + + if !found { + bail!("binary {} not found in tar.xz archive", binary_name); + } + } "zip" => { let cursor = std::io::Cursor::new(&bytes); let mut archive = zip::ZipArchive::new(cursor)?; let binary_path = if let Some(ref path) = lsp.binary_path { - path.replace("{version}", version) + path.replace("{version}", &version) } else { lsp.binary.clone() }; @@ -287,7 +339,7 @@ fn install_go_package(lsp: &LspConfig) -> Result { ); } -fn find_lsp_binary(lsp: &LspConfig) -> Result { +fn find_lsp_binary(lsp: &LspConfig, root: &Path) -> Result { let local_path = lsp_binary_path(lsp); if local_path.exists() { return Ok(local_path); @@ -298,7 +350,7 @@ fn find_lsp_binary(lsp: &LspConfig) -> Result { } if lsp.url_template.is_some() { - return download_and_extract(lsp); + return download_and_extract(lsp, root); } if lsp.npm_package.is_some() { @@ -342,7 +394,7 @@ struct LspMessage { impl LspClient { fn new(lsp: &LspConfig, root: &Path) -> Result { - let binary_path = find_lsp_binary(lsp)?; + let binary_path = find_lsp_binary(lsp, 
root)?; let mut process = Command::new(&binary_path) .args(&lsp.args) @@ -654,6 +706,9 @@ impl LspResolver { "c" | "h" => "c", "cpp" | "cc" | "cxx" | "hpp" | "hxx" => "cpp", "java" => "java", + "zig" => "zig", + "sh" | "bash" => "shellscript", + "scala" | "sc" => "scala", _ => "text", } } @@ -780,7 +835,7 @@ pub fn check_lsp_availability() -> HashMap { result } -pub fn ensure_lsp_for_extension(ext: &str) -> Result { +pub fn ensure_lsp_for_extension(ext: &str, root: &Path) -> Result { let registry = Registry::global(); let lang_entry = registry .get_by_extension(ext) @@ -791,7 +846,7 @@ pub fn ensure_lsp_for_extension(ext: &str) -> Result { .as_ref() .with_context(|| format!("no LSP config for language: {}", lang_entry.name))?; - find_lsp_binary(lsp_config) + find_lsp_binary(lsp_config, root) } #[cfg(test)] From cb55939a7fbb850d3b5dc572a4ddf871366ac127 Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 14:46:57 -0800 Subject: [PATCH 31/35] fix: add LSP warmup for reliable call resolution --- src/cli.rs | 4 + src/code/extract.rs | 2 + src/code/graph.rs | 29 +++- src/code/index.rs | 15 +- src/code/lsp.rs | 292 ++++++++++++++++++++++++++++++++++----- src/code/resolve.rs | 1 + src/main.rs | 122 ++++++++++++++-- tests/integration.rs | 6 + tests/lsp_integration.rs | 103 +++++++++++--- 9 files changed, 503 insertions(+), 71 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 182c7a9..6b11ae4 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -107,6 +107,10 @@ pub enum IndexCommand { /// Force rebuild (ignore existing index) #[arg(short, long)] force: bool, + + /// Use LSP for precise call resolution (slower but more accurate) + #[arg(long)] + precise: bool, }, /// Clear the index for a project diff --git a/src/code/extract.rs b/src/code/extract.rs index 31995da..0bd5c16 100644 --- a/src/code/extract.rs +++ b/src/code/extract.rs @@ -157,6 +157,7 @@ impl Extractor { kind, span: node_to_span(&node), file: path.to_path_buf(), + signature: None, }); } } @@ -194,6 +195,7 @@ 
impl Extractor { span: node_to_span(&node), file: path.to_path_buf(), caller, + resolved: None, }); } } diff --git a/src/code/graph.rs b/src/code/graph.rs index 3daaa60..3c5757d 100644 --- a/src/code/graph.rs +++ b/src/code/graph.rs @@ -56,7 +56,18 @@ impl CallGraph { continue; }; - let callee_def = resolver.resolve(&call.callee, call.qualifier.as_deref(), &call.file); + let callee_def = if let Some(ref resolved) = call.resolved { + index + .get(&resolved.target_file) + .and_then(|r| { + r.definitions + .iter() + .find(|d| d.name == resolved.target_name) + }) + .cloned() + } else { + resolver.resolve(&call.callee, call.qualifier.as_deref(), &call.file) + }; let callee_id = if let Some(def) = callee_def { graph @@ -73,7 +84,6 @@ impl CallGraph { } pub fn build_with_lsp(index: &Index, root: &Path) -> Self { - let mut lsp_resolver = LspResolver::new(root); let heuristic_resolver = Resolver::with_strict(index, false); let mut graph = CallGraph::new(); @@ -91,10 +101,15 @@ impl CallGraph { let pb = ProgressBar::new(total as u64); pb.set_style( ProgressStyle::default_bar() - .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} resolving calls") + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") .expect("valid template") .progress_chars("#>-"), ); + pb.set_message("initializing LSP..."); + + let mut lsp_resolver = LspResolver::with_progress(root, pb.clone()); + + pb.set_message("resolving calls"); for call in &calls { pb.inc(1); @@ -108,9 +123,9 @@ impl CallGraph { continue; }; - let callee_def = lsp_resolver - .resolve_call(call, index) - .or_else(|| heuristic_resolver.resolve(&call.callee, call.qualifier.as_deref(), &call.file)); + let callee_def = lsp_resolver.resolve_call(call, index).or_else(|| { + heuristic_resolver.resolve(&call.callee, call.qualifier.as_deref(), &call.file) + }); let callee_id = if let Some(def) = callee_def { graph @@ -412,6 +427,7 @@ mod tests { kind: DefinitionKind::Function, span: make_span(), file: 
PathBuf::from(file), + signature: None, } } @@ -422,6 +438,7 @@ mod tests { span: make_span(), file: PathBuf::from(file), caller: caller.map(|s| s.to_string()), + resolved: None, } } diff --git a/src/code/index.rs b/src/code/index.rs index 9762ad9..2c54639 100644 --- a/src/code/index.rs +++ b/src/code/index.rs @@ -10,7 +10,7 @@ use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; pub const INDEX_FILE: &str = "index.bin"; -pub const INDEX_VERSION: u32 = 2; +pub const INDEX_VERSION: u32 = 3; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Span { @@ -26,6 +26,16 @@ pub struct Definition { pub kind: DefinitionKind, pub span: Span, pub file: PathBuf, + pub signature: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ResolvedCall { + pub target_file: PathBuf, + pub target_name: String, + pub target_span: Span, + pub signature: Option, + pub receiver_type: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -48,6 +58,7 @@ pub struct Call { pub span: Span, pub file: PathBuf, pub caller: Option, + pub resolved: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -208,6 +219,7 @@ mod tests { end_line: 10, }, file: PathBuf::from(format!("src/{}.rs", name)), + signature: None, }], calls: vec![Call { callee: "other_fn".to_string(), @@ -220,6 +232,7 @@ mod tests { }, file: PathBuf::from(format!("src/{}.rs", name)), caller: Some(format!("{}_fn", name)), + resolved: None, }], imports: vec![Import { module_path: "std::fs".to_string(), diff --git a/src/code/lsp.rs b/src/code/lsp.rs index 2b0f442..93acfe0 100644 --- a/src/code/lsp.rs +++ b/src/code/lsp.rs @@ -7,6 +7,7 @@ use std::sync::atomic::{AtomicI32, Ordering}; use anyhow::{bail, Context, Result}; use flate2::read::GzDecoder; +use indicatif::{ProgressBar, ProgressStyle}; use lsp_types::{ ClientCapabilities, DidOpenTextDocumentParams, GotoDefinitionParams, GotoDefinitionResponse, InitializeParams, InitializedParams, Position, TextDocumentIdentifier, @@ -16,7 
+17,7 @@ use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use super::grammar::{lsp_dir, LspConfig, Registry}; -use super::index::{Call, Definition, Index}; +use super::index::{Call, Definition, Index, ResolvedCall}; fn current_target() -> &'static str { #[cfg(all(target_os = "linux", target_arch = "x86_64"))] @@ -103,9 +104,8 @@ fn download_and_extract(lsp: &LspConfig, root: &Path) -> Result { }; let version = if lsp.binary == "zls" { - detect_zig_version(root).with_context(|| { - "failed to detect zig version. Install zig or install zls manually" - })? + detect_zig_version(root) + .with_context(|| "failed to detect zig version. Install zig or install zls manually")? } else { lsp.version .clone() @@ -125,8 +125,6 @@ fn download_and_extract(lsp: &LspConfig, root: &Path) -> Result { .replace("{version}", &version) .replace("{target}", target_name); - eprintln!("Downloading {} from {}...", lsp.binary, url); - let dir = lsp_dir(); fs::create_dir_all(&dir)?; @@ -137,7 +135,28 @@ fn download_and_extract(lsp: &LspConfig, root: &Path) -> Result { bail!("download failed with status: {}", response.status()); } - let bytes = response.bytes()?; + let total_size = response.content_length().unwrap_or(0); + let pb = ProgressBar::new(total_size); + pb.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({eta}) downloading {msg}") + .expect("valid template") + .progress_chars("#>-"), + ); + pb.set_message(lsp.binary.clone()); + + let mut bytes = Vec::new(); + let mut reader = response; + let mut buffer = [0u8; 8192]; + loop { + let n = reader.read(&mut buffer)?; + if n == 0 { + break; + } + bytes.extend_from_slice(&buffer[..n]); + pb.set_position(bytes.len() as u64); + } + pb.finish_and_clear(); let archive_type = lsp.archive.as_deref().unwrap_or("gz"); let final_path = lsp_binary_path(lsp); @@ -279,10 +298,7 @@ fn create_wrapper_script(wrapper_path: &Path, target_path: &Path) -> Result<()> { use 
std::os::unix::fs::PermissionsExt; - let script = format!( - "#!/bin/sh\nexec \"{}\" \"$@\"\n", - target_path.display() - ); + let script = format!("#!/bin/sh\nexec \"{}\" \"$@\"\n", target_path.display()); fs::write(wrapper_path, script)?; let mut perms = fs::metadata(wrapper_path)?.permissions(); @@ -292,10 +308,7 @@ fn create_wrapper_script(wrapper_path: &Path, target_path: &Path) -> Result<()> #[cfg(windows)] { - let script = format!( - "@echo off\r\n\"{}\" %*\r\n", - target_path.display() - ); + let script = format!("@echo off\r\n\"{}\" %*\r\n", target_path.display()); let wrapper_cmd = wrapper_path.with_extension("cmd"); fs::write(&wrapper_cmd, script)?; } @@ -497,13 +510,21 @@ impl LspClient { self.send_message(&msg) } - fn wait_for_ready(&mut self, path: &Path, max_attempts: u32) -> Result { + fn wait_for_ready( + &mut self, + path: &Path, + max_attempts: u32, + pb: Option<&ProgressBar>, + ) -> Result { use std::thread; use std::time::Duration; let uri = path_to_uri(path)?; - // First wait for basic syntax analysis (documentSymbol) + if let Some(pb) = pb { + pb.set_message("waiting for syntax analysis..."); + } + for _ in 0..10 { let params = lsp_types::DocumentSymbolParams { text_document: TextDocumentIdentifier { uri: uri.clone() }, @@ -517,12 +538,14 @@ impl LspClient { } } - // Then wait for semantic analysis (hover on a known symbol) - // This indicates rust-analyzer has finished loading the project + if let Some(pb) = pb { + pb.set_message("waiting for semantic analysis..."); + } + for attempt in 0..max_attempts { let hover_params = json!({ "textDocument": { "uri": uri.as_str() }, - "position": { "line": 0, "character": 4 } // "mod" keyword + "position": { "line": 0, "character": 4 } }); match self.send_request("textDocument/hover", hover_params) { @@ -633,6 +656,35 @@ impl LspClient { } } + fn hover(&mut self, path: &Path, line: u32, character: u32) -> Result> { + let uri = path_to_uri(path)?; + + let params = json!({ + "textDocument": { "uri": 
uri.as_str() }, + "position": { "line": line, "character": character } + }); + + let result = self.send_request("textDocument/hover", params)?; + + if result.is_null() { + return Ok(None); + } + + let hover: lsp_types::Hover = serde_json::from_value(result)?; + + let content = match hover.contents { + lsp_types::HoverContents::Scalar(marked) => extract_marked_string(&marked), + lsp_types::HoverContents::Array(arr) => arr + .into_iter() + .map(|m| extract_marked_string(&m)) + .collect::>() + .join("\n"), + lsp_types::HoverContents::Markup(markup) => markup.value, + }; + + Ok(Some(content)) + } + fn shutdown(&mut self) -> Result<()> { self.send_request("shutdown", json!(null))?; self.send_notification("exit", json!(null))?; @@ -641,16 +693,34 @@ impl LspClient { } } +fn extract_marked_string(marked: &lsp_types::MarkedString) -> String { + match marked { + lsp_types::MarkedString::String(s) => s.clone(), + lsp_types::MarkedString::LanguageString(ls) => ls.value.clone(), + } +} + impl Drop for LspClient { fn drop(&mut self) { let _ = self.shutdown(); } } +#[derive(Debug, Default)] +pub struct LspStats { + pub resolved: usize, + pub no_definition: usize, + pub external: usize, + pub not_indexed: usize, + pub no_match: usize, +} + pub struct LspResolver { clients: HashMap, root: PathBuf, file_cache: HashMap, + progress: Option, + stats: LspStats, } impl LspResolver { @@ -659,9 +729,37 @@ impl LspResolver { clients: HashMap::new(), root: root.to_path_buf(), file_cache: HashMap::new(), + progress: None, + stats: LspStats::default(), + } + } + + pub fn with_progress(root: &Path, pb: ProgressBar) -> Self { + Self { + clients: HashMap::new(), + root: root.to_path_buf(), + file_cache: HashMap::new(), + progress: Some(pb), + stats: LspStats::default(), } } + pub fn stats(&self) -> &LspStats { + &self.stats + } + + pub fn set_progress(&mut self, pb: Option) { + self.progress = pb; + } + + fn find_sample_file(&self, ext: &str) -> Option { + let pattern = format!("**/*.{}", ext); 
+ glob::glob(&self.root.join(&pattern).to_string_lossy()) + .ok()? + .filter_map(|p| p.ok()) + .find(|p| !p.to_string_lossy().contains("/target/")) + } + fn get_or_create_client(&mut self, ext: &str) -> Result<&mut LspClient> { let registry = Registry::global(); let lang_entry = registry @@ -676,8 +774,26 @@ impl LspResolver { let key = lsp_config.binary.clone(); if !self.clients.contains_key(&key) { + if let Some(ref pb) = self.progress { + pb.set_message(format!("starting {}...", lsp_config.binary)); + } + let mut client = LspClient::new(lsp_config, &self.root)?; client.initialize()?; + + let sample_file = self.find_sample_file(&ext); + if let Some(ref sample) = sample_file { + if let Ok(content) = std::fs::read_to_string(sample) { + let lang_id = Self::language_id_for_ext(&ext); + let _ = client.open_file(sample, &content, lang_id); + let _ = client.wait_for_ready(sample, 60, self.progress.as_ref()); + } + } + + if let Some(ref pb) = self.progress { + pb.set_message(format!("{} ready", lsp_config.binary)); + } + self.clients.insert(key.clone(), client); } @@ -713,8 +829,8 @@ impl LspResolver { } } - pub fn resolve_call(&mut self, call: &Call, index: &Index) -> Option { - let ext = call.file.extension()?.to_str()?.to_string(); + pub fn resolve_call_full(&mut self, call: &Call, index: &Index) -> Option { + let ext = call.file.extension().and_then(|e| e.to_str())?.to_string(); let abs_path = self.root.join(&call.file); let language_id = Self::language_id_for_ext(&ext); let callee = call.callee.clone(); @@ -731,27 +847,96 @@ impl LspResolver { let col = line_content.find(&callee).unwrap_or(0) as u32; let client = self.get_or_create_client(&ext).ok()?; - - if client.open_file(&abs_path, &content, language_id).is_err() { - return None; - } + client.open_file(&abs_path, &content, language_id).ok()?; + + let signature = client + .hover(&abs_path, start_line_idx as u32, col) + .ok() + .flatten() + .and_then(|h| extract_signature(&h)); + + let receiver_type = 
call.qualifier.as_ref().and_then(|_| { + let qualifier_col = line_content.find(call.qualifier.as_deref()?)?; + client + .hover(&abs_path, start_line_idx as u32, qualifier_col as u32) + .ok() + .flatten() + .and_then(|h| extract_type(&h)) + }); let location = client .goto_definition(&abs_path, start_line_idx as u32, col) - .ok()??; + .ok() + .flatten(); + + let location = match location { + Some(loc) => loc, + None => { + self.stats.no_definition += 1; + return None; + } + }; let def_path = uri_to_path(&location.uri)?; + let root = self.root.clone(); - let rel_path = def_path.strip_prefix(&root).ok()?.to_path_buf(); + let rel_path = match def_path.strip_prefix(&root) { + Ok(p) => p.to_path_buf(), + Err(_) => { + self.stats.external += 1; + return None; + } + }; let start_line = location.range.start.line as usize + 1; let end_line = location.range.end.line as usize + 1; - let record = index.get(&rel_path)?; - record + let record = match index.get(&rel_path) { + Some(r) => r, + None => { + self.stats.not_indexed += 1; + return None; + } + }; + + let def = match record .definitions .iter() .find(|d| d.span.start_line <= start_line && d.span.end_line >= end_line) + { + Some(d) => d, + None => { + self.stats.no_match += 1; + return None; + } + }; + + self.stats.resolved += 1; + Some(ResolvedCall { + target_file: rel_path, + target_name: def.name.clone(), + target_span: def.span.clone(), + signature, + receiver_type, + }) + } + + pub fn resolve_call(&mut self, call: &Call, index: &Index) -> Option { + if let Some(ref resolved) = call.resolved { + return index + .get(&resolved.target_file)? + .definitions + .iter() + .find(|d| d.name == resolved.target_name) + .cloned(); + } + + let resolved = self.resolve_call_full(call, index)?; + index + .get(&resolved.target_file)? 
+ .definitions + .iter() + .find(|d| d.name == resolved.target_name) .cloned() } @@ -772,6 +957,51 @@ impl LspResolver { } } +fn extract_signature(hover_content: &str) -> Option { + let lines: Vec<&str> = hover_content.lines().collect(); + for line in &lines { + let trimmed = line.trim(); + if trimmed.starts_with("fn ") + || trimmed.starts_with("pub fn ") + || trimmed.starts_with("async fn ") + || trimmed.starts_with("pub async fn ") + || trimmed.starts_with("def ") + || trimmed.starts_with("function ") + || trimmed.starts_with("func ") + { + return Some(trimmed.to_string()); + } + if trimmed.contains("->") || trimmed.contains("=>") { + return Some(trimmed.to_string()); + } + } + lines.first().map(|s| s.trim().to_string()) +} + +fn extract_type(hover_content: &str) -> Option { + let content = hover_content.trim(); + if content.is_empty() { + return None; + } + + for line in content.lines() { + let trimmed = line.trim(); + if trimmed.starts_with("let ") || trimmed.starts_with("const ") { + if let Some(colon_pos) = trimmed.find(':') { + let type_part = trimmed[colon_pos + 1..].trim(); + let type_end = type_part.find('=').unwrap_or(type_part.len()); + return Some(type_part[..type_end].trim().to_string()); + } + } + if !trimmed.starts_with("fn ") && !trimmed.starts_with("def ") { + if let Some(first_line) = trimmed.split('\n').next() { + return Some(first_line.to_string()); + } + } + } + Some(content.lines().next()?.to_string()) +} + #[derive(Debug, Clone)] pub struct LspAvailability { pub available: bool, @@ -922,7 +1152,7 @@ mod integration_tests { .expect("failed to open file"); client - .wait_for_ready(&test_file, 30) + .wait_for_ready(&test_file, 30, None) .expect("wait_for_ready failed"); // Line 61: ".filter(|path| is_url_or_git(path))" diff --git a/src/code/resolve.rs b/src/code/resolve.rs index 84a8baf..9d46da6 100644 --- a/src/code/resolve.rs +++ b/src/code/resolve.rs @@ -246,6 +246,7 @@ mod tests { end_line: 3, }, file: PathBuf::from(file), + signature: 
None, } } diff --git a/src/main.rs b/src/main.rs index d54f2a3..99e541c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,6 +17,7 @@ use glimpse::code::graph::CallGraph; use glimpse::code::index::{ clear_index, file_fingerprint, load_index, save_index, FileRecord, Index, }; +use glimpse::code::lsp::LspResolver; use glimpse::fetch::{GitProcessor, UrlProcessor}; use glimpse::{ get_config_path, is_source_file, load_config, load_repo_config, save_config, save_repo_config, @@ -270,8 +271,16 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { let mut index = load_index(&root)?.unwrap_or_else(Index::new); let needs_update = index_directory(&root, &mut index)?; + let mut needs_save = needs_update > 0; - if needs_update > 0 { + if args.precise { + let resolved = resolve_calls_with_lsp(&root, &mut index)?; + if resolved > 0 { + needs_save = true; + } + } + + if needs_save { save_index(&index, &root)?; } @@ -320,7 +329,11 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { fn handle_index_command(cmd: &IndexCommand) -> Result<()> { match cmd { - IndexCommand::Build { path, force } => { + IndexCommand::Build { + path, + force, + precise, + } => { let root = path.canonicalize().unwrap_or_else(|_| path.clone()); let mut index = if *force { @@ -330,21 +343,30 @@ fn handle_index_command(cmd: &IndexCommand) -> Result<()> { }; let updated = index_directory(&root, &mut index)?; + + if *precise { + let resolved = resolve_calls_with_lsp(&root, &mut index)?; + if resolved > 0 { + eprintln!("Resolved {} calls with LSP", resolved); + } + } + save_index(&index, &root)?; let file_count = index.files.len(); let def_count = index.definitions().count(); let call_count = index.calls().count(); + let resolved_count = index.calls().filter(|c| c.resolved.is_some()).count(); - if updated > 0 { + if updated > 0 || *precise { eprintln!( - "Index updated: {} files ({} updated), {} definitions, {} calls", - file_count, updated, def_count, call_count + "Index updated: {} files ({} 
updated), {} definitions, {} calls ({} resolved)", + file_count, updated, def_count, call_count, resolved_count ); } else { eprintln!( - "Index up to date: {} files, {} definitions, {} calls", - file_count, def_count, call_count + "Index up to date: {} files, {} definitions, {} calls ({} resolved)", + file_count, def_count, call_count, resolved_count ); } } @@ -388,7 +410,8 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { .template("{spinner:.green} {msg}") .expect("valid template"), ); - pb.set_message("Scanning files..."); + pb.set_message("scanning files..."); + pb.enable_steady_tick(std::time::Duration::from_millis(100)); let source_files: Vec<_> = ignore::WalkBuilder::new(root) .hidden(false) @@ -404,7 +427,10 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { }) .collect(); - pb.finish_and_clear(); + pb.set_message(format!( + "found {} source files, checking for changes...", + source_files.len() + )); let stale_files: Vec<_> = source_files .into_iter() @@ -430,6 +456,8 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { }) .collect(); + pb.finish_and_clear(); + let total = stale_files.len(); if total == 0 { return Ok(0); @@ -438,10 +466,11 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { let pb = ProgressBar::new(total as u64); pb.set_style( ProgressStyle::default_bar() - .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len}") + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") .expect("valid template") .progress_chars("#>-"), ); + pb.set_message("indexing..."); let mut updated = 0; @@ -483,6 +512,79 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { Ok(updated) } +fn resolve_calls_with_lsp(root: &Path, index: &mut Index) -> Result { + let unresolved_count: usize = index + .files + .values() + .map(|r| r.calls.iter().filter(|c| c.resolved.is_none()).count()) + .sum(); + + if unresolved_count == 0 { + return Ok(0); + } + + let pb = 
ProgressBar::new(unresolved_count as u64); + pb.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") + .expect("valid template") + .progress_chars("#>-"), + ); + pb.set_message("resolving calls with LSP..."); + + let mut lsp_resolver = LspResolver::with_progress(root, pb.clone()); + let mut resolved = 0; + + let file_paths: Vec<_> = index.files.keys().cloned().collect(); + + for file_path in file_paths { + let Some(record) = index.files.get(&file_path) else { + continue; + }; + + let calls_to_resolve: Vec<_> = record + .calls + .iter() + .enumerate() + .filter(|(_, c)| c.resolved.is_none()) + .map(|(i, c)| (i, c.clone())) + .collect(); + + if calls_to_resolve.is_empty() { + continue; + } + + let mut resolutions = Vec::new(); + + for (call_idx, call) in &calls_to_resolve { + pb.inc(1); + + if let Some(resolved_call) = lsp_resolver.resolve_call_full(call, index) { + resolutions.push((*call_idx, resolved_call)); + } + } + + if let Some(record) = index.files.get_mut(&file_path) { + for (call_idx, resolved_call) in resolutions { + if call_idx < record.calls.len() { + record.calls[call_idx].resolved = Some(resolved_call); + resolved += 1; + } + } + } + } + + pb.finish_and_clear(); + + let stats = lsp_resolver.stats(); + eprintln!( + "LSP stats: {} resolved, {} external, {} no definition, {} not indexed, {} no match", + stats.resolved, stats.external, stats.no_definition, stats.not_indexed, stats.no_match + ); + + Ok(resolved) +} + fn format_definitions( definitions: &[&glimpse::code::index::Definition], root: &Path, diff --git a/tests/integration.rs b/tests/integration.rs index f3d643e..66929ab 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -46,6 +46,7 @@ mod call_graph_tests { kind: DefinitionKind::Function, span: make_span(), file: file.to_path_buf(), + signature: None, } } @@ -71,6 +72,7 @@ mod call_graph_tests { caller: Some("caller".to_string()), span: make_span(), file: file_a.clone(), + 
resolved: None, }], imports: vec![], }); @@ -136,6 +138,7 @@ mod call_graph_tests { caller: Some("main".to_string()), span: make_span(), file: file_main.clone(), + resolved: None, }], imports: vec![], }); @@ -175,6 +178,7 @@ mod call_graph_tests { caller: Some("entry".to_string()), span: make_span(), file: file_a.clone(), + resolved: None, }], imports: vec![], }); @@ -190,6 +194,7 @@ mod call_graph_tests { caller: Some("middle".to_string()), span: make_span(), file: file_b.clone(), + resolved: None, }], imports: vec![], }); @@ -244,6 +249,7 @@ mod call_graph_tests { caller: Some("caller".to_string()), span: make_span(), file: file_a.clone(), + resolved: None, }], imports: vec![], }); diff --git a/tests/lsp_integration.rs b/tests/lsp_integration.rs index 51c226c..b581235 100644 --- a/tests/lsp_integration.rs +++ b/tests/lsp_integration.rs @@ -14,7 +14,10 @@ fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &st parser.set_language(extractor.language()).unwrap(); let tree = parser.parse(source, None).unwrap(); - let rel_path = path.file_name().map(PathBuf::from).unwrap_or(path.to_path_buf()); + let rel_path = path + .file_name() + .map(PathBuf::from) + .unwrap_or(path.to_path_buf()); let (mtime, size) = file_fingerprint(path).unwrap_or((0, source.len() as u64)); let record = FileRecord { @@ -93,14 +96,14 @@ edition = "2021" let mut index = Index::new(); let extractor = Extractor::new("rust").unwrap(); - + let main_rs_path = src.join("main.rs"); let rel_path = main_rs_path.strip_prefix(dir.path()).unwrap(); - + let mut parser = tree_sitter::Parser::new(); parser.set_language(extractor.language()).unwrap(); let tree = parser.parse(main_rs, None).unwrap(); - + let record = FileRecord { path: rel_path.to_path_buf(), mtime: 0, @@ -109,11 +112,25 @@ edition = "2021" calls: extractor.extract_calls(&tree, main_rs.as_bytes(), rel_path), imports: extractor.extract_imports(&tree, main_rs.as_bytes(), rel_path), }; - + eprintln!("Index record path: 
{:?}", record.path); - eprintln!("Definitions: {:?}", record.definitions.iter().map(|d| (&d.name, &d.file)).collect::>()); - eprintln!("Calls: {:?}", record.calls.iter().map(|c| (&c.callee, &c.file, c.span.start_line)).collect::>()); - + eprintln!( + "Definitions: {:?}", + record + .definitions + .iter() + .map(|d| (&d.name, &d.file)) + .collect::>() + ); + eprintln!( + "Calls: {:?}", + record + .calls + .iter() + .map(|c| (&c.callee, &c.file, c.span.start_line)) + .collect::>() + ); + index.update(record); let calls = collect_calls(&index); @@ -126,7 +143,10 @@ edition = "2021" wait_for_lsp_ready(&mut resolver, &calls, &index); if let Some(call) = helper_call { - eprintln!("Resolving call: callee={}, file={:?}, line={}", call.callee, call.file, call.span.start_line); + eprintln!( + "Resolving call: callee={}, file={:?}, line={}", + call.callee, call.file, call.span.start_line + ); let def = resolver.resolve_call(call, &index); if def.is_none() { eprintln!("Resolution failed! Check LSP logs."); @@ -284,7 +304,12 @@ func Process() { let mut index = Index::new(); let extractor = Extractor::new("go").unwrap(); index_file(&mut index, &extractor, &dir.path().join("main.go"), main_go); - index_file(&mut index, &extractor, &utils_dir.join("utils.go"), utils_go); + index_file( + &mut index, + &extractor, + &utils_dir.join("utils.go"), + utils_go, + ); let calls = collect_calls(&index); let process_call = calls.iter().find(|c| c.callee == "Process"); @@ -381,7 +406,12 @@ if __name__ == "__main__": let mut index = Index::new(); let extractor = Extractor::new("python").unwrap(); index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); - index_file(&mut index, &extractor, &dir.path().join("utils.py"), utils_py); + index_file( + &mut index, + &extractor, + &dir.path().join("utils.py"), + utils_py, + ); let calls = collect_calls(&index); let process_call = calls.iter().find(|c| c.callee == "process"); @@ -444,7 +474,10 @@ main(); index_file(&mut index, 
&extractor, &dir.path().join("main.ts"), main_ts); let calls = collect_calls(&index); - assert!(!calls.is_empty(), "Should extract calls from TypeScript code"); + assert!( + !calls.is_empty(), + "Should extract calls from TypeScript code" + ); let helper_call = calls.iter().find(|c| c.callee == "helper"); assert!(helper_call.is_some(), "Should find call to helper()"); @@ -500,7 +533,12 @@ main(); let mut index = Index::new(); let extractor = Extractor::new("typescript").unwrap(); index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts); - index_file(&mut index, &extractor, &dir.path().join("utils.ts"), utils_ts); + index_file( + &mut index, + &extractor, + &dir.path().join("utils.ts"), + utils_ts, + ); let calls = collect_calls(&index); let process_call = calls.iter().find(|c| c.callee == "process"); @@ -562,7 +600,10 @@ main(); index_file(&mut index, &extractor, &dir.path().join("main.js"), main_js); let calls = collect_calls(&index); - assert!(!calls.is_empty(), "Should extract calls from JavaScript code"); + assert!( + !calls.is_empty(), + "Should extract calls from JavaScript code" + ); let helper_call = calls.iter().find(|c| c.callee == "helper"); assert!(helper_call.is_some(), "Should find call to helper()"); @@ -762,7 +803,12 @@ void helper() { let mut index = Index::new(); let extractor = Extractor::new("cpp").unwrap(); - index_file(&mut index, &extractor, &dir.path().join("main.cpp"), main_cpp); + index_file( + &mut index, + &extractor, + &dir.path().join("main.cpp"), + main_cpp, + ); let calls = collect_calls(&index); assert!(!calls.is_empty(), "Should extract calls from C++ code"); @@ -823,7 +869,12 @@ int main() { let mut index = Index::new(); let extractor = Extractor::new("cpp").unwrap(); - index_file(&mut index, &extractor, &dir.path().join("main.cpp"), main_cpp); + index_file( + &mut index, + &extractor, + &dir.path().join("main.cpp"), + main_cpp, + ); let calls = collect_calls(&index); let process_call = calls.iter().find(|c| 
c.callee == "process"); @@ -847,9 +898,12 @@ mod lsp_availability { #[test] fn test_check_lsp_availability_returns_results() { let availability = check_lsp_availability(); - - assert!(!availability.is_empty(), "Should return availability for at least one language"); - + + assert!( + !availability.is_empty(), + "Should return availability for at least one language" + ); + for (lang, info) in &availability { println!( " {}: available={}, location={:?}, can_install={}, method={:?}", @@ -861,14 +915,17 @@ mod lsp_availability { #[test] fn test_rust_analyzer_detection() { let availability = check_lsp_availability(); - + if let Some(info) = availability.get("rust") { println!( "rust-analyzer: available={}, location={:?}, can_install={}", info.available, info.location, info.can_auto_install ); if info.available { - assert!(info.location.is_some(), "If available, should have location"); + assert!( + info.location.is_some(), + "If available, should have location" + ); } } } @@ -876,7 +933,7 @@ mod lsp_availability { #[test] fn test_npm_packages_can_be_installed() { let availability = check_lsp_availability(); - + if let Some(info) = availability.get("typescript") { println!( "typescript-language-server: available={}, can_install={}, method={:?}", @@ -901,7 +958,7 @@ mod lsp_availability { #[test] fn test_go_package_can_be_installed() { let availability = check_lsp_availability(); - + if let Some(info) = availability.get("go") { println!( "gopls: available={}, can_install={}, method={:?}", From 718fcf60f767b6ce4729855447713c587c9f7146 Mon Sep 17 00:00:00 2001 From: ro Date: Mon, 29 Dec 2025 18:00:50 -0800 Subject: [PATCH 32/35] chore: remove dead code and fix warnings --- src/code/graph.rs | 70 ------------------------------------- src/code/lsp.rs | 88 +++++++++++++++++++++++++++++++---------------- src/main.rs | 21 +++++++---- 3 files changed, 73 insertions(+), 106 deletions(-) diff --git a/src/code/graph.rs b/src/code/graph.rs index 3c5757d..b6ce631 100644 --- 
a/src/code/graph.rs +++ b/src/code/graph.rs @@ -1,10 +1,7 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::path::Path; -use indicatif::{ProgressBar, ProgressStyle}; - use super::index::{Definition, Index}; -use super::lsp::LspResolver; use super::resolve::Resolver; pub type NodeId = usize; @@ -83,73 +80,6 @@ impl CallGraph { graph } - pub fn build_with_lsp(index: &Index, root: &Path) -> Self { - let heuristic_resolver = Resolver::with_strict(index, false); - let mut graph = CallGraph::new(); - - for def in index.definitions() { - graph.add_definition(def.clone()); - } - - let calls: Vec<_> = index.calls().collect(); - let total = calls.len(); - - if total == 0 { - return graph; - } - - let pb = ProgressBar::new(total as u64); - pb.set_style( - ProgressStyle::default_bar() - .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") - .expect("valid template") - .progress_chars("#>-"), - ); - pb.set_message("initializing LSP..."); - - let mut lsp_resolver = LspResolver::with_progress(root, pb.clone()); - - pb.set_message("resolving calls"); - - for call in &calls { - pb.inc(1); - - let caller_id = call - .caller - .as_ref() - .and_then(|name| graph.find_node_by_file_and_name(&call.file, name)); - - let Some(caller_id) = caller_id else { - continue; - }; - - let callee_def = lsp_resolver.resolve_call(call, index).or_else(|| { - heuristic_resolver.resolve(&call.callee, call.qualifier.as_deref(), &call.file) - }); - - let callee_id = if let Some(def) = callee_def { - graph - .find_node_by_file_and_name(&def.file, &def.name) - .unwrap_or_else(|| graph.add_definition(def)) - } else { - continue; - }; - - graph.add_edge(caller_id, callee_id); - } - - pb.finish_and_clear(); - graph - } - - pub fn build_precise(index: &Index, root: &Path, strict: bool, precise: bool) -> Self { - if precise { - Self::build_with_lsp(index, root) - } else { - Self::build_with_options(index, strict) - } - } - fn add_definition(&mut self, definition: Definition) -> 
NodeId { let file_key = definition.file.to_string_lossy().to_string(); let composite_key = (file_key, definition.name.clone()); diff --git a/src/code/lsp.rs b/src/code/lsp.rs index 93acfe0..17e31e2 100644 --- a/src/code/lsp.rs +++ b/src/code/lsp.rs @@ -98,6 +98,7 @@ fn detect_zig_version(root: &Path) -> Option { detect_zig_version_from_zon(root) } +#[allow(clippy::literal_string_with_formatting_args)] fn download_and_extract(lsp: &LspConfig, root: &Path) -> Result { let Some(ref url_template) = lsp.url_template else { bail!("no download URL configured for {}", lsp.binary); @@ -388,6 +389,7 @@ struct LspClient { request_id: AtomicI32, root_uri: Uri, opened_files: HashMap, + is_ready: bool, } #[derive(Debug, Serialize, Deserialize)] @@ -429,6 +431,7 @@ impl LspClient { request_id: AtomicI32::new(1), root_uri, opened_files: HashMap::new(), + is_ready: false, }) } @@ -582,7 +585,6 @@ impl LspClient { }; let params = InitializeParams { - root_uri: Some(self.root_uri.clone()), capabilities, workspace_folders: Some(vec![WorkspaceFolder { uri: self.root_uri.clone(), @@ -706,8 +708,8 @@ impl Drop for LspClient { } } -#[derive(Debug, Default)] -pub struct LspStats { +#[derive(Debug, Default, Clone)] +pub struct LspServerStats { pub resolved: usize, pub no_definition: usize, pub external: usize, @@ -715,6 +717,36 @@ pub struct LspStats { pub no_match: usize, } +#[derive(Debug, Default)] +pub struct LspStats { + pub by_server: HashMap, +} + +impl LspStats { + pub fn total_resolved(&self) -> usize { + self.by_server.values().map(|s| s.resolved).sum() + } +} + +impl std::fmt::Display for LspStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut servers: Vec<_> = self.by_server.iter().collect(); + servers.sort_by_key(|(name, _)| name.as_str()); + + let parts: Vec = servers + .iter() + .map(|(name, stats)| { + format!( + "{}: {} resolved, {} external, {} no-def", + name, stats.resolved, stats.external, stats.no_definition + ) + }) + .collect(); + 
+ write!(f, "{}", parts.join(" | ")) + } +} + pub struct LspResolver { clients: HashMap, root: PathBuf, @@ -752,14 +784,6 @@ impl LspResolver { self.progress = pb; } - fn find_sample_file(&self, ext: &str) -> Option { - let pattern = format!("**/*.{}", ext); - glob::glob(&self.root.join(&pattern).to_string_lossy()) - .ok()? - .filter_map(|p| p.ok()) - .find(|p| !p.to_string_lossy().contains("/target/")) - } - fn get_or_create_client(&mut self, ext: &str) -> Result<&mut LspClient> { let registry = Registry::global(); let lang_entry = registry @@ -781,19 +805,6 @@ impl LspResolver { let mut client = LspClient::new(lsp_config, &self.root)?; client.initialize()?; - let sample_file = self.find_sample_file(&ext); - if let Some(ref sample) = sample_file { - if let Ok(content) = std::fs::read_to_string(sample) { - let lang_id = Self::language_id_for_ext(&ext); - let _ = client.open_file(sample, &content, lang_id); - let _ = client.wait_for_ready(sample, 60, self.progress.as_ref()); - } - } - - if let Some(ref pb) = self.progress { - pb.set_message(format!("{} ready", lsp_config.binary)); - } - self.clients.insert(key.clone(), client); } @@ -829,8 +840,22 @@ impl LspResolver { } } + fn server_name_for_ext(&self, ext: &str) -> Option { + let registry = Registry::global(); + let lang_entry = registry.get_by_extension(ext)?; + lang_entry.lsp.as_ref().map(|l| l.binary.clone()) + } + + fn get_server_stats(&mut self, server: &str) -> &mut LspServerStats { + self.stats + .by_server + .entry(server.to_string()) + .or_default() + } + pub fn resolve_call_full(&mut self, call: &Call, index: &Index) -> Option { let ext = call.file.extension().and_then(|e| e.to_str())?.to_string(); + let server_name = self.server_name_for_ext(&ext)?; let abs_path = self.root.join(&call.file); let language_id = Self::language_id_for_ext(&ext); let callee = call.callee.clone(); @@ -849,6 +874,11 @@ impl LspResolver { let client = self.get_or_create_client(&ext).ok()?; client.open_file(&abs_path, &content, 
language_id).ok()?; + if !client.is_ready { + client.wait_for_ready(&abs_path, 60, None).ok()?; + client.is_ready = true; + } + let signature = client .hover(&abs_path, start_line_idx as u32, col) .ok() @@ -872,7 +902,7 @@ impl LspResolver { let location = match location { Some(loc) => loc, None => { - self.stats.no_definition += 1; + self.get_server_stats(&server_name).no_definition += 1; return None; } }; @@ -883,7 +913,7 @@ impl LspResolver { let rel_path = match def_path.strip_prefix(&root) { Ok(p) => p.to_path_buf(), Err(_) => { - self.stats.external += 1; + self.get_server_stats(&server_name).external += 1; return None; } }; @@ -894,7 +924,7 @@ impl LspResolver { let record = match index.get(&rel_path) { Some(r) => r, None => { - self.stats.not_indexed += 1; + self.get_server_stats(&server_name).not_indexed += 1; return None; } }; @@ -906,12 +936,12 @@ impl LspResolver { { Some(d) => d, None => { - self.stats.no_match += 1; + self.get_server_stats(&server_name).no_match += 1; return None; } }; - self.stats.resolved += 1; + self.get_server_stats(&server_name).resolved += 1; Some(ResolvedCall { target_file: rel_path, target_name: def.name.clone(), diff --git a/src/main.rs b/src/main.rs index 99e541c..db8f390 100644 --- a/src/main.rs +++ b/src/main.rs @@ -273,7 +273,11 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { let needs_update = index_directory(&root, &mut index)?; let mut needs_save = needs_update > 0; - if args.precise { + // Only run LSP resolution if: + // 1. --precise is requested + // 2. 
Either files were updated OR no calls have been resolved yet (first --precise run) + let has_any_resolved = index.calls().any(|c| c.resolved.is_some()); + if args.precise && (needs_update > 0 || !has_any_resolved) { let resolved = resolve_calls_with_lsp(&root, &mut index)?; if resolved > 0 { needs_save = true; @@ -284,7 +288,9 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { save_index(&index, &root)?; } - let graph = CallGraph::build_precise(&index, &root, args.strict, args.precise); + // After LSP resolution, use build_with_options which checks call.resolved first + // This avoids creating another LSP resolver and re-trying failed calls + let graph = CallGraph::build_with_options(&index, args.strict); let node_id = if let Some(ref file) = target.file { let file_path = root.join(file); @@ -344,7 +350,9 @@ fn handle_index_command(cmd: &IndexCommand) -> Result<()> { let updated = index_directory(&root, &mut index)?; - if *precise { + // Only run LSP resolution if files were updated or no calls resolved yet + let has_any_resolved = index.calls().any(|c| c.resolved.is_some()); + if *precise && (updated > 0 || !has_any_resolved) { let resolved = resolve_calls_with_lsp(&root, &mut index)?; if resolved > 0 { eprintln!("Resolved {} calls with LSP", resolved); @@ -577,10 +585,9 @@ fn resolve_calls_with_lsp(root: &Path, index: &mut Index) -> Result { pb.finish_and_clear(); let stats = lsp_resolver.stats(); - eprintln!( - "LSP stats: {} resolved, {} external, {} no definition, {} not indexed, {} no match", - stats.resolved, stats.external, stats.no_definition, stats.not_indexed, stats.no_match - ); + if !stats.by_server.is_empty() { + eprintln!("LSP: {}", stats); + } Ok(resolved) } From 24c8698bdb75725b1f1b2be0592c2efd5d75d812 Mon Sep 17 00:00:00 2001 From: ro Date: Tue, 30 Dec 2025 19:03:35 -0800 Subject: [PATCH 33/35] feat: add LSP progress indicators, fix zig/clangd, add tracing --- Cargo.lock | 96 +++++++++++++++++++++++++++++++++-- Cargo.toml | 2 + 
registry.toml | 20 ++++---- src/analyzer.rs | 1 + src/cli.rs | 4 ++ src/code/lsp.rs | 131 ++++++++++++++++++++++++++++++++++++++---------- src/main.rs | 23 ++++++++- 7 files changed, 233 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d793cc2..2542a80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1172,6 +1172,8 @@ dependencies = [ "tiktoken-rs", "tokenizers", "toml", + "tracing", + "tracing-subscriber", "tree-sitter", "url", "walkdir", @@ -2018,6 +2020,15 @@ dependencies = [ "tendril", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "md5" version = "0.7.0" @@ -2164,6 +2175,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.60.2", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -2890,7 +2910,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -3126,6 +3146,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -3445,6 +3474,15 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tiff" version = "0.9.1" @@ -3627,21 +3665,63 @@ 
checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ + "matchers", + "nu-ansi-term", "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -3811,6 +3891,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index dedbd78..f609b9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,8 @@ flate2 = "1.0" zip = "2.2" tar = "0.4" xz2 = "0.1" +tracing = "0.1.44" +tracing-subscriber = { version = "0.3.22", features = ["env-filter"] } [build-dependencies] serde = { version = "1.0.217", features = ["derive"] } diff --git a/registry.toml b/registry.toml index a9f40a6..97881eb 100644 --- a/registry.toml +++ b/registry.toml @@ -68,12 +68,12 @@ definition_query = """ """ call_query = """ (call_expression - function: [ - (identifier) @name - (field_expression - operand: (_) @qualifier - member: (identifier) @name) - ]) @reference.call + function: (identifier) @name) @reference.call + +(call_expression + function: (field_expression + object: (_) @qualifier + member: (identifier) @name)) @reference.call """ import_query = """ (variable_declaration @@ -139,8 +139,8 @@ import_query = """ [language.lsp] binary = "clangd" args = [] -version = "19.1.0" -url_template = "https://github.com/clangd/clangd/releases/download/{version}/clangd-{target}.zip" +version = "21.1.0" +url_template = "https://github.com/clangd/clangd/releases/download/{version}/clangd-{target}-{version}.zip" archive = "zip" binary_path = "clangd_{version}/bin/clangd" @@ -237,8 +237,8 @@ import_query = """ [language.lsp] binary = "clangd" args = [] -version = "19.1.0" -url_template = "https://github.com/clangd/clangd/releases/download/{version}/clangd-{target}.zip" +version = "21.1.0" +url_template = "https://github.com/clangd/clangd/releases/download/{version}/clangd-{target}-{version}.zip" archive = "zip" binary_path = "clangd_{version}/bin/clangd" diff --git a/src/analyzer.rs b/src/analyzer.rs index ba79d78..8e64f0f 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -371,6 +371,7 @@ mod tests { 
traverse_links: false, link_depth: None, xml: false, + verbose: 0, } } diff --git a/src/cli.rs b/src/cli.rs index 6b11ae4..ab89121 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -212,6 +212,10 @@ pub struct Cli { #[arg(long)] pub no_tokens: bool, + /// Verbosity level (-v, -vv, -vvv) + #[arg(short, long, action = clap::ArgAction::Count)] + pub verbose: u8, + #[arg(long, value_enum)] pub tokenizer: Option, diff --git a/src/code/lsp.rs b/src/code/lsp.rs index 17e31e2..0c097bc 100644 --- a/src/code/lsp.rs +++ b/src/code/lsp.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fs::{self, File}; use std::io::{BufRead, BufReader, BufWriter, Read, Write}; use std::path::{Path, PathBuf}; @@ -6,6 +6,7 @@ use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; use std::sync::atomic::{AtomicI32, Ordering}; use anyhow::{bail, Context, Result}; +use tracing::{debug, trace, warn}; use flate2::read::GzDecoder; use indicatif::{ProgressBar, ProgressStyle}; use lsp_types::{ @@ -85,17 +86,45 @@ fn detect_zig_version_from_zon(root: &Path) -> Option { } fn detect_zig_version(root: &Path) -> Option { - if let Ok(output) = Command::new("zig").arg("version").output() { + let zig_version = if let Ok(output) = Command::new("zig").arg("version").output() { if output.status.success() { let version_str = String::from_utf8_lossy(&output.stdout); let version = version_str.trim(); - if let Some(base) = version.split('-').next() { - return Some(base.to_string()); + version.split('-').next().map(|s| s.to_string()) + } else { + None + } + } else { + None + }; + + let zig_version = zig_version.or_else(|| detect_zig_version_from_zon(root))?; + + // zls releases may lag behind zig - try to find matching major.minor + // e.g., zig 0.15.2 -> try 0.15.2, 0.15.1, 0.15.0 + let parts: Vec<&str> = zig_version.split('.').collect(); + if parts.len() >= 2 { + let major_minor = format!("{}.{}", parts[0], parts[1]); + // Try decreasing patch versions + for 
patch in (0..=10).rev() { + let version = format!("{}.{}", major_minor, patch); + let url = format!( + "https://github.com/zigtools/zls/releases/download/{}/zls-x86_64-linux.tar.xz", + version + ); + if let Ok(resp) = reqwest::blocking::Client::new() + .head(&url) + .send() + { + if resp.status().is_success() || resp.status().as_u16() == 302 { + debug!(zig_version = %zig_version, zls_version = %version, "found matching zls version"); + return Some(version); + } } } } - detect_zig_version_from_zon(root) + Some(zig_version) } #[allow(clippy::literal_string_with_formatting_args)] @@ -249,14 +278,14 @@ fn install_npm_package(lsp: &LspConfig) -> Result { bail!("neither bun nor npm found. Install one of them or install the LSP manually"); }; - let pkg_dir = lsp_dir().join(&lsp.binary); + let pkg_dir = lsp_dir().join(format!("{}-pkg", &lsp.binary)); fs::create_dir_all(&pkg_dir)?; - eprintln!("Installing {} via {} (local)...", package, pkg_manager); - let init_status = Command::new(&pkg_manager_path) .args(["init", "--yes"]) .current_dir(&pkg_dir) + .stdout(Stdio::null()) + .stderr(Stdio::null()) .status() .with_context(|| format!("failed to run {} init", pkg_manager))?; @@ -265,17 +294,19 @@ fn install_npm_package(lsp: &LspConfig) -> Result { } let packages: Vec<&str> = package.split_whitespace().collect(); - let mut install_args = vec!["install"]; + let mut install_args = vec!["add"]; install_args.extend(packages.iter()); let install_status = Command::new(&pkg_manager_path) .args(&install_args) .current_dir(&pkg_dir) + .stdout(Stdio::null()) + .stderr(Stdio::null()) .status() - .with_context(|| format!("failed to run {} install", pkg_manager))?; + .with_context(|| format!("failed to run {} add", pkg_manager))?; if !install_status.success() { - bail!("{} install failed for {}", pkg_manager, package); + bail!("{} add failed for {}", pkg_manager, package); } let bin_path = pkg_dir.join("node_modules").join(".bin").join(&lsp.binary); @@ -290,7 +321,6 @@ fn 
install_npm_package(lsp: &LspConfig) -> Result { let wrapper_path = lsp_binary_path(lsp); create_wrapper_script(&wrapper_path, &bin_path)?; - eprintln!("Installed {} to {}", lsp.binary, wrapper_path.display()); Ok(wrapper_path) } @@ -328,11 +358,11 @@ fn install_go_package(lsp: &LspConfig) -> Result { let install_dir = lsp_dir(); fs::create_dir_all(&install_dir)?; - eprintln!("Installing {} via go install...", package); - let status = Command::new(&go_path) .args(["install", package]) .env("GOBIN", &install_dir) + .stdout(Stdio::null()) + .stderr(Stdio::null()) .status() .context("failed to run go install")?; @@ -342,7 +372,6 @@ fn install_go_package(lsp: &LspConfig) -> Result { let binary_path = install_dir.join(&lsp.binary); if binary_path.exists() { - eprintln!("Installed {} to {}", lsp.binary, binary_path.display()); return Ok(binary_path); } @@ -356,13 +385,17 @@ fn install_go_package(lsp: &LspConfig) -> Result { fn find_lsp_binary(lsp: &LspConfig, root: &Path) -> Result { let local_path = lsp_binary_path(lsp); if local_path.exists() { + debug!(binary = %lsp.binary, path = %local_path.display(), "using cached LSP binary"); return Ok(local_path); } if let Ok(system_path) = which::which(&lsp.binary) { + debug!(binary = %lsp.binary, path = %system_path.display(), "using system LSP binary"); return Ok(system_path); } + debug!(binary = %lsp.binary, "LSP not found, attempting install"); + if lsp.url_template.is_some() { return download_and_extract(lsp, root); } @@ -518,17 +551,21 @@ impl LspClient { path: &Path, max_attempts: u32, pb: Option<&ProgressBar>, + server_name: Option<&str>, ) -> Result { use std::thread; use std::time::Duration; let uri = path_to_uri(path)?; + let name = server_name.unwrap_or("LSP"); + + debug!(server = %name, "waiting for LSP to be ready"); if let Some(pb) = pb { - pb.set_message("waiting for syntax analysis..."); + pb.set_message(format!("{}: waiting for indexing...", name)); } - for _ in 0..10 { + for i in 0..10 { let params = 
lsp_types::DocumentSymbolParams { text_document: TextDocumentIdentifier { uri: uri.clone() }, work_done_progress_params: Default::default(), @@ -536,13 +573,16 @@ impl LspClient { }; match self.send_request("textDocument/documentSymbol", serde_json::to_value(params)?) { - Ok(Value::Array(arr)) if !arr.is_empty() => break, + Ok(Value::Array(arr)) if !arr.is_empty() => { + trace!(server = %name, attempt = i, "syntax analysis ready"); + break; + } _ => thread::sleep(Duration::from_millis(200)), } } if let Some(pb) = pb { - pb.set_message("waiting for semantic analysis..."); + pb.set_message(format!("{}: ready", name)); } for attempt in 0..max_attempts { @@ -552,7 +592,10 @@ impl LspClient { }); match self.send_request("textDocument/hover", hover_params) { - Ok(result) if !result.is_null() => return Ok(true), + Ok(result) if !result.is_null() => { + debug!(server = %name, attempts = attempt + 1, "LSP ready"); + return Ok(true); + } _ => {} } @@ -561,6 +604,7 @@ impl LspClient { } } + warn!(server = %name, "LSP did not become ready after {} attempts", max_attempts); Ok(false) } @@ -736,19 +780,21 @@ impl std::fmt::Display for LspStats { let parts: Vec = servers .iter() .map(|(name, stats)| { + let total = stats.resolved + stats.external + stats.no_definition + stats.not_indexed + stats.no_match; format!( - "{}: {} resolved, {} external, {} no-def", - name, stats.resolved, stats.external, stats.no_definition + "{}: {}/{} resolved ({} external, {} no-def, {} not-indexed, {} no-match)", + name, stats.resolved, total, stats.external, stats.no_definition, stats.not_indexed, stats.no_match ) }) .collect(); - write!(f, "{}", parts.join(" | ")) + write!(f, "{}", parts.join("\n ")) } } pub struct LspResolver { clients: HashMap, + failed_servers: HashSet, root: PathBuf, file_cache: HashMap, progress: Option, @@ -759,6 +805,7 @@ impl LspResolver { pub fn new(root: &Path) -> Self { Self { clients: HashMap::new(), + failed_servers: HashSet::new(), root: root.to_path_buf(), 
file_cache: HashMap::new(), progress: None, @@ -769,6 +816,7 @@ impl LspResolver { pub fn with_progress(root: &Path, pb: ProgressBar) -> Self { Self { clients: HashMap::new(), + failed_servers: HashSet::new(), root: root.to_path_buf(), file_cache: HashMap::new(), progress: Some(pb), @@ -797,13 +845,30 @@ impl LspResolver { let key = lsp_config.binary.clone(); + if self.failed_servers.contains(&key) { + bail!("{} previously failed to initialize", key); + } + if !self.clients.contains_key(&key) { if let Some(ref pb) = self.progress { pb.set_message(format!("starting {}...", lsp_config.binary)); } - let mut client = LspClient::new(lsp_config, &self.root)?; - client.initialize()?; + let client = match LspClient::new(lsp_config, &self.root) { + Ok(mut c) => { + if let Err(e) = c.initialize() { + self.failed_servers.insert(key.clone()); + warn!(server = %lsp_config.binary, error = ?e, "LSP initialization failed"); + return Err(e); + } + c + } + Err(e) => { + self.failed_servers.insert(key.clone()); + warn!(server = %lsp_config.binary, error = ?e, "LSP server failed to start"); + return Err(e); + } + }; self.clients.insert(key.clone(), client); } @@ -871,12 +936,22 @@ impl LspResolver { let line_content = lines[start_line_idx]; let col = line_content.find(&callee).unwrap_or(0) as u32; + let pb = self.progress.clone(); let client = self.get_or_create_client(&ext).ok()?; client.open_file(&abs_path, &content, language_id).ok()?; if !client.is_ready { - client.wait_for_ready(&abs_path, 60, None).ok()?; + let ready = client + .wait_for_ready(&abs_path, 60, pb.as_ref(), Some(&server_name)) + .unwrap_or(false); client.is_ready = true; + if let Some(ref pb) = pb { + if ready { + pb.set_message("resolving..."); + } else { + pb.set_message(format!("{}: indexing (may be slow)", server_name)); + } + } } let signature = client @@ -902,6 +977,7 @@ impl LspResolver { let location = match location { Some(loc) => loc, None => { + trace!(callee = %callee, file = %call.file.display(), "no 
definition found"); self.get_server_stats(&server_name).no_definition += 1; return None; } @@ -913,6 +989,7 @@ impl LspResolver { let rel_path = match def_path.strip_prefix(&root) { Ok(p) => p.to_path_buf(), Err(_) => { + trace!(callee = %callee, path = %def_path.display(), "definition is external"); self.get_server_stats(&server_name).external += 1; return None; } @@ -1182,7 +1259,7 @@ mod integration_tests { .expect("failed to open file"); client - .wait_for_ready(&test_file, 30, None) + .wait_for_ready(&test_file, 30, None, None) .expect("wait_for_ready failed"); // Line 61: ".filter(|path| is_url_or_git(path))" diff --git a/src/main.rs b/src/main.rs index db8f390..020341b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,8 @@ use std::path::{Path, PathBuf}; use anyhow::{bail, Context, Result}; use indicatif::{ProgressBar, ProgressStyle}; use rayon::prelude::*; +use tracing::debug; +use tracing_subscriber::EnvFilter; use crate::analyzer::process_directory; use crate::cli::{Cli, CodeArgs, Commands, FunctionTarget, IndexCommand}; @@ -40,9 +42,17 @@ fn has_custom_options(args: &Cli) -> bool { } fn main() -> Result<()> { + tracing_subscriber::fmt() + .with_env_filter(EnvFilter::from_default_env()) + .with_writer(std::io::stderr) + .without_time() + .init(); + let mut config = load_config()?; let mut args = Cli::parse_with_config(&config)?; + debug!("config loaded, args parsed"); + if let Some(ref cmd) = args.command { return match cmd { Commands::Code(code_args) => handle_code_command(code_args), @@ -486,7 +496,13 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { let records: Vec = chunk .par_iter() .filter_map(|(path, rel_path, ext, mtime, size)| { - let extractor = Extractor::from_extension(ext).ok()?; + let extractor = match Extractor::from_extension(ext) { + Ok(e) => e, + Err(e) => { + debug!(ext = %ext, error = ?e, "no extractor for extension"); + return None; + } + }; let source = fs::read(path).ok()?; let mut parser = 
tree_sitter::Parser::new(); @@ -539,6 +555,7 @@ fn resolve_calls_with_lsp(root: &Path, index: &mut Index) -> Result { .progress_chars("#>-"), ); pb.set_message("resolving calls with LSP..."); + pb.enable_steady_tick(std::time::Duration::from_millis(100)); let mut lsp_resolver = LspResolver::with_progress(root, pb.clone()); let mut resolved = 0; @@ -585,7 +602,9 @@ fn resolve_calls_with_lsp(root: &Path, index: &mut Index) -> Result { pb.finish_and_clear(); let stats = lsp_resolver.stats(); - if !stats.by_server.is_empty() { + if stats.by_server.is_empty() { + eprintln!("LSP: no servers responded (check if LSP binaries are working)"); + } else { eprintln!("LSP: {}", stats); } From d708ecdde6433ffd715793fe1dae862492f20e4e Mon Sep 17 00:00:00 2001 From: ro Date: Wed, 31 Dec 2025 00:53:45 -0800 Subject: [PATCH 34/35] readme --- readme.md | 219 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 137 insertions(+), 82 deletions(-) diff --git a/readme.md b/readme.md index bae4add..765d73f 100644 --- a/readme.md +++ b/readme.md @@ -1,22 +1,24 @@ # Glimpse -A blazingly fast tool for peeking at codebases. Perfect for loading your codebase into an LLM's context, with built-in token counting support. +A blazingly fast tool for peeking at codebases. Perfect for loading your codebase into an LLM's context, with built-in token counting and code analysis. 
## Features -- 🚀 Fast parallel file processing -- 🌳 Tree-view of codebase structure -- 📝 Source code content viewing -- 🔢 Token counting with multiple backends -- ⚙️ Configurable defaults -- 📋 Clipboard support -- 🎨 Customizable file type detection -- 🥷 Respects .gitignore automatically -- 📁 Local per-repo configuration with `.glimpse` file -- 🔗 Web content processing with Markdown conversion -- 📦 Git repository support -- 🌐 URL traversal with configurable depth -- 🏷️ XML output format for better LLM compatibility +- Fast parallel file processing +- Tree-view of codebase structure +- Source code content viewing +- Token counting with multiple backends (tiktoken, HuggingFace) +- Call graph generation for code analysis +- Configurable defaults with global and per-repo config +- Clipboard support +- Customizable file type detection +- Respects .gitignore automatically +- Web content processing with Markdown conversion +- Git repository support (GitHub, GitLab, Bitbucket, Azure DevOps) +- URL traversal with configurable depth +- XML output format for better LLM compatibility +- Interactive file picker +- PDF export ## Installation @@ -53,7 +55,8 @@ paru -S glimpse ## Usage -Basic usage: +### Basic Usage + ```bash # Process a local directory glimpse /path/to/project @@ -73,7 +76,8 @@ glimpse https://example.com/docs --traverse-links --link-depth 2 On first use in a repository, Glimpse will save a `.glimpse` configuration file locally with your specified options. This file can be referenced on subsequent runs, or overridden by passing options again. 
-Common options: +### Common Options + ```bash # Show hidden files glimpse -H /path/to/project @@ -90,9 +94,12 @@ glimpse -f output.txt /path/to/project # Print output to stdout instead of copying to clipboard glimpse -p /path/to/project -# Include specific file types +# Include specific file types (additive to source files) glimpse -i "*.rs,*.go" /path/to/project +# Only include specific patterns (replaces default source detection) +glimpse --only-include "*.rs,*.go" /path/to/project + # Exclude patterns or files glimpse -e "target/*,dist/*" /path/to/project @@ -111,20 +118,80 @@ glimpse https://github.com/username/repo.git --pdf output.pdf # Open interactive file picker glimpse --interactive /path/to/project +# Output in XML format for better LLM compatibility +glimpse -x /path/to/project + # Print the config file path and exit glimpse --config_path # Initialize a .glimpse config file in the current directory glimpse --config +``` -# Output in XML format for better LLM compatibility -glimpse -x /path/to/project +## Code Analysis + +Glimpse includes powerful code analysis features for understanding call relationships in your codebase. 
+ +### Call Graph Generation + +Generate call graphs to see what functions a target function calls (callees) or what calls it (callers): + +```bash +# Generate call graph for a function (searches all files) +glimpse code :function_name + +# Specify file and function +glimpse code src/main.rs:main + +# Include callers (reverse call graph) +glimpse code src/main.rs:main --callers + +# Limit traversal depth +glimpse code :process --depth 3 + +# Output to file +glimpse code :build -f callgraph.md + +# Strict mode: only resolve via imports (no global name matching) +glimpse code :main --strict + +# Precise mode: use LSP for type-aware resolution (slower but accurate) +glimpse code :main --precise + +# Specify project root +glimpse code :main --root /path/to/project +``` + +### Code Index Management + +Glimpse maintains an index for faster code analysis. Manage it with: + +```bash +# Build or update the index +glimpse index build + +# Build with LSP for precise resolution +glimpse index build --precise + +# Force rebuild (ignore existing index) +glimpse index build --force + +# Clear the index +glimpse index clear + +# Show index status and stats +glimpse index status + +# Specify project path +glimpse index build /path/to/project ``` -## CLI Options +## CLI Reference ``` Usage: glimpse [OPTIONS] [PATH] + glimpse code [OPTIONS] + glimpse index Arguments: [PATH] Files, directories, or URLs to analyze [default: .] @@ -134,6 +201,7 @@ Options: --config Init glimpse config file in current directory --interactive Opens interactive file picker (? for help) -i, --include Additional patterns to include (e.g. 
"*.rs,*.go") + --only-include Only include these patterns (replaces source detection) -e, --exclude Additional patterns or files to exclude -s, --max-size Maximum file size in bytes --max-depth Maximum directory depth to traverse @@ -151,8 +219,26 @@ Options: --link-depth Maximum depth to traverse links (default: 1) --pdf Save output as PDF -x, --xml Output in XML format for better LLM compatibility + -v, --verbose Verbosity level (-v, -vv, -vvv) -h, --help Print help -V, --version Print version + +Code Subcommand: + glimpse code Generate call graph for a function + Target in file:function or :function format + --root Project root directory [default: .] + --callers Include callers (reverse call graph) + --depth Maximum depth to traverse + -f, --file Output file (default: stdout) + --strict Only resolve calls via imports + --precise Use LSP for type-aware resolution + +Index Subcommand: + glimpse index build [PATH] Build or update the index + --force Force rebuild + --precise Use LSP for precise resolution + glimpse index clear [PATH] Clear the index + glimpse index status [PATH] Show index status and stats ``` ## Configuration @@ -186,12 +272,10 @@ default_excludes = [ ## XML Output Format -Glimpse supports XML output format designed for better compatibility with Large Language Models (LLMs) like Claude, GPT, and others. When using the `-x` or `--xml` flag, the output is structured with clear XML tags that help LLMs better understand the context and structure of your codebase. +Glimpse supports XML output format designed for better compatibility with Large Language Models. When using the `-x` or `--xml` flag, the output is structured with clear XML tags that help LLMs better understand the context and structure of your codebase. 
### XML Structure -The XML output wraps all content in a `` tag with the project name: - ```xml @@ -217,77 +301,34 @@ Total size: 45 bytes ### Benefits for LLM Usage -- **Clear Context Boundaries**: The `` wrapper helps LLMs understand where your codebase begins and ends -- **Structured Information**: Separate sections for directory tree, file contents, and summary -- **Proper Escaping**: XML-safe content that won't confuse parsers -- **Project Identification**: Automatic project name detection for better context - -### Usage Examples - -```bash -# Basic XML output -glimpse -x /path/to/project - -# XML output with file save -glimpse -x -f project.xml /path/to/project - -# XML output to stdout -glimpse -x --print /path/to/project - -# XML output with specific includes -glimpse -x -i "*.rs,*.py" /path/to/project -``` +- Clear context boundaries with the `` wrapper +- Structured sections for directory tree, file contents, and summary +- Proper XML escaping +- Automatic project name detection ## Token Counting Glimpse supports two tokenizer backends: -1. Tiktoken (Default): OpenAI's tokenizer implementation, perfect for accurately estimating tokens for GPT models. +1. **Tiktoken** (Default): OpenAI's tokenizer implementation, perfect for accurately estimating tokens for GPT models. -2. HuggingFace Tokenizers: Supports any model from the HuggingFace hub or local tokenizer files, great for custom models or other ML frameworks. +2. **HuggingFace Tokenizers**: Supports any model from the HuggingFace hub or local tokenizer files, great for custom models or other ML frameworks. The token count appears in both file content views and the final summary, helping you estimate context window usage for large language models. -Example token count output: -``` -File: src/main.rs -Tokens: 245 -================================================== -// File contents here... 
- -Summary: -Total files: 10 -Total size: 15360 bytes -Total tokens: 2456 -``` - -## Troubleshooting +## Git Repository Support -1. **File too large**: Adjust `max_size` in config -2. **Missing files**: Check `hidden` flag and exclude patterns -3. **Performance issues**: Try adjusting thread count with `-t` -4. **Tokenizer errors**: - - For HuggingFace models, ensure you have internet connection for downloading - - For local tokenizer files, verify the file path and format - - Try using the default tiktoken backend if issues persist - -## License - -MIT - -## Features in Detail - -### Git Repository Support -Glimpse can directly process Git repositories from popular hosting services: -- GitHub repositories -- GitLab repositories -- Bitbucket repositories -- Azure DevOps repositories +Glimpse can directly process Git repositories from: +- GitHub +- GitLab +- Bitbucket +- Azure DevOps - Any Git repository URL (ending with .git) The repository is cloned to a temporary directory, processed, and automatically cleaned up. -### Web Content Processing +## Web Content Processing + Glimpse can process web pages and convert them to Markdown: - Preserves heading structure - Converts links (both relative and absolute) @@ -297,10 +338,24 @@ Glimpse can process web pages and convert them to Markdown: With link traversal enabled, Glimpse can also process linked pages up to a specified depth, making it perfect for documentation sites and wikis. -### PDF Output +## PDF Output + Any processed content (local files, Git repositories, or web pages) can be saved as a PDF with: - Preserved formatting - Syntax highlighting - Table of contents - Page numbers -- Custom headers and footers + +## Troubleshooting + +1. **File too large**: Adjust `max_size` in config +2. **Missing files**: Check `hidden` flag and exclude patterns +3. **Performance issues**: Try adjusting thread count with `-t` +4. 
**Tokenizer errors**: + - For HuggingFace models, ensure you have internet connection for downloading + - For local tokenizer files, verify the file path and format + - Try using the default tiktoken backend if issues persist + +## License + +MIT From e037c981a79bae236b2ae6fcafd24987815e50eb Mon Sep 17 00:00:00 2001 From: ro Date: Wed, 31 Dec 2025 01:02:56 -0800 Subject: [PATCH 35/35] feat: add --hidden and --no-ignore flags to code/index subcommands --- src/cli.rs | 16 ++++++++++++++++ src/main.rs | 12 ++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index ab89121..22ae98a 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -88,6 +88,14 @@ pub struct CodeArgs { /// Precise mode: use LSP for type-aware resolution (slower but more accurate) #[arg(long)] pub precise: bool, + + /// Include hidden files and directories + #[arg(short = 'H', long)] + pub hidden: bool, + + /// Don't respect ignore files (.gitignore, .ignore, etc.) + #[arg(long)] + pub no_ignore: bool, } #[derive(Parser, Debug, Clone)] @@ -111,6 +119,14 @@ pub enum IndexCommand { /// Use LSP for precise call resolution (slower but more accurate) #[arg(long)] precise: bool, + + /// Include hidden files and directories + #[arg(short = 'H', long)] + hidden: bool, + + /// Don't respect ignore files (.gitignore, .ignore, etc.) 
+ #[arg(long)] + no_ignore: bool, }, /// Clear the index for a project diff --git a/src/main.rs b/src/main.rs index 020341b..20d2925 100644 --- a/src/main.rs +++ b/src/main.rs @@ -280,7 +280,7 @@ fn handle_code_command(args: &CodeArgs) -> Result<()> { let target = FunctionTarget::parse(&args.target)?; let mut index = load_index(&root)?.unwrap_or_else(Index::new); - let needs_update = index_directory(&root, &mut index)?; + let needs_update = index_directory(&root, &mut index, args.hidden, args.no_ignore)?; let mut needs_save = needs_update > 0; // Only run LSP resolution if: @@ -349,6 +349,8 @@ fn handle_index_command(cmd: &IndexCommand) -> Result<()> { path, force, precise, + hidden, + no_ignore, } => { let root = path.canonicalize().unwrap_or_else(|_| path.clone()); @@ -358,7 +360,7 @@ fn handle_index_command(cmd: &IndexCommand) -> Result<()> { load_index(&root)?.unwrap_or_else(Index::new) }; - let updated = index_directory(&root, &mut index)?; + let updated = index_directory(&root, &mut index, *hidden, *no_ignore)?; // Only run LSP resolution if files were updated or no calls resolved yet let has_any_resolved = index.calls().any(|c| c.resolved.is_some()); @@ -421,7 +423,7 @@ fn handle_index_command(cmd: &IndexCommand) -> Result<()> { const INDEX_CHUNK_SIZE: usize = 256; -fn index_directory(root: &Path, index: &mut Index) -> Result { +fn index_directory(root: &Path, index: &mut Index, hidden: bool, no_ignore: bool) -> Result { let pb = ProgressBar::new_spinner(); pb.set_style( ProgressStyle::default_spinner() @@ -432,7 +434,9 @@ fn index_directory(root: &Path, index: &mut Index) -> Result { pb.enable_steady_tick(std::time::Duration::from_millis(100)); let source_files: Vec<_> = ignore::WalkBuilder::new(root) - .hidden(false) + .hidden(!hidden) + .git_ignore(!no_ignore) + .ignore(!no_ignore) .build() .filter_map(|e| e.ok()) .filter(|e| e.file_type().map(|ft| ft.is_file()).unwrap_or(false))