diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore index ea8c4bf..b34c1bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ /target +.direnv + +test/ +.todo.md diff --git a/AGENTS.md b/AGENTS.md index caae1e7..b7f00b1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,209 +1,178 @@ -# AGENTS.md - AI Agent Guide for Glimpse +# Glimpse Development Guide -## Project Overview +A blazingly fast tool for peeking at codebases. Perfect for loading your codebase into an LLM's context. -Glimpse is a fast Rust CLI tool for extracting codebase content into LLM-friendly formats. It's designed to help users prepare source code for loading into Large Language Models with built-in token counting, tree visualization, and multiple output formats. +## Task Tracking -**Key capabilities:** -- Fast parallel file processing using Rayon -- Directory tree visualization -- Source code content extraction -- Token counting (tiktoken/HuggingFace backends) -- Git repository cloning and processing -- Web page scraping with Markdown conversion -- Interactive TUI file picker -- XML and PDF output formats -- Per-repository configuration via `.glimpse` files +Check `.todo.md` for current tasks and next steps. Keep it updated: +- Mark items `[x]` when completed +- Add new tasks as they're discovered +- Reference it before asking "what's next?" 
-## Codebase Structure +## Commits -``` -glimpse/ -├── src/ -│ ├── main.rs # Entry point, CLI arg handling, routing -│ ├── cli.rs # CLI argument definitions using clap -│ ├── config.rs # Global and repo-level configuration -│ ├── analyzer.rs # Core file processing logic -│ ├── source_detection.rs # Source file detection (extensions, shebangs) -│ ├── output.rs # Output formatting (tree, files, XML, PDF) -│ ├── tokenizer.rs # Token counting backends -│ ├── git_processor.rs # Git repository cloning -│ ├── url_processor.rs # Web page fetching and HTML→Markdown -│ └── file_picker.rs # Interactive TUI file selector -├── build.rs # Build script that generates languages.rs from languages.yml -├── languages.yml # Language definitions (extensions, filenames, interpreters) -├── Cargo.toml # Dependencies and package metadata -├── .github/workflows/ -│ ├── test.yml # CI: tests, clippy, formatting -│ └── release.yml # CD: multi-platform builds, publishing -└── test_project/ # Test fixtures +Use `jj` for version control. Always commit after completing a phase: + +```bash +jj commit -m "feat: add glimpse-code crate scaffolding" ``` -## Development Environment +Use conventional commit prefixes: +- `feat` - new feature +- `fix` - bug fix +- `refactor` - restructure without behavior change +- `chore` - maintenance, dependencies, config +- `docs` - documentation only +- `test` - adding or updating tests -Always use the devshell from the flake for all commands: +## Build Commands ```bash -nix develop +cargo build # debug build +cargo build --release # release build +cargo run -- # run with arguments +cargo run -- . 
# analyze current directory +cargo run -- --help # show help ``` -## Development Commands +## Test Commands ```bash -# Build and run -cargo build -cargo run -- [OPTIONS] [PATH] +cargo test # run all tests +cargo test test_name # run single test by name +cargo test test_name -- --nocapture # run test with stdout +cargo test -- --test-threads=1 # run tests sequentially +``` -# Run tests -cargo test +## Lint & Format -# Check code quality (required to pass CI) -cargo clippy -- -D warnings -cargo fmt -- --check +```bash +cargo fmt # format all code +cargo fmt -- --check # check formatting (CI) +cargo clippy # run linter +cargo clippy -- -D warnings # fail on warnings (CI) +``` -# Format code -cargo fmt +## Project Structure -# Build release -cargo build --release +``` +glimpse/ +├── src/ +│ ├── main.rs # binary entry point +│ ├── lib.rs # library root +│ ├── cli.rs # CLI arg parsing +│ ├── analyzer.rs # directory processing +│ ├── output.rs # output formatting +│ ├── core/ # config, tokenizer, types, source detection +│ ├── fetch/ # git clone, url/html processing +│ ├── tui/ # file picker +│ └── code/ # code analysis (extract, graph, index, resolve) +├── tests/ # integration tests +├── languages.yml # language definitions for source detection +├── registry.toml # tree-sitter grammar registry +└── build.rs # generates language data from languages.yml ``` -## Key Architecture Decisions +## Code Style -### Source File Detection -Detection happens in `source_detection.rs` via `is_source_file()`: -1. Check known filenames (Makefile, Dockerfile, etc.) -2. Check file extensions against `SOURCE_EXTENSIONS` -3. Fall back to shebang parsing for scripts +### No Comments -Extension/filename data is code-generated at build time from `languages.yml` via `build.rs`. +Code should be self-documenting. 
The only acceptable documentation is: +- Brief `///` docstrings on public API functions that aren't obvious +- `//!` module-level docs when necessary -### Include/Exclude Pattern Behavior -- `--include` (or `-i`): **Additive** - patterns are added to default source detection -- `--only-include`: **Replacement** - only specified patterns are used, ignoring source detection -- `--exclude` (or `-e`): Applied after inclusion, works with both modes +```rust +// BAD: explaining what code does +// Check if the file is a source file +if is_source_file(path) { ... } -### Token Counting -Two backends available in `tokenizer.rs`: -- `TokenizerType::Tiktoken` (default) - Uses `tiktoken-rs` for OpenAI-compatible counting -- `TokenizerType::HuggingFace` - Uses `tokenizers` crate for HuggingFace models +// BAD: inline comments +let name = path.file_name(); // get the filename -### Configuration Hierarchy -1. Global config: `~/.config/glimpse/config.toml` (Linux/macOS) or `%APPDATA%\glimpse\config.toml` (Windows) -2. Repo config: `.glimpse` file in project root -3. CLI arguments (highest priority) +// GOOD: self-documenting code, no comments needed +if is_source_file(path) { ... } -### Output Formats -- Default: Copies to clipboard -- `-p/--print`: Outputs to stdout -- `-f/--file [PATH]`: Writes to file (default: `GLIMPSE.md`) -- `-x/--xml`: Wraps output in XML tags for better LLM parsing -- `--pdf PATH`: Generates PDF output +// GOOD: docstring for non-obvious public function +/// Extract interpreter from shebang line and exec pattern +fn extract_interpreter(data: &str) -> Option<String> { ... 
} +``` -## Testing Conventions +### Import Order -- Unit tests are co-located with source code in `#[cfg(test)]` modules -- Integration tests use `tempfile` for isolated filesystem testing -- Tests should handle network-dependent operations gracefully (see `git_processor.rs` tests) -- Mock servers used for URL processing tests via `mockito` +Group imports in this order, separated by blank lines: +1. `std` library +2. External crates (alphabetical) +3. Internal crates - prefer `super::` over `crate::` when possible -Example test pattern: ```rust -#[cfg(test)] -mod tests { - use super::*; - use tempfile::tempdir; - - #[test] - fn test_feature() -> Result<()> { - let dir = tempdir()?; - // Test logic using temp directory - Ok(()) - } -} -``` +use std::fs; +use std::path::{Path, PathBuf}; -## CI/CD Pipeline +use anyhow::Result; +use serde::{Deserialize, Serialize}; -### Test Workflow (`.github/workflows/test.yml`) -Runs on push/PR to `master`: -- `cargo test --verbose` -- `cargo clippy -- -D warnings` -- `cargo fmt -- --check` +use super::types::FileEntry; // preferred for sibling modules +use crate::config::Config; // only when super:: won't reach +``` -### Release Workflow (`.github/workflows/release.yml`) -Triggered by version tags (`v*`): -1. Creates GitHub release -2. Builds binaries for: `x86_64-unknown-linux-gnu`, `aarch64-apple-darwin`, `x86_64-pc-windows-msvc` -3. Uploads release assets -4. Updates Homebrew tap -5. 
Publishes to crates.io +### Error Handling -## Code Style Guidelines +- Use `anyhow::Result` for fallible functions +- Propagate errors with `?` operator +- Use `.expect("message")` only when failure is a bug +- Never use `.unwrap()` outside of tests +- Use `anyhow::bail!` for early returns with errors -- Write terse, self-commenting code -- Comments only on docstrings for functions -- Follow standard Rust formatting (`cargo fmt`) -- Use `anyhow::Result` for error handling in application code -- Prefer `?` operator over explicit `match` for error propagation -- Use `#[derive]` macros for common traits +### Naming Conventions -## Version Control +- `snake_case` for functions, methods, variables, modules +- `PascalCase` for types, traits, enums +- `SCREAMING_SNAKE_CASE` for constants +- Prefer descriptive names over abbreviations +- Boolean functions: `is_`, `has_`, `can_`, `should_` -Use jujutsu (`jj`) instead of git for all version control operations. +### Type Definitions -```bash -jj status -jj diff -jj new -m "message" -jj describe -m "message" -jj bookmark set -jj git push -``` +- Derive common traits: `Debug`, `Clone`, `Serialize`, `Deserialize` +- Put derives in consistent order +- Use `pub` sparingly - only what's needed -## Common Patterns - -### Adding a new CLI option -1. Add field to `Cli` struct in `cli.rs` with appropriate `#[arg(...)]` attributes -2. Handle the option in `main.rs` routing logic -3. Update `RepoConfig` in `config.rs` if it should be saveable - -### Adding file type support -Edit `languages.yml` to add extensions, filenames, or interpreters. The build script will regenerate detection code automatically. 
+```rust +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileEntry { + pub path: PathBuf, + pub content: String, + pub size: u64, +} +``` -### Modifying output format -Edit `output.rs`: -- `generate_tree()` for tree structure -- `generate_files()` for file contents -- `generate_output()` orchestrates the full output +### Function Style -## Important Dependencies +- Keep functions focused and small +- Use early returns for guard clauses +- Prefer iterators and combinators over loops when clearer +- Use `impl Trait` for return types when appropriate -| Crate | Purpose | -|-------|---------| -| `clap` | CLI argument parsing with derive macros | -| `rayon` | Parallel file processing | -| `ignore` | .gitignore-aware file walking | -| `tiktoken-rs` | OpenAI tokenizer | -| `tokenizers` | HuggingFace tokenizer | -| `git2` | Git repository operations | -| `scraper` | HTML parsing for web processing | -| `ratatui` | Terminal UI for file picker | -| `arboard` | Clipboard access | -| `printpdf` | PDF generation | +### Testing -## Debugging Tips +- Tests live in `#[cfg(test)] mod tests` at bottom of file +- Use descriptive test names: `test_<module>_<behavior>` +- Use `tempfile` for filesystem tests +- Group related assertions -- Use `--print` to see output directly instead of clipboard -- Use `--no-tokens` to skip tokenizer initialization during debugging -- For file selection issues, check `.gitignore` patterns with `--no-ignore` -- For hidden file issues, use `-H/--hidden` +### Patterns to Follow -## Version Bumping +- Use `Option` combinators: `.map()`, `.and_then()`, `.unwrap_or()` +- Use `Result` combinators: `.map_err()`, `.context()` +- Prefer `&str` over `String` in function parameters +- Use `impl AsRef<Path>` for path parameters when flexible +- Use builders for complex configuration -Version is defined in `Cargo.toml`. When releasing: -1. Update version in `Cargo.toml` -2. Commit: `jj new -m "bump: vX.Y.Z"` -3. 
Tag: `jj git push && git tag vX.Y.Z && git push --tags` +### Patterns to Avoid -The release workflow handles the rest automatically. +- Comments explaining what code does (code should be obvious) +- Deeply nested code (use early returns) +- Magic numbers (use named constants) +- `clone()` when borrowing works +- `Box` (use `anyhow::Error`) +- Panicking in library code diff --git a/Cargo.lock b/Cargo.lock index 438cb1a..2542a80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,17 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + [[package]] name = "ahash" version = "0.8.11" @@ -101,6 +112,15 @@ version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arboard" version = "3.4.1" @@ -181,6 +201,15 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -208,6 +237,15 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "block2" version = "0.5.1" @@ -258,6 +296,25 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cassowary" version = "0.3.0" @@ -296,6 +353,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "clap" version = "4.5.30" @@ -353,11 +420,10 @@ checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "colored" -version = "2.2.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" +checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e" dependencies = [ - "lazy_static", "windows-sys 0.59.0", ] @@ 
-388,6 +454,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + [[package]] name = "core-foundation" version = "0.9.4" @@ -428,6 +500,30 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eb8a2a1cd12ab0d987a5d5e825195d372001a4094a0376319d5a0ad71c1ba0d" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crc32fast" version = "1.4.2" @@ -472,7 +568,7 @@ dependencies = [ "crossterm_winapi", "mio", "parking_lot", - "rustix", + "rustix 0.38.44", "signal-hook", "signal-hook-mio", "winapi", @@ -487,6 +583,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "cssparser" version = "0.31.2" @@ -545,6 +651,12 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "deflate64" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204" + [[package]] name = "deranged" version = "0.3.11" @@ -565,6 +677,17 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "derive_arbitrary" +version 
= "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "derive_builder" version = "0.20.2" @@ -607,6 +730,17 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "dirs" version = "5.0.1" @@ -687,6 +821,21 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "encoding_rs_io" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" +dependencies = [ + "encoding_rs", +] + +[[package]] +name = "env_home" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" + [[package]] name = "equivalent" version = "1.0.2" @@ -744,6 +893,18 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -760,6 +921,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fluent-uri" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17c704e9dbe1ddd863da1e6ff3567795087b1eb201ce80d8fa81162e1516500d" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "fnv" version = "1.0.7" @@ -890,6 +1060,16 @@ dependencies = [ "byteorder", ] +[[package]] +name = "generic-array" 
+version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "gethostname" version = "0.4.3" @@ -927,8 +1107,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.13.3+wasi-0.2.2", + "wasm-bindgen", "windows-targets 0.52.6", ] @@ -960,37 +1142,57 @@ dependencies = [ "anyhow", "arboard", "base64 0.22.1", + "bincode", + "cc", "clap", - "colored", "crossterm", "dirs", + "flate2", "git2", - "globset", + "glob", + "grep", "ignore", "indicatif", + "libloading", + "lsp-types", "mockito", "num-format", "once_cell", "printpdf", "ratatui", "rayon", + "regex", "reqwest", "scraper", "serde", + "serde_json", "serde_yaml", + "tar", "tempfile", "tiktoken-rs", "tokenizers", "toml", + "tracing", + "tracing-subscriber", + "tree-sitter", "url", "walkdir", + "which", + "xz2", + "zip", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "globset" -version = "0.4.15" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" dependencies = [ "aho-corasick", "bstr", @@ -999,6 +1201,85 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "grep" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "309217bc53e2c691c314389c7fa91f9cd1a998cda19e25544ea47d94103880c3" +dependencies = [ + "grep-cli", + "grep-matcher", + "grep-printer", + "grep-regex", + "grep-searcher", +] + 
+[[package]] +name = "grep-cli" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf32d263c5d5cc2a23ce587097f5ddafdb188492ba2e6fb638eaccdc22453631" +dependencies = [ + "bstr", + "globset", + "libc", + "log", + "termcolor", + "winapi-util", +] + +[[package]] +name = "grep-matcher" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36d7b71093325ab22d780b40d7df3066ae4aebb518ba719d38c697a8228a8023" +dependencies = [ + "memchr", +] + +[[package]] +name = "grep-printer" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd76035e87871f51c1ee5b793e32122b3ccf9c692662d9622ef1686ff5321acb" +dependencies = [ + "bstr", + "grep-matcher", + "grep-searcher", + "log", + "serde", + "serde_json", + "termcolor", +] + +[[package]] +name = "grep-regex" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce0c256c3ad82bcc07b812c15a45ec1d398122e8e15124f96695234db7112ef" +dependencies = [ + "bstr", + "grep-matcher", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "grep-searcher" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac63295322dc48ebb20a25348147905d816318888e64f531bfc2a2bc0577dc34" +dependencies = [ + "bstr", + "encoding_rs", + "encoding_rs_io", + "grep-matcher", + "log", + "memchr", + "memmap2", +] + [[package]] name = "h2" version = "0.3.26" @@ -1020,16 +1301,16 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.7" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" dependencies = [ "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "http 1.2.0", + "http 1.4.0", "indexmap", "slab", "tokio", @@ -1064,13 
+1345,22 @@ dependencies = [ "indicatif", "log", "native-tls", - "rand", + "rand 0.8.5", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "ureq", ] +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + [[package]] name = "html5ever" version = "0.26.0" @@ -1098,12 +1388,11 @@ dependencies = [ [[package]] name = "http" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -1125,18 +1414,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.2.0", + "http 1.4.0", ] [[package]] name = "http-body-util" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", - "futures-util", - "http 1.2.0", + "futures-core", + "http 1.4.0", "http-body 1.0.1", "pin-project-lite", ] @@ -1179,20 +1468,22 @@ dependencies = [ [[package]] name = "hyper" -version = "1.6.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ + "atomic-waker", "bytes", "futures-channel", - "futures-util", - "h2 0.4.7", - "http 1.2.0", + "futures-core", + "h2 0.4.12", + "http 1.4.0", "http-body 1.0.1", "httparse", 
"httpdate", "itoa", "pin-project-lite", + "pin-utils", "smallvec", "tokio", ] @@ -1212,15 +1503,15 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.10" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ "bytes", - "futures-util", - "http 1.2.0", + "futures-core", + "http 1.4.0", "http-body 1.0.1", - "hyper 1.6.0", + "hyper 1.8.1", "pin-project-lite", "tokio", ] @@ -1428,6 +1719,15 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + [[package]] name = "instability" version = "0.3.7" @@ -1519,9 +1819,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.169" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "libgit2-sys" @@ -1537,6 +1837,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libredox" version = "0.1.3" @@ -1545,6 +1855,7 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.8.0", "libc", + 
"redox_syscall", ] [[package]] @@ -1585,6 +1896,12 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "litemap" version = "0.7.4" @@ -1603,9 +1920,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lopdf" @@ -1633,6 +1950,40 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lsp-types" +version = "0.97.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53353550a17c04ac46c585feb189c2db82154fc84b79c7a66c96c2c644f66071" +dependencies = [ + "bitflags 1.3.2", + "fluent-uri", + "serde", + "serde_json", + "serde_repr", +] + +[[package]] +name = "lzma-rs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e" +dependencies = [ + "byteorder", + "crc", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "mac" version = "0.1.1" @@ -1669,6 +2020,15 @@ dependencies = [ "tendril", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + 
"regex-automata", +] + [[package]] name = "md5" version = "0.7.0" @@ -1681,6 +2041,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "memmap2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + [[package]] name = "mime" version = "0.3.17" @@ -1717,21 +2086,22 @@ dependencies = [ [[package]] name = "mockito" -version = "1.6.1" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "652cd6d169a36eaf9d1e6bce1a221130439a966d7f27858af66a33a66e9c4ee2" +checksum = "7e0603425789b4a70fcc4ac4f5a46a566c116ee3e2a6b768dc623f7719c611de" dependencies = [ "assert-json-diff", "bytes", "colored", - "futures-util", - "http 1.2.0", + "futures-core", + "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.6.0", + "hyper 1.8.1", "hyper-util", "log", - "rand", + "pin-project-lite", + "rand 0.9.2", "regex", "serde_json", "serde_urlencoded", @@ -1805,6 +2175,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.60.2", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -2070,6 +2449,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", +] + [[package]] name = "percent-encoding" version = "2.3.1" 
@@ -2122,7 +2511,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" dependencies = [ "phf_shared 0.10.0", - "rand", + "rand 0.8.5", ] [[package]] @@ -2132,7 +2521,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared 0.11.3", - "rand", + "rand 0.8.5", ] [[package]] @@ -2279,8 +2668,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", ] [[package]] @@ -2290,7 +2689,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", ] [[package]] @@ -2302,6 +2711,15 @@ dependencies = [ "getrandom 0.2.15", ] +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.1", +] + [[package]] name = "ratatui" version = "0.29.0" @@ -2371,7 +2789,7 @@ checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ 
"getrandom 0.2.15", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2478,10 +2896,23 @@ dependencies = [ "bitflags 2.8.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", "windows-sys 0.59.0", ] +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags 2.8.0", + "errno", + "libc", + "linux-raw-sys 0.11.0", + "windows-sys 0.60.2", +] + [[package]] name = "rustls" version = "0.23.23" @@ -2643,12 +3074,24 @@ version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" dependencies = [ + "indexmap", "itoa", "memchr", "ryu", "serde", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "serde_spanned" version = "0.6.8" @@ -2692,6 +3135,26 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2801,6 +3264,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "streaming-iterator" +version = 
"0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "string_cache" version = "0.8.8" @@ -2920,6 +3389,17 @@ dependencies = [ "libc", ] +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.17.1" @@ -2930,7 +3410,7 @@ dependencies = [ "fastrand", "getrandom 0.3.1", "once_cell", - "rustix", + "rustix 0.38.44", "windows-sys 0.59.0", ] @@ -2945,13 +3425,31 @@ dependencies = [ "utf-8", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl 2.0.17", ] [[package]] @@ -2965,6 +3463,26 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "tiff" version = "0.9.1" @@ -3052,7 +3570,7 @@ dependencies = [ "monostate", "onig", "paste", - "rand", + "rand 0.8.5", "rayon", "rayon-cond", "regex", @@ -3060,7 +3578,7 @@ dependencies = [ "serde", "serde_json", "spm_precompiled", - "thiserror", + "thiserror 1.0.69", "unicode-normalization-alignments", "unicode-segmentation", "unicode_categories", @@ -3147,23 +3665,85 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = 
"2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ + "matchers", + "nu-ansi-term", "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "tree-sitter" +version = "0.25.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", ] +[[package]] +name = "tree-sitter-language" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" + [[package]] name = "tree_magic_mini" version = "3.1.6" @@ -3189,6 +3769,12 @@ version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49d64318d8311fc2668e48b63969f4343e0a85c4a109aa8460d6672e364b8bd1" +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + [[package]] name = "unicode-ident" version = "1.0.16" @@ -3305,6 +3891,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3430,7 +4022,7 @@ checksum = "b7208998eaa3870dad37ec8836979581506e0c5c64c20c9e79e9d2a10d6f47bf" dependencies = [ "cc", "downcast-rs", - "rustix", + "rustix 0.38.44", "smallvec", "wayland-sys", ] @@ -3442,7 +4034,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "c2120de3d33638aaef5b9f4472bff75f07c56379cf76ea320bd3a3d65ecaf73f" dependencies = [ "bitflags 2.8.0", - "rustix", + "rustix 0.38.44", "wayland-backend", "wayland-scanner", ] @@ -3527,6 +4119,17 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "which" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d" +dependencies = [ + "env_home", + "rustix 1.1.3", + "winsafe", +] + [[package]] name = "winapi" version = "0.3.9" @@ -3558,6 +4161,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + [[package]] name = "windows-sys" version = "0.48.0" @@ -3585,6 +4194,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -3609,13 +4227,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3628,6 +4263,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3640,6 +4281,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3652,12 +4299,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = 
"0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -3670,6 +4329,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -3682,6 +4347,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -3694,6 +4365,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -3706,6 +4383,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = 
"winnow" version = "0.7.2" @@ -3725,6 +4408,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + [[package]] name = "wit-bindgen-rt" version = "0.33.0" @@ -3746,7 +4435,7 @@ dependencies = [ "nix", "os_pipe", "tempfile", - "thiserror", + "thiserror 1.0.69", "tree_magic_mini", "wayland-backend", "wayland-client", @@ -3773,7 +4462,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d91ffca73ee7f68ce055750bf9f6eca0780b8c85eff9bc046a3b0da41755e12" dependencies = [ "gethostname", - "rustix", + "rustix 0.38.44", "x11rb-protocol", ] @@ -3783,6 +4472,25 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d" +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix 1.1.3", +] + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.7.5" @@ -3854,6 +4562,20 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] [[package]] name = "zerovec" @@ 
-3876,3 +4598,73 @@ dependencies = [ "quote", "syn 2.0.98", ] + +[[package]] +name = "zip" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50" +dependencies = [ + "aes", + "arbitrary", + "bzip2", + "constant_time_eq", + "crc32fast", + "crossbeam-utils", + "deflate64", + "displaydoc", + "flate2", + "getrandom 0.3.1", + "hmac", + "indexmap", + "lzma-rs", + "memchr", + "pbkdf2", + "sha1", + "thiserror 2.0.17", + "time", + "xz2", + "zeroize", + "zopfli", + "zstd", +] + +[[package]] +name = "zopfli" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfc5ee405f504cd4984ecc6f14d02d55cfda60fa4b689434ef4102aae150cd7" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index a8c4e86..f609b9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,38 +2,62 @@ name = "glimpse" version = "0.7.8" edition = "2021" -description = "A blazingly fast tool for peeking at codebases. Perfect for loading your codebase into an LLM's context." 
license = "MIT" -build = "build.rs" +description = "A blazingly fast tool for peeking at codebases" + +[[bin]] +name = "glimpse" +path = "src/main.rs" + +[lib] +name = "glimpse" +path = "src/lib.rs" [dependencies] anyhow = "1.0.95" arboard = { version = "3.4.1", features = ["wayland-data-control"] } base64 = "0.22.1" +bincode = "1.3" +cc = "1.2" clap = { version = "4.5.23", features = ["derive"] } -colored = "2.2.0" crossterm = "0.28.1" dirs = "5.0.1" -globset = "0.4.15" +git2 = "0.18" +glob = "0.3" +grep = "0.4" ignore = "0.4.23" indicatif = "0.17.9" +libloading = "0.8" +num-format = "0.4.4" once_cell = "1.20.2" printpdf = "0.7.0" ratatui = "0.29.0" rayon = "1.10.0" +regex = "1.11" +reqwest = { version = "0.11", features = ["blocking"] } +scraper = "0.18" serde = { version = "1.0.217", features = ["derive"] } +serde_json = "1.0" tempfile = "3.14.0" tiktoken-rs = "0.6.0" tokenizers = { version = "0.21.0", features = ["http"] } toml = "0.8.19" -walkdir = "2.5.0" -reqwest = { version = "0.11", features = ["blocking"] } -scraper = "0.18" +tree-sitter = "0.25" url = "2.5" -git2 = "0.18" -mockito = "1.4" -num-format = { version = "0.4.4" } +walkdir = "2.5.0" +lsp-types = "0.97" +which = "8.0" +flate2 = "1.0" +zip = "2.2" +tar = "0.4" +xz2 = "0.1" +tracing = "0.1.44" +tracing-subscriber = { version = "0.3.22", features = ["env-filter"] } [build-dependencies] serde = { version = "1.0.217", features = ["derive"] } -serde_yaml = "0.9.34" +serde_yaml = "0.9" + +[dev-dependencies] +tempfile = "3.14.0" +mockito = "1.4" diff --git a/build.rs b/build.rs index b8c4423..5b12af4 100644 --- a/build.rs +++ b/build.rs @@ -19,22 +19,24 @@ struct Language { } fn main() { - println!("cargo:rerun-if-changed=languages.yml"); + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let languages_path = Path::new(&manifest_dir).join("languages.yml"); + + println!( + "cargo:rerun-if-changed={}", + languages_path.to_string_lossy() + ); - // Read and parse languages.yml let 
yaml_content = - std::fs::read_to_string("languages.yml").expect("Failed to read languages.yml"); + std::fs::read_to_string(&languages_path).expect("Failed to read languages.yml"); let languages: HashMap = serde_yaml::from_str(&yaml_content).expect("Failed to parse languages.yml"); - // Generate the rust code let mut code = String::new(); - // Add the use statements code.push_str("use once_cell::sync::Lazy;\n"); code.push_str("use std::collections::HashSet;\n\n"); - // Generate source extensions set code.push_str("pub static SOURCE_EXTENSIONS: Lazy> = Lazy::new(|| {\n"); code.push_str(" let mut set = HashSet::new();\n\n"); @@ -48,7 +50,6 @@ fn main() { code.push_str(" set\n"); code.push_str("});\n\n"); - // Generate filename mappings code.push_str("pub static KNOWN_FILENAMES: Lazy> = Lazy::new(|| {\n"); code.push_str(" let mut set = HashSet::new();\n\n"); @@ -61,7 +62,6 @@ fn main() { code.push_str(" set\n"); code.push_str("});\n\n"); - // Generate interpreter mappings code.push_str("pub static INTERPRETER_NAMES: Lazy> = Lazy::new(|| {\n"); code.push_str(" let mut set = HashSet::new();\n\n"); @@ -74,7 +74,6 @@ fn main() { code.push_str(" set\n"); code.push_str("});\n"); - // Write the generated code to file let out_dir = std::env::var_os("OUT_DIR").unwrap(); let dest_path = Path::new(&out_dir).join("languages.rs"); let mut f = File::create(dest_path).unwrap(); diff --git a/flake.nix b/flake.nix index 32848b4..ed3c640 100644 --- a/flake.nix +++ b/flake.nix @@ -71,10 +71,11 @@ buildInputs = with pkgs; [ - rust-bin.stable.latest.default + rust-bin.nightly.latest.default pkg-config openssl cacert + tree-sitter ] ++ lib.optionals stdenv.isDarwin [ darwin.apple_sdk.frameworks.Security diff --git a/readme.md b/readme.md index bae4add..765d73f 100644 --- a/readme.md +++ b/readme.md @@ -1,22 +1,24 @@ # Glimpse -A blazingly fast tool for peeking at codebases. Perfect for loading your codebase into an LLM's context, with built-in token counting support. 
+A blazingly fast tool for peeking at codebases. Perfect for loading your codebase into an LLM's context, with built-in token counting and code analysis. ## Features -- 🚀 Fast parallel file processing -- 🌳 Tree-view of codebase structure -- 📝 Source code content viewing -- 🔢 Token counting with multiple backends -- ⚙️ Configurable defaults -- 📋 Clipboard support -- 🎨 Customizable file type detection -- 🥷 Respects .gitignore automatically -- 📁 Local per-repo configuration with `.glimpse` file -- 🔗 Web content processing with Markdown conversion -- 📦 Git repository support -- 🌐 URL traversal with configurable depth -- 🏷️ XML output format for better LLM compatibility +- Fast parallel file processing +- Tree-view of codebase structure +- Source code content viewing +- Token counting with multiple backends (tiktoken, HuggingFace) +- Call graph generation for code analysis +- Configurable defaults with global and per-repo config +- Clipboard support +- Customizable file type detection +- Respects .gitignore automatically +- Web content processing with Markdown conversion +- Git repository support (GitHub, GitLab, Bitbucket, Azure DevOps) +- URL traversal with configurable depth +- XML output format for better LLM compatibility +- Interactive file picker +- PDF export ## Installation @@ -53,7 +55,8 @@ paru -S glimpse ## Usage -Basic usage: +### Basic Usage + ```bash # Process a local directory glimpse /path/to/project @@ -73,7 +76,8 @@ glimpse https://example.com/docs --traverse-links --link-depth 2 On first use in a repository, Glimpse will save a `.glimpse` configuration file locally with your specified options. This file can be referenced on subsequent runs, or overridden by passing options again. 
-Common options: +### Common Options + ```bash # Show hidden files glimpse -H /path/to/project @@ -90,9 +94,12 @@ glimpse -f output.txt /path/to/project # Print output to stdout instead of copying to clipboard glimpse -p /path/to/project -# Include specific file types +# Include specific file types (additive to source files) glimpse -i "*.rs,*.go" /path/to/project +# Only include specific patterns (replaces default source detection) +glimpse --only-include "*.rs,*.go" /path/to/project + # Exclude patterns or files glimpse -e "target/*,dist/*" /path/to/project @@ -111,20 +118,80 @@ glimpse https://github.com/username/repo.git --pdf output.pdf # Open interactive file picker glimpse --interactive /path/to/project +# Output in XML format for better LLM compatibility +glimpse -x /path/to/project + # Print the config file path and exit glimpse --config_path # Initialize a .glimpse config file in the current directory glimpse --config +``` -# Output in XML format for better LLM compatibility -glimpse -x /path/to/project +## Code Analysis + +Glimpse includes powerful code analysis features for understanding call relationships in your codebase. 
+ +### Call Graph Generation + +Generate call graphs to see what functions a target function calls (callees) or what calls it (callers): + +```bash +# Generate call graph for a function (searches all files) +glimpse code :function_name + +# Specify file and function +glimpse code src/main.rs:main + +# Include callers (reverse call graph) +glimpse code src/main.rs:main --callers + +# Limit traversal depth +glimpse code :process --depth 3 + +# Output to file +glimpse code :build -f callgraph.md + +# Strict mode: only resolve via imports (no global name matching) +glimpse code :main --strict + +# Precise mode: use LSP for type-aware resolution (slower but accurate) +glimpse code :main --precise + +# Specify project root +glimpse code :main --root /path/to/project +``` + +### Code Index Management + +Glimpse maintains an index for faster code analysis. Manage it with: + +```bash +# Build or update the index +glimpse index build + +# Build with LSP for precise resolution +glimpse index build --precise + +# Force rebuild (ignore existing index) +glimpse index build --force + +# Clear the index +glimpse index clear + +# Show index status and stats +glimpse index status + +# Specify project path +glimpse index build /path/to/project ``` -## CLI Options +## CLI Reference ``` Usage: glimpse [OPTIONS] [PATH] + glimpse code [OPTIONS] + glimpse index Arguments: [PATH] Files, directories, or URLs to analyze [default: .] @@ -134,6 +201,7 @@ Options: --config Init glimpse config file in current directory --interactive Opens interactive file picker (? for help) -i, --include Additional patterns to include (e.g. 
"*.rs,*.go") + --only-include Only include these patterns (replaces source detection) -e, --exclude Additional patterns or files to exclude -s, --max-size Maximum file size in bytes --max-depth Maximum directory depth to traverse @@ -151,8 +219,26 @@ Options: --link-depth Maximum depth to traverse links (default: 1) --pdf Save output as PDF -x, --xml Output in XML format for better LLM compatibility + -v, --verbose Verbosity level (-v, -vv, -vvv) -h, --help Print help -V, --version Print version + +Code Subcommand: + glimpse code Generate call graph for a function + Target in file:function or :function format + --root Project root directory [default: .] + --callers Include callers (reverse call graph) + --depth Maximum depth to traverse + -f, --file Output file (default: stdout) + --strict Only resolve calls via imports + --precise Use LSP for type-aware resolution + +Index Subcommand: + glimpse index build [PATH] Build or update the index + --force Force rebuild + --precise Use LSP for precise resolution + glimpse index clear [PATH] Clear the index + glimpse index status [PATH] Show index status and stats ``` ## Configuration @@ -186,12 +272,10 @@ default_excludes = [ ## XML Output Format -Glimpse supports XML output format designed for better compatibility with Large Language Models (LLMs) like Claude, GPT, and others. When using the `-x` or `--xml` flag, the output is structured with clear XML tags that help LLMs better understand the context and structure of your codebase. +Glimpse supports XML output format designed for better compatibility with Large Language Models. When using the `-x` or `--xml` flag, the output is structured with clear XML tags that help LLMs better understand the context and structure of your codebase. 
### XML Structure -The XML output wraps all content in a `` tag with the project name: - ```xml @@ -217,77 +301,34 @@ Total size: 45 bytes ### Benefits for LLM Usage -- **Clear Context Boundaries**: The `` wrapper helps LLMs understand where your codebase begins and ends -- **Structured Information**: Separate sections for directory tree, file contents, and summary -- **Proper Escaping**: XML-safe content that won't confuse parsers -- **Project Identification**: Automatic project name detection for better context - -### Usage Examples - -```bash -# Basic XML output -glimpse -x /path/to/project - -# XML output with file save -glimpse -x -f project.xml /path/to/project - -# XML output to stdout -glimpse -x --print /path/to/project - -# XML output with specific includes -glimpse -x -i "*.rs,*.py" /path/to/project -``` +- Clear context boundaries with the `` wrapper +- Structured sections for directory tree, file contents, and summary +- Proper XML escaping +- Automatic project name detection ## Token Counting Glimpse supports two tokenizer backends: -1. Tiktoken (Default): OpenAI's tokenizer implementation, perfect for accurately estimating tokens for GPT models. +1. **Tiktoken** (Default): OpenAI's tokenizer implementation, perfect for accurately estimating tokens for GPT models. -2. HuggingFace Tokenizers: Supports any model from the HuggingFace hub or local tokenizer files, great for custom models or other ML frameworks. +2. **HuggingFace Tokenizers**: Supports any model from the HuggingFace hub or local tokenizer files, great for custom models or other ML frameworks. The token count appears in both file content views and the final summary, helping you estimate context window usage for large language models. -Example token count output: -``` -File: src/main.rs -Tokens: 245 -================================================== -// File contents here... 
- -Summary: -Total files: 10 -Total size: 15360 bytes -Total tokens: 2456 -``` - -## Troubleshooting +## Git Repository Support -1. **File too large**: Adjust `max_size` in config -2. **Missing files**: Check `hidden` flag and exclude patterns -3. **Performance issues**: Try adjusting thread count with `-t` -4. **Tokenizer errors**: - - For HuggingFace models, ensure you have internet connection for downloading - - For local tokenizer files, verify the file path and format - - Try using the default tiktoken backend if issues persist - -## License - -MIT - -## Features in Detail - -### Git Repository Support -Glimpse can directly process Git repositories from popular hosting services: -- GitHub repositories -- GitLab repositories -- Bitbucket repositories -- Azure DevOps repositories +Glimpse can directly process Git repositories from: +- GitHub +- GitLab +- Bitbucket +- Azure DevOps - Any Git repository URL (ending with .git) The repository is cloned to a temporary directory, processed, and automatically cleaned up. -### Web Content Processing +## Web Content Processing + Glimpse can process web pages and convert them to Markdown: - Preserves heading structure - Converts links (both relative and absolute) @@ -297,10 +338,24 @@ Glimpse can process web pages and convert them to Markdown: With link traversal enabled, Glimpse can also process linked pages up to a specified depth, making it perfect for documentation sites and wikis. -### PDF Output +## PDF Output + Any processed content (local files, Git repositories, or web pages) can be saved as a PDF with: - Preserved formatting - Syntax highlighting - Table of contents - Page numbers -- Custom headers and footers + +## Troubleshooting + +1. **File too large**: Adjust `max_size` in config +2. **Missing files**: Check `hidden` flag and exclude patterns +3. **Performance issues**: Try adjusting thread count with `-t` +4. 
**Tokenizer errors**: + - For HuggingFace models, ensure you have internet connection for downloading + - For local tokenizer files, verify the file path and format + - Try using the default tiktoken backend if issues persist + +## License + +MIT diff --git a/registry.toml b/registry.toml new file mode 100644 index 0000000..97881eb --- /dev/null +++ b/registry.toml @@ -0,0 +1,646 @@ +[[language]] +name = "go" +extensions = ["go"] +repo = "https://github.com/tree-sitter/tree-sitter-go" +branch = "master" +symbol = "tree_sitter_go" +definition_query = """ +( + (comment)* @doc + . + (function_declaration + name: (identifier) @name + body: (block) @body) @function.definition +) +( + (comment)* @doc + . + (method_declaration + name: (field_identifier) @name + body: (block) @body) @method.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (parenthesized_expression (identifier) @name) + (selector_expression + operand: (_) @qualifier + field: (field_identifier) @name) + (parenthesized_expression (selector_expression + operand: (_) @qualifier + field: (field_identifier) @name)) + ]) @reference.call +""" +import_query = """ +(import_spec + name: [ + (package_identifier) @alias + (dot) @dot_import + (blank_identifier) @side_effect + ]? + path: [ + (interpreted_string_literal) @path + (raw_string_literal) @path + ]) +""" + +[language.lsp] +binary = "gopls" +args = ["serve"] +go_package = "golang.org/x/tools/gopls@latest" + +[[language]] +name = "zig" +extensions = ["zig"] +repo = "https://github.com/tree-sitter-grammars/tree-sitter-zig" +branch = "master" +symbol = "tree_sitter_zig" +definition_query = """ +( + (comment)* @doc + . 
+ (function_declaration + name: (identifier) @name + body: (block) @body) @function.definition +) +""" +call_query = """ +(call_expression + function: (identifier) @name) @reference.call + +(call_expression + function: (field_expression + object: (_) @qualifier + member: (identifier) @name)) @reference.call +""" +import_query = """ +(variable_declaration + (identifier) @name + (builtin_function + (builtin_identifier) @_builtin + (arguments + (string (string_content) @path))) + (#eq? @_builtin "@import")) @import +""" + +[language.lsp] +binary = "zls" +args = [] +url_template = "https://github.com/zigtools/zls/releases/download/{version}/zls-{target}.tar.xz" +archive = "tar.xz" + +[language.lsp.targets] +"x86_64-unknown-linux-gnu" = "x86_64-linux" +"aarch64-unknown-linux-gnu" = "aarch64-linux" +"x86_64-apple-darwin" = "x86_64-macos" +"aarch64-apple-darwin" = "aarch64-macos" + +[[language]] +name = "c" +extensions = ["c", "h"] +repo = "https://github.com/tree-sitter/tree-sitter-c" +branch = "master" +symbol = "tree_sitter_c" +definition_query = """ +( + (comment)* @doc + . 
+ (function_definition + declarator: [ + (function_declarator + declarator: (identifier) @name) + (pointer_declarator + declarator: (function_declarator + declarator: (identifier) @name)) + ] + body: (compound_statement) @body) @function.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (field_expression + argument: (_) @qualifier + field: (field_identifier) @name) + (parenthesized_expression (identifier) @name) + ]) @reference.call +""" +import_query = """ +(preproc_include + path: [ + (system_lib_string) @system_path + (string_literal) @local_path + ]) @include +""" + +[language.lsp] +binary = "clangd" +args = [] +version = "21.1.0" +url_template = "https://github.com/clangd/clangd/releases/download/{version}/clangd-{target}-{version}.zip" +archive = "zip" +binary_path = "clangd_{version}/bin/clangd" + +[language.lsp.targets] +"x86_64-unknown-linux-gnu" = "linux-x86_64" +"x86_64-apple-darwin" = "mac-x86_64" +"aarch64-apple-darwin" = "mac-arm64" +"x86_64-pc-windows-msvc" = "windows-x86_64" + +[[language]] +name = "cpp" +extensions = ["cpp", "cc", "cxx", "hpp", "hxx"] +repo = "https://github.com/tree-sitter/tree-sitter-cpp" +branch = "master" +symbol = "tree_sitter_cpp" +definition_query = """ +( + (comment)* @doc + . + (function_definition + declarator: (function_declarator + declarator: (identifier) @name) + body: (compound_statement) @body) @function.definition +) +( + (comment)* @doc + . + (function_definition + declarator: (function_declarator + declarator: (qualified_identifier + name: (identifier) @name)) + body: (compound_statement) @body) @method.definition +) +( + (comment)* @doc + . + (function_definition + declarator: (pointer_declarator + declarator: (function_declarator + declarator: (identifier) @name)) + body: (compound_statement) @body) @function.definition +) +( + (comment)* @doc + . 
+ (function_definition + declarator: (reference_declarator + (function_declarator + declarator: (qualified_identifier + name: [(identifier) (operator_name) (destructor_name)] @name))) + body: (compound_statement) @body) @method.definition +) +( + (comment)* @doc + . + (function_definition + declarator: (function_declarator + declarator: (qualified_identifier + name: (destructor_name) @name)) + body: (compound_statement) @body) @method.definition +) +( + (comment)* @doc + . + (template_declaration + (function_definition + declarator: (function_declarator + declarator: (qualified_identifier + name: (identifier) @name)) + body: (compound_statement) @body)) @function.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (qualified_identifier + scope: (_) @qualifier + name: (identifier) @name) + (template_function name: (identifier) @name) + (field_expression + argument: (_) @qualifier + field: (field_identifier) @name) + ]) @reference.call +""" +import_query = """ +(preproc_include + path: [ + (system_lib_string) @system_path + (string_literal) @local_path + ]) @include +""" + +[language.lsp] +binary = "clangd" +args = [] +version = "21.1.0" +url_template = "https://github.com/clangd/clangd/releases/download/{version}/clangd-{target}-{version}.zip" +archive = "zip" +binary_path = "clangd_{version}/bin/clangd" + +[language.lsp.targets] +"x86_64-unknown-linux-gnu" = "linux-x86_64" +"x86_64-apple-darwin" = "mac-x86_64" +"aarch64-apple-darwin" = "mac-arm64" +"x86_64-pc-windows-msvc" = "windows-x86_64" + +[[language]] +name = "bash" +extensions = ["sh", "bash"] +repo = "https://github.com/tree-sitter/tree-sitter-bash" +branch = "master" +symbol = "tree_sitter_bash" +definition_query = """ +( + (comment)* @doc + . 
+ (function_definition + name: (word) @name + body: (compound_statement) @body) @function.definition +) +""" +call_query = """ +(command_name (word) @name) @reference.call +""" +import_query = """ +(command + name: (command_name) @_cmd + argument: [ + (word) @path + (string) @path + (raw_string) @path + (concatenation) @path + ] + (#any-of? @_cmd "source" ".")) @import +""" + +[language.lsp] +binary = "bash-language-server" +args = ["start"] +npm_package = "bash-language-server" + +[[language]] +name = "python" +extensions = ["py"] +repo = "https://github.com/tree-sitter/tree-sitter-python" +branch = "master" +symbol = "tree_sitter_python" +definition_query = """ +( + (comment)* @doc + . + (function_definition + name: (identifier) @name + body: (block) @body) @function.definition +) +( + (comment)* @doc + . + (class_definition + name: (identifier) @name + body: (block) @body) @class.definition +) +""" +call_query = """ +(call + function: [ + (identifier) @name + (attribute + object: (_) @qualifier + attribute: (identifier) @name) + ]) @reference.call +""" +import_query = """ +(import_statement + name: [ + (dotted_name) @path + (aliased_import + name: (dotted_name) @path + alias: (identifier) @alias) + ]) @import + +(import_from_statement + module_name: [ + (dotted_name) @module + (relative_import) @module + ] + name: [ + (dotted_name) @name + (aliased_import + name: (dotted_name) @name + alias: (identifier) @alias) + ]? + (wildcard_import)? @wildcard) @import +""" + +[language.lsp] +binary = "pyright-langserver" +args = ["--stdio"] +npm_package = "pyright" + +[[language]] +name = "typescript" +extensions = ["ts", "mts", "cts"] +repo = "https://github.com/tree-sitter/tree-sitter-typescript" +branch = "master" +subpath = "typescript" +symbol = "tree_sitter_typescript" +definition_query = """ +( + (comment)* @doc + . + (function_declaration + name: (identifier) @name + body: (statement_block) @body) @function.definition +) +( + (comment)* @doc + . 
+ (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function + body: (_) @body))) @function.definition +) +( + (comment)* @doc + . + (method_definition + name: (property_identifier) @name + body: (statement_block) @body) @method.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (member_expression + object: (_) @qualifier + property: (property_identifier) @name) + ]) @reference.call +""" +import_query = """ +(import_statement + (import_clause + [ + (identifier) @default_import + (named_imports + (import_specifier + name: (identifier) @name + alias: (identifier)? @alias)) + (namespace_import (identifier) @namespace) + ]) + source: (string (string_fragment) @source)) @import +""" + +[language.lsp] +binary = "typescript-language-server" +args = ["--stdio"] +npm_package = "typescript-language-server typescript" + +[[language]] +name = "rust" +extensions = ["rs"] +repo = "https://github.com/tree-sitter/tree-sitter-rust" +branch = "master" +symbol = "tree_sitter_rust" +definition_query = """ +( + (line_comment)* @doc + . 
+ (function_item + name: (identifier) @name + body: (block) @body) @function.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (scoped_identifier + path: (_) @qualifier + name: (identifier) @name) + (field_expression + value: (_) @qualifier + field: (field_identifier) @name) + ]) @reference.call +""" +import_query = """ +(use_declaration + argument: [ + (identifier) @path + (scoped_identifier) @path + (use_as_clause + path: [ + (identifier) @path + (scoped_identifier) @path + ] + alias: (identifier) @alias) + (use_list) @list + (scoped_use_list + path: [ + (identifier) @path + (scoped_identifier) @path + ]) + (use_wildcard) @wildcard + ]) @import + +(mod_item + name: (identifier) @mod_name) @mod_decl +""" + +[language.lsp] +binary = "rust-analyzer" +args = [] +version = "2024-12-23" +url_template = "https://github.com/rust-lang/rust-analyzer/releases/download/{version}/rust-analyzer-{target}.gz" +archive = "gz" + +[language.lsp.targets] +"x86_64-unknown-linux-gnu" = "x86_64-unknown-linux-gnu" +"x86_64-unknown-linux-musl" = "x86_64-unknown-linux-musl" +"aarch64-unknown-linux-gnu" = "aarch64-unknown-linux-gnu" +"x86_64-apple-darwin" = "x86_64-apple-darwin" +"aarch64-apple-darwin" = "aarch64-apple-darwin" +"x86_64-pc-windows-msvc" = "x86_64-pc-windows-msvc" + +[[language]] +name = "javascript" +extensions = ["js", "mjs", "cjs"] +repo = "https://github.com/tree-sitter/tree-sitter-javascript" +branch = "master" +symbol = "tree_sitter_javascript" +definition_query = """ +( + (comment)* @doc + . + (function_declaration + name: (identifier) @name + body: (statement_block) @body) @function.definition +) +( + (comment)* @doc + . + (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function + body: (_) @body))) @function.definition +) +( + (comment)* @doc + . 
+ (method_definition + name: (property_identifier) @name + body: (statement_block) @body) @method.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (member_expression + object: (_) @qualifier + property: (property_identifier) @name) + ]) @reference.call +""" +import_query = """ +(import_statement + (import_clause + [ + (identifier) @default_import + (named_imports + (import_specifier + name: (identifier) @name + alias: (identifier)? @alias)) + (namespace_import (identifier) @namespace) + ]) + source: (string (string_fragment) @source)) @import + +(call_expression + function: (identifier) @_require + arguments: (arguments (string (string_fragment) @source)) + (#eq? @_require "require")) @require +""" + +[language.lsp] +binary = "typescript-language-server" +args = ["--stdio"] +npm_package = "typescript-language-server typescript" + +[[language]] +name = "java" +extensions = ["java"] +repo = "https://github.com/tree-sitter/tree-sitter-java" +branch = "master" +symbol = "tree_sitter_java" +definition_query = """ +(class_body + (block_comment) @doc + . + (method_declaration + name: (identifier) @name + body: (_) @body) @method.definition +) +(class_body + (block_comment) @doc + . + (constructor_declaration + name: (identifier) @name + body: (_) @body) @method.definition +) +(method_declaration + name: (identifier) @name + body: (_) @body) @method.definition +(constructor_declaration + name: (identifier) @name + body: (_) @body) @method.definition +""" +call_query = """ +(method_invocation + object: (_)? @qualifier + name: (identifier) @name) @reference.call +(object_creation_expression + type: (type_identifier) @name) @reference.call +""" +import_query = """ +(import_declaration + [ + (scoped_identifier) @path + (identifier) @path + ] + (asterisk)? 
@wildcard) @import +""" + +[language.lsp] +binary = "jdtls" +args = [] + +[[language]] +name = "scala" +extensions = ["scala", "sc"] +repo = "https://github.com/tree-sitter/tree-sitter-scala" +branch = "master" +symbol = "tree_sitter_scala" +definition_query = """ +( + (block_comment)* @doc + . + (function_definition + name: (identifier) @name + body: (_) @body) @function.definition +) +( + (block_comment)* @doc + . + (function_declaration + name: (identifier) @name) @function.definition +) +( + (block_comment)* @doc + . + (class_definition + name: (identifier) @name + body: (template_body) @body) @class.definition +) +( + (block_comment)* @doc + . + (object_definition + name: (identifier) @name + body: (template_body) @body) @object.definition +) +( + (block_comment)* @doc + . + (trait_definition + name: (identifier) @name + body: (template_body) @body) @trait.definition +) +""" +call_query = """ +(call_expression + function: [ + (identifier) @name + (field_expression + value: (_) @qualifier + field: (identifier) @name) + ]) @reference.call +""" +import_query = """ +(import_declaration + path: (_)+ @path + [ + (namespace_wildcard) @wildcard + (namespace_selectors) @selectors + (as_renamed_identifier) @alias + ]?) 
@import +""" + +[language.lsp] +binary = "metals" +args = [] \ No newline at end of file diff --git a/src/analyzer.rs b/src/analyzer.rs index 2d350e9..8e64f0f 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -1,26 +1,24 @@ -use crate::cli::{Cli, Exclude, OutputFormat, TokenizerType}; -use crate::file_picker::FilePicker; -use crate::output::{ - display_token_counts, generate_output, generate_pdf, handle_output, FileEntry, -}; -use crate::source_detection; -use crate::tokenizer::TokenCounter; +use std::fs; +use std::path::{Path, PathBuf}; + use anyhow::Result; use ignore::{overrides::OverrideBuilder, WalkBuilder}; use indicatif::{ProgressBar, ProgressStyle}; use rayon::prelude::*; -use std::fs; -use std::path::{Path, PathBuf}; + +use glimpse::tui::FilePicker; +use glimpse::{is_source_file, Exclude, FileEntry, OutputFormat, TokenCounter, TokenizerType}; + +use crate::cli::Cli; +use crate::output::{display_token_counts, generate_output, generate_pdf, handle_output}; pub fn process_directory(args: &Cli) -> Result<()> { - // Configure thread pool if specified if let Some(threads) = args.threads { rayon::ThreadPoolBuilder::new() .num_threads(threads) .build_global()?; } - // Set up progress bar let pb = ProgressBar::new_spinner(); pb.set_style( ProgressStyle::default_spinner() @@ -30,25 +28,25 @@ pub fn process_directory(args: &Cli) -> Result<()> { pb.set_message("Scanning files..."); let output_format = args - .output - .clone() + .get_output_format() .expect("output format should be set from config"); let entries = process_entries(args)?; pb.finish(); if let Some(pdf_path) = &args.pdf { - let pdf_data = generate_pdf(&entries, args.output.clone().unwrap_or(OutputFormat::Both))?; + let pdf_data = generate_pdf( + &entries, + args.get_output_format().unwrap_or(OutputFormat::Both), + )?; fs::write(pdf_path, pdf_data)?; println!("PDF output written to: {}", pdf_path.display()); } else { - // Determine project name for XML output let project_name = if args.xml { 
Some(determine_project_name(&args.paths)) } else { None }; - // Handle output (print/copy/save) let output = generate_output(&entries, output_format, args.xml, project_name)?; handle_output(output, args)?; } @@ -65,14 +63,12 @@ fn determine_project_name(paths: &[String]) -> String { if let Some(first_path) = paths.first() { let path = std::path::Path::new(first_path); - // If it's a directory, use its name if path.is_dir() { if let Some(name) = path.file_name() { return name.to_string_lossy().to_string(); } } - // If it's a file, use the parent directory name if path.is_file() { if let Some(parent) = path.parent() { if let Some(name) = parent.file_name() { @@ -81,7 +77,6 @@ fn determine_project_name(paths: &[String]) -> String { } } - // Fallback to just the path itself first_path.clone() } else { "project".to_string() @@ -89,7 +84,6 @@ fn determine_project_name(paths: &[String]) -> String { } fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) -> bool { - // Basic file checks if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) { return false; } @@ -97,7 +91,6 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - let path = entry.path(); let max_size = args.max_size.expect("max_size should be set from config"); - // Size check if !entry .metadata() .map(|m| m.len() <= max_size) @@ -106,7 +99,6 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - return false; } - // Handle replacement mode with --only-include if let Some(ref only_includes) = args.only_include { let matches_only_include = matches_include_patterns(path, only_includes, base_path); @@ -114,7 +106,6 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - return false; } - // Apply excludes if any if let Some(ref excludes) = args.exclude { return !matches_exclude_patterns(path, excludes, base_path); } @@ -122,25 +113,20 @@ fn should_process_file(entry: &ignore::DirEntry, args: 
&Cli, base_path: &Path) - return true; } - // Handle additive mode - // Check if it's a source file - let is_source = source_detection::is_source_file(path); + let is_source = is_source_file(path); - // Check if it matches additional include patterns let matches_include = if let Some(ref includes) = args.include { matches_include_patterns(path, includes, base_path) } else { false }; - // Include if EITHER source file OR matches include patterns let should_include = is_source || matches_include; if !should_include { return false; } - // Apply excludes to the union if let Some(ref excludes) = args.exclude { return !matches_exclude_patterns(path, excludes, base_path); } @@ -151,27 +137,23 @@ fn should_process_file(entry: &ignore::DirEntry, args: &Cli, base_path: &Path) - fn matches_include_patterns(path: &Path, includes: &[String], base_path: &Path) -> bool { let mut override_builder = OverrideBuilder::new(base_path); - // Add include patterns (positive) for pattern in includes { if let Err(e) = override_builder.add(pattern) { eprintln!("Warning: Invalid include pattern '{pattern}': {e}"); } } - let overrides = override_builder.build().unwrap_or_else(|_| { - // Return a default override that matches nothing if build fails - OverrideBuilder::new(base_path).build().unwrap() - }); + let overrides = override_builder + .build() + .unwrap_or_else(|_| OverrideBuilder::new(base_path).build().unwrap()); let match_result = overrides.matched(path, false); - // Must be whitelisted and not ignored match_result.is_whitelist() && !match_result.is_ignore() } fn matches_exclude_patterns(path: &Path, excludes: &[Exclude], base_path: &Path) -> bool { let mut override_builder = OverrideBuilder::new(base_path); - // Add exclude patterns (negative) for exclude in excludes { match exclude { Exclude::Pattern(pattern) => { @@ -185,7 +167,6 @@ fn matches_exclude_patterns(path: &Path, excludes: &[Exclude], base_path: &Path) } } Exclude::File(file_path) => { - // Handle file exclusions if 
file_path.is_absolute() { if file_path.exists() { if let Ok(relative_path) = file_path.strip_prefix(base_path) { @@ -209,10 +190,9 @@ fn matches_exclude_patterns(path: &Path, excludes: &[Exclude], base_path: &Path) } } - let overrides = override_builder.build().unwrap_or_else(|_| { - // Return a default override that matches nothing if build fails - OverrideBuilder::new(base_path).build().unwrap() - }); + let overrides = override_builder + .build() + .unwrap_or_else(|_| OverrideBuilder::new(base_path).build().unwrap()); let match_result = overrides.matched(path, false); match_result.is_ignore() @@ -231,7 +211,6 @@ pub fn process_entries(args: &Cli) -> Result> { ); let selected_paths = picker.run()?; - // Process selected files selected_paths .into_iter() .filter_map(|path| { @@ -270,7 +249,6 @@ pub fn process_entries(args: &Cli) -> Result> { all_entries.extend(dir_entries); } else if path.is_file() { - // Process single file let entry = ignore::WalkBuilder::new(path) .build() .next() @@ -290,10 +268,10 @@ pub fn process_entries(args: &Cli) -> Result> { Ok(entries) } -// Removed the is_excluded function as it's now handled by WalkBuilder overrides - pub fn create_token_counter(args: &Cli) -> Result { - match args.tokenizer.as_ref().unwrap_or(&TokenizerType::Tiktoken) { + let tokenizer_type = args.get_tokenizer_type().unwrap_or(TokenizerType::Tiktoken); + + match tokenizer_type { TokenizerType::Tiktoken => { if let Some(model) = &args.model { TokenCounter::new(model) @@ -315,10 +293,8 @@ pub fn create_token_counter(args: &Cli) -> Result { fn process_file(entry: &ignore::DirEntry, base_path: &Path) -> Result { let relative_path = if base_path.is_file() { - // If base_path is a file, use the file name as the relative path base_path.file_name().map(PathBuf::from).unwrap_or_default() } else { - // Otherwise, strip the base path as usual entry.path().strip_prefix(base_path)?.to_path_buf() }; let content = fs::read_to_string(entry.path())?; @@ -337,11 +313,12 @@ mod tests 
{ use std::io::Write; use tempfile::{tempdir, TempDir}; + use crate::cli::CliOutputFormat; + fn setup_test_directory() -> Result<(TempDir, Vec)> { let dir = tempdir()?; let mut created_files = Vec::new(); - // Create a nested directory structure with various file types let test_files = vec![ ("src/main.rs", "fn main() {}"), ("src/lib.rs", "pub fn lib() {}"), @@ -370,15 +347,16 @@ mod tests { fn create_test_cli(dir_path: &Path) -> Cli { Cli { + command: None, config: false, paths: vec![dir_path.to_string_lossy().to_string()], config_path: false, include: None, only_include: None, exclude: None, - max_size: Some(10 * 1024 * 1024), // 10MB + max_size: Some(10 * 1024 * 1024), max_depth: Some(10), - output: Some(OutputFormat::Both), + output: Some(CliOutputFormat::Both), file: None, print: true, threads: None, @@ -393,6 +371,7 @@ mod tests { traverse_links: false, link_depth: None, xml: false, + verbose: 0, } } @@ -403,11 +382,9 @@ mod tests { let main_rs_path = dir.path().join("src/main.rs"); let test_cases = vec![ - // Pattern exclusions (Exclude::Pattern("**/*.rs".to_string()), true), (Exclude::Pattern("**/*.js".to_string()), false), (Exclude::Pattern("test/**".to_string()), false), - // File exclusions (Exclude::File(main_rs_path.clone()), true), (Exclude::File(PathBuf::from("nonexistent.rs")), false), ]; @@ -417,8 +394,6 @@ mod tests { match &exclude { Exclude::Pattern(pattern) => { - // For patterns that should exclude, we need to add a "!" 
prefix - // to make them negative patterns (exclusions) let exclude_pattern = if !pattern.starts_with('!') { format!("!{pattern}") } else { @@ -428,13 +403,11 @@ mod tests { } Exclude::File(file_path) => { if file_path.exists() { - // Get the file path relative to the test directory let rel_path = if file_path.is_absolute() { file_path.strip_prefix(dir.path()).unwrap_or(file_path) } else { file_path }; - // Add as a negative pattern let pattern = format!("!{}", rel_path.display()); override_builder.add(&pattern).unwrap(); } @@ -458,11 +431,9 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test excluding all Rust files cli.exclude = Some(vec![Exclude::Pattern("**/*.rs".to_string())]); let entries = process_entries(&cli)?; - // Verify no .rs files were processed for entry in &entries { assert_ne!( entry.path.extension().and_then(|ext| ext.to_str()), @@ -472,7 +443,6 @@ mod tests { ); } - // Test excluding specific directories cli.exclude = Some(vec![ Exclude::Pattern("**/node_modules/**".to_string()), Exclude::Pattern("**/target/**".to_string()), @@ -480,7 +450,6 @@ mod tests { ]); let entries = process_entries(&cli)?; - // Verify excluded directories were not processed for entry in &entries { let path_str = entry.path.to_string_lossy(); assert!( @@ -500,14 +469,11 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test including additional Rust files (should get all source files) cli.include = Some(vec!["**/*.rs".to_string()]); let entries = process_entries(&cli)?; - // Should include all source files (since .rs is already a source extension) assert!(!entries.is_empty(), "Should have found files"); - // Should include source files: .rs, .py, .md let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) @@ -516,23 +482,19 @@ mod tests { assert!(extensions.contains(&"py")); 
assert!(extensions.contains(&"md")); - // Test including a non-source extension as additional cli.include = Some(vec!["**/*.xyz".to_string()]); - - // Create a .xyz file fs::write(dir.path().join("test.xyz"), "data")?; let entries = process_entries(&cli)?; - // Should include BOTH .xyz files AND normal source files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"xyz")); // Additional pattern - assert!(extensions.contains(&"rs")); // Normal source file - assert!(extensions.contains(&"py")); // Normal source file - assert!(extensions.contains(&"md")); // Normal source file + assert!(extensions.contains(&"xyz")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); Ok(()) } @@ -542,16 +504,13 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test additional includes with excludes - should get all source files plus additional, minus excludes cli.include = Some(vec!["**/*.xyz".to_string()]); cli.exclude = Some(vec![Exclude::Pattern("**/test.rs".to_string())]); - // Create a .xyz file fs::write(dir.path().join("test.xyz"), "data")?; let entries = process_entries(&cli)?; - // Should include all source files + .xyz files, but exclude test.rs assert!(!entries.is_empty(), "Should have found files"); let extensions: Vec<_> = entries @@ -559,13 +518,11 @@ mod tests { .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - // Should have .xyz (additional) plus source files (.rs, .py, .md) - assert!(extensions.contains(&"xyz")); // Additional pattern - assert!(extensions.contains(&"rs")); // Source files (but not test.rs) - assert!(extensions.contains(&"py")); // Source files - assert!(extensions.contains(&"md")); // Source files + assert!(extensions.contains(&"xyz")); + assert!(extensions.contains(&"rs")); + 
assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); - // Verify test.rs was excluded for entry in &entries { assert!( !entry.path.to_string_lossy().contains("test.rs"), @@ -574,12 +531,10 @@ mod tests { ); } - // Test excluding a directory cli.include = Some(vec!["**/*.xyz".to_string()]); cli.exclude = Some(vec![Exclude::Pattern("**/nested/**".to_string())]); let entries = process_entries(&cli)?; - // Should include source files + .xyz, but exclude nested directory for entry in &entries { assert!( !entry.path.to_string_lossy().contains("nested"), @@ -596,15 +551,11 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test with depth limit of 1 cli.max_depth = Some(1); process_directory(&cli)?; - // Verify only top-level files were processed - // Test with depth limit of 2 cli.max_depth = Some(2); process_directory(&cli)?; - // Verify files up to depth 2 were processed Ok(()) } @@ -614,15 +565,11 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test without hidden files cli.hidden = false; process_directory(&cli)?; - // Verify hidden files were not processed - // Test with hidden files cli.hidden = true; process_directory(&cli)?; - // Verify hidden files were processed Ok(()) } @@ -634,7 +581,6 @@ mod tests { let cli = create_test_cli(rust_file); process_directory(&cli)?; - // Verify single file was processed correctly Ok(()) } @@ -643,19 +589,16 @@ mod tests { fn test_include_patterns_extend_source_detection() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create a .peb file (not recognized by source detection) let peb_path = dir.path().join("template.peb"); let mut peb_file = File::create(&peb_path)?; writeln!(peb_file, "template content")?; - // Create a .xyz file (also not recognized) let xyz_path = dir.path().join("data.xyz"); let mut xyz_file = File::create(&xyz_path)?; writeln!(xyz_file, "data content")?; 
let mut cli = create_test_cli(dir.path()); - // Test 1: Without include patterns, non-source files should be excluded cli.include = None; let entries = process_entries(&cli)?; assert!(!entries @@ -665,21 +608,18 @@ mod tests { .iter() .any(|e| e.path.extension().and_then(|ext| ext.to_str()) == Some("xyz"))); - // Test 2: With include patterns, should ADD to source detection cli.include = Some(vec!["*.peb".to_string()]); let entries = process_entries(&cli)?; - // Should include .peb files PLUS all normal source files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"peb")); // Additional pattern - assert!(extensions.contains(&"rs")); // Normal source file - assert!(extensions.contains(&"py")); // Normal source file - assert!(extensions.contains(&"md")); // Normal source file + assert!(extensions.contains(&"peb")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); - // Test 3: Multiple include patterns (additive) cli.include = Some(vec!["*.peb".to_string(), "*.xyz".to_string()]); let entries = process_entries(&cli)?; @@ -687,13 +627,12 @@ mod tests { .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"peb")); // Additional pattern - assert!(extensions.contains(&"xyz")); // Additional pattern - assert!(extensions.contains(&"rs")); // Normal source file - assert!(extensions.contains(&"py")); // Normal source file - assert!(extensions.contains(&"md")); // Normal source file + assert!(extensions.contains(&"peb")); + assert!(extensions.contains(&"xyz")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); - // Test 4: Include + exclude patterns (union then subtract) cli.include = Some(vec!["*.peb".to_string(), "*.xyz".to_string()]); cli.exclude = 
Some(vec![Exclude::Pattern("*.xyz".to_string())]); let entries = process_entries(&cli)?; @@ -702,11 +641,11 @@ mod tests { .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"peb")); // Additional pattern, not excluded - assert!(!extensions.contains(&"xyz")); // Additional pattern, but excluded - assert!(extensions.contains(&"rs")); // Normal source file, not excluded - assert!(extensions.contains(&"py")); // Normal source file, not excluded - assert!(extensions.contains(&"md")); // Normal source file, not excluded + assert!(extensions.contains(&"peb")); + assert!(!extensions.contains(&"xyz")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); Ok(()) } @@ -715,7 +654,6 @@ mod tests { fn test_backward_compatibility_no_patterns() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Add some non-source files that should be ignored by default let binary_path = dir.path().join("binary.bin"); fs::write(&binary_path, b"\x00\x01\x02\x03")?; @@ -724,12 +662,10 @@ mod tests { let mut cli = create_test_cli(dir.path()); - // Test 1: No patterns specified - should only get source files cli.include = None; cli.exclude = None; let entries = process_entries(&cli)?; - // Should find source files (.rs, .py, .md) but not .bin or .conf let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) @@ -741,19 +677,17 @@ mod tests { assert!(!extensions.contains(&"bin")); assert!(!extensions.contains(&"conf")); - // Test 2: Only exclude patterns - should work as before cli.exclude = Some(vec![Exclude::Pattern("**/*.rs".to_string())]); let entries = process_entries(&cli)?; - // Should still apply source detection, but exclude .rs files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(!extensions.contains(&"rs")); // Excluded - 
assert!(extensions.contains(&"py")); // Source file, not excluded - assert!(!extensions.contains(&"bin")); // Not source file + assert!(!extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(!extensions.contains(&"bin")); Ok(()) } @@ -762,23 +696,19 @@ mod tests { fn test_single_file_processing_with_patterns() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create a .peb file let peb_path = dir.path().join("template.peb"); fs::write(&peb_path, "template content")?; - // Test 1: Single .peb file without include patterns - should be rejected let mut cli = create_test_cli(&peb_path); cli.paths = vec![peb_path.to_string_lossy().to_string()]; cli.include = None; let entries = process_entries(&cli)?; assert_eq!(entries.len(), 0); - // Test 2: Single .peb file WITH include patterns - should be accepted cli.include = Some(vec!["*.peb".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 1); - // Test 3: Single .rs file with exclude pattern - should be rejected let rs_path = dir.path().join("src/main.rs"); cli.paths = vec![rs_path.to_string_lossy().to_string()]; cli.include = None; @@ -793,45 +723,38 @@ mod tests { fn test_pattern_edge_cases() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create various test files fs::write(dir.path().join("test.peb"), "content")?; fs::write(dir.path().join("test.xyz"), "content")?; fs::write(dir.path().join("script.py"), "print('test')")?; let mut cli = create_test_cli(dir.path()); - // Test 1: Empty include patterns (edge case) - should still get source files cli.include = Some(vec![]); let entries = process_entries(&cli)?; - // With empty include patterns, should still get source files assert!(!entries.is_empty()); let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"rs")); // Source files should still be included + assert!(extensions.contains(&"rs")); 
assert!(extensions.contains(&"py")); assert!(extensions.contains(&"md")); - // Test 2: Include pattern that matches source files (additive) cli.include = Some(vec!["**/*.py".to_string()]); let entries = process_entries(&cli)?; - // Should include source files + additional .py matches let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"py")); // Both existing and additional - assert!(extensions.contains(&"rs")); // Source files - assert!(extensions.contains(&"md")); // Source files + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"md")); - // Test 3: Include everything, then exclude cli.include = Some(vec!["**/*".to_string()]); cli.exclude = Some(vec![Exclude::Pattern("**/*.rs".to_string())]); let entries = process_entries(&cli)?; - // Should include everything (.peb, .xyz, .py, .md from both source detection and include pattern) but not .rs files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) @@ -851,19 +774,15 @@ mod tests { let (dir, _files) = setup_test_directory()?; let mut cli = create_test_cli(dir.path()); - // Test 1: Invalid glob pattern (this should not panic) cli.include = Some(vec!["[invalid".to_string()]); let _entries = process_entries(&cli)?; - // Should handle gracefully, possibly matching nothing - // Test 2: Mix of valid and invalid patterns cli.include = Some(vec![ "**/*.rs".to_string(), "[invalid".to_string(), "**/*.py".to_string(), ]); let _entries = process_entries(&cli)?; - // Should process valid patterns, ignore invalid ones Ok(()) } @@ -872,7 +791,6 @@ mod tests { fn test_include_patterns_are_additional() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create a .peb file fs::write(dir.path().join("template.peb"), "template content")?; let mut cli = create_test_cli(dir.path()); @@ -880,18 +798,16 @@ mod tests { 
let entries = process_entries(&cli)?; - // Should include BOTH .peb files AND normal source files let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(extensions.contains(&"peb")); // Additional pattern - assert!(extensions.contains(&"rs")); // Normal source file - assert!(extensions.contains(&"py")); // Normal source file - assert!(extensions.contains(&"md")); // Normal source file + assert!(extensions.contains(&"peb")); + assert!(extensions.contains(&"rs")); + assert!(extensions.contains(&"py")); + assert!(extensions.contains(&"md")); - // Should be more than just the .peb file assert!(entries.len() > 1); Ok(()) @@ -901,21 +817,18 @@ mod tests { fn test_only_include_replacement_behavior() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create various test files including non-source files fs::write(dir.path().join("config.conf"), "key=value")?; fs::write(dir.path().join("data.toml"), "[section]\nkey = 'value'")?; fs::write(dir.path().join("template.peb"), "template content")?; let mut cli = create_test_cli(dir.path()); - // Test 1: --only-include should ONLY include specified patterns, no other files cli.only_include = Some(vec!["*.conf".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 1); assert!(entries[0].path.extension().and_then(|ext| ext.to_str()) == Some("conf")); - // Verify no other files are included let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) @@ -925,7 +838,6 @@ mod tests { assert!(!extensions.contains(&"md")); assert!(!extensions.contains(&"toml")); - // Test 2: Multiple patterns in --only-include cli.only_include = Some(vec!["*.conf".to_string(), "*.toml".to_string()]); let entries = process_entries(&cli)?; @@ -936,10 +848,9 @@ mod tests { .collect(); assert!(extensions.contains(&"conf")); assert!(extensions.contains(&"toml")); - assert!(!extensions.contains(&"rs")); // 
No other files + assert!(!extensions.contains(&"rs")); assert!(!extensions.contains(&"py")); - // Test 3: --only-include with exclude patterns cli.only_include = Some(vec![ "*.conf".to_string(), "*.toml".to_string(), @@ -948,22 +859,21 @@ mod tests { cli.exclude = Some(vec![Exclude::Pattern("*.toml".to_string())]); let entries = process_entries(&cli)?; - assert_eq!(entries.len(), 2); // conf and peb, but not toml (excluded) + assert_eq!(entries.len(), 2); let extensions: Vec<_> = entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); assert!(extensions.contains(&"conf")); assert!(extensions.contains(&"peb")); - assert!(!extensions.contains(&"toml")); // Excluded - assert!(!extensions.contains(&"rs")); // No other files + assert!(!extensions.contains(&"toml")); + assert!(!extensions.contains(&"rs")); - // Test 4: --only-include with pattern that matches nothing cli.only_include = Some(vec!["*.nonexistent".to_string()]); cli.exclude = None; let entries = process_entries(&cli)?; - assert_eq!(entries.len(), 0); // Should match nothing + assert_eq!(entries.len(), 0); Ok(()) } @@ -972,32 +882,27 @@ mod tests { fn test_only_include_vs_include_behavior_difference() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create a non-source file fs::write(dir.path().join("config.conf"), "key=value")?; let mut cli = create_test_cli(dir.path()); - // Test additive behavior with --include cli.include = Some(vec!["*.conf".to_string()]); cli.only_include = None; let additive_entries = process_entries(&cli)?; - // Should include conf + source files let additive_extensions: Vec<_> = additive_entries .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(additive_extensions.contains(&"conf")); // Additional pattern - assert!(additive_extensions.contains(&"rs")); // Source files - assert!(additive_extensions.contains(&"py")); // Source files - assert!(additive_extensions.contains(&"md")); // 
Source files + assert!(additive_extensions.contains(&"conf")); + assert!(additive_extensions.contains(&"rs")); + assert!(additive_extensions.contains(&"py")); + assert!(additive_extensions.contains(&"md")); - // Test replacement behavior with --only-include cli.include = None; cli.only_include = Some(vec!["*.conf".to_string()]); let replacement_entries = process_entries(&cli)?; - // Should include ONLY conf files assert_eq!(replacement_entries.len(), 1); assert!( replacement_entries[0] @@ -1011,12 +916,11 @@ mod tests { .iter() .filter_map(|e| e.path.extension().and_then(|ext| ext.to_str())) .collect(); - assert!(replacement_extensions.contains(&"conf")); // Only pattern - assert!(!replacement_extensions.contains(&"rs")); // No source files - assert!(!replacement_extensions.contains(&"py")); // No source files - assert!(!replacement_extensions.contains(&"md")); // No source files + assert!(replacement_extensions.contains(&"conf")); + assert!(!replacement_extensions.contains(&"rs")); + assert!(!replacement_extensions.contains(&"py")); + assert!(!replacement_extensions.contains(&"md")); - // Verify the counts are different assert!(additive_entries.len() > replacement_entries.len()); Ok(()) @@ -1026,32 +930,27 @@ mod tests { fn test_only_include_single_file_processing() -> Result<()> { let (dir, _files) = setup_test_directory()?; - // Create and test single file processing with a truly non-source file let config_path = dir.path().join("config.conf"); fs::write(&config_path, "key=value")?; let mut cli = create_test_cli(&config_path); cli.paths = vec![config_path.to_string_lossy().to_string()]; - // Test 1: Single non-source file without --only-include should be rejected cli.only_include = None; let entries = process_entries(&cli)?; assert_eq!(entries.len(), 0); - // Test 2: Single non-source file WITH --only-include should be accepted cli.only_include = Some(vec!["*.conf".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 1); 
assert!(entries[0].path.extension().and_then(|ext| ext.to_str()) == Some("conf")); - // Test 3: Single source file WITH --only-include that doesn't match should be rejected let rs_path = dir.path().join("src/main.rs"); cli.paths = vec![rs_path.to_string_lossy().to_string()]; cli.only_include = Some(vec!["*.conf".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 0); - // Test 4: Single source file WITH --only-include that matches should be accepted cli.only_include = Some(vec!["*.rs".to_string()]); let entries = process_entries(&cli)?; assert_eq!(entries.len(), 1); diff --git a/src/cli.rs b/src/cli.rs index d8147e4..22ae98a 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,27 +1,176 @@ -use crate::config::Config; -use clap::{Parser, ValueEnum}; -use serde::{Deserialize, Serialize}; use std::path::PathBuf; +use clap::{Parser, Subcommand, ValueEnum}; +use serde::{Deserialize, Serialize}; + +use glimpse::{Config, Exclude, OutputFormat, TokenizerType}; + #[derive(Debug, Clone, ValueEnum, Serialize, Deserialize)] -pub enum OutputFormat { +pub enum CliOutputFormat { Tree, Files, Both, } +impl From for OutputFormat { + fn from(format: CliOutputFormat) -> Self { + match format { + CliOutputFormat::Tree => OutputFormat::Tree, + CliOutputFormat::Files => OutputFormat::Files, + CliOutputFormat::Both => OutputFormat::Both, + } + } +} + +impl From for CliOutputFormat { + fn from(format: OutputFormat) -> Self { + match format { + OutputFormat::Tree => CliOutputFormat::Tree, + OutputFormat::Files => CliOutputFormat::Files, + OutputFormat::Both => CliOutputFormat::Both, + } + } +} + #[derive(Debug, Clone, ValueEnum)] -pub enum TokenizerType { +pub enum CliTokenizerType { Tiktoken, #[clap(name = "huggingface")] HuggingFace, } -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(untagged)] -pub enum Exclude { - File(PathBuf), - Pattern(String), +impl From for TokenizerType { + fn from(t: CliTokenizerType) -> Self { + match t { + 
CliTokenizerType::Tiktoken => TokenizerType::Tiktoken, + CliTokenizerType::HuggingFace => TokenizerType::HuggingFace, + } + } +} + +#[derive(Subcommand, Debug, Clone)] +pub enum Commands { + /// Generate call graph for a function + #[command(name = "code")] + Code(CodeArgs), + + /// Manage the code index + #[command(name = "index")] + Index(IndexArgs), +} + +#[derive(Parser, Debug, Clone)] +pub struct CodeArgs { + /// Target function in file:function format (e.g., src/main.rs:main or :main) + #[arg(required = true)] + pub target: String, + + /// Project root directory + #[arg(short, long, default_value = ".")] + pub root: PathBuf, + + /// Include callers (reverse call graph) + #[arg(long)] + pub callers: bool, + + /// Maximum depth to traverse + #[arg(short, long)] + pub depth: Option, + + /// Output file (default: stdout) + #[arg(short = 'f', long)] + pub file: Option, + + /// Strict mode: only resolve calls via imports (no global name matching) + #[arg(long)] + pub strict: bool, + + /// Precise mode: use LSP for type-aware resolution (slower but more accurate) + #[arg(long)] + pub precise: bool, + + /// Include hidden files and directories + #[arg(short = 'H', long)] + pub hidden: bool, + + /// Don't respect ignore files (.gitignore, .ignore, etc.) + #[arg(long)] + pub no_ignore: bool, +} + +#[derive(Parser, Debug, Clone)] +pub struct IndexArgs { + #[command(subcommand)] + pub command: IndexCommand, +} + +#[derive(Subcommand, Debug, Clone)] +pub enum IndexCommand { + /// Build or update the index for a project + Build { + /// Project root directory + #[arg(default_value = ".")] + path: PathBuf, + + /// Force rebuild (ignore existing index) + #[arg(short, long)] + force: bool, + + /// Use LSP for precise call resolution (slower but more accurate) + #[arg(long)] + precise: bool, + + /// Include hidden files and directories + #[arg(short = 'H', long)] + hidden: bool, + + /// Don't respect ignore files (.gitignore, .ignore, etc.) 
+ #[arg(long)] + no_ignore: bool, + }, + + /// Clear the index for a project + Clear { + /// Project root directory + #[arg(default_value = ".")] + path: PathBuf, + }, + + /// Show index status and stats + Status { + /// Project root directory + #[arg(default_value = ".")] + path: PathBuf, + }, +} + +#[derive(Debug, Clone)] +pub struct FunctionTarget { + pub file: Option, + pub function: String, +} + +impl FunctionTarget { + pub fn parse(target: &str) -> anyhow::Result { + if let Some((file, func)) = target.rsplit_once(':') { + if file.is_empty() { + Ok(Self { + file: None, + function: func.to_string(), + }) + } else { + Ok(Self { + file: Some(PathBuf::from(file)), + function: func.to_string(), + }) + } + } else { + Ok(Self { + file: None, + function: target.to_string(), + }) + } + } } #[derive(Parser, Debug, Clone)] @@ -31,95 +180,79 @@ pub enum Exclude { version )] pub struct Cli { - /// Files or directories to analyze (multiple allowed), or a single URL/git repository + #[command(subcommand)] + pub command: Option, + #[arg(default_value = ".")] pub paths: Vec, - /// Print the config file path and exit #[arg(long)] pub config_path: bool, - /// Additional patterns to include (e.g. "*.rs,*.go") - adds to source file detection #[arg(short, long, value_delimiter = ',')] pub include: Option>, - /// Only include files matching these patterns (e.g. 
"*.yml,*.toml") - replaces source file detection #[arg(long, value_delimiter = ',')] pub only_include: Option>, - /// Additional patterns to exclude #[arg(short, long, value_parser = parse_exclude, value_delimiter = ',')] pub exclude: Option>, - /// Maximum file size in bytes #[arg(short, long)] pub max_size: Option, - /// Maximum directory depth #[arg(long)] pub max_depth: Option, - /// Output format (tree, files, or both) #[arg(short, long, value_enum)] - pub output: Option, + pub output: Option, - /// Output file path (optional) #[arg(short = 'f', long, num_args = 0..=1, default_missing_value = "GLIMPSE.md")] pub file: Option, - /// Init glimpse config file #[arg(long, default_value_t = false)] pub config: bool, - /// Print to stdout instead #[arg(short, long)] pub print: bool, - /// Number of threads for parallel processing #[arg(short, long)] pub threads: Option, - /// Show hidden files and directories #[arg(short = 'H', long)] pub hidden: bool, - /// Don't respect .gitignore files #[arg(long)] pub no_ignore: bool, - /// Ignore Token Count #[arg(long)] pub no_tokens: bool, - /// Tokenizer to use (tiktoken or huggingface) + /// Verbosity level (-v, -vv, -vvv) + #[arg(short, long, action = clap::ArgAction::Count)] + pub verbose: u8, + #[arg(long, value_enum)] - pub tokenizer: Option, + pub tokenizer: Option, - /// Model to use for HuggingFace tokenizer #[arg(long)] pub model: Option, - /// Path to local tokenizer file #[arg(long)] pub tokenizer_file: Option, - /// Interactive mode #[arg(long)] pub interactive: bool, - /// Output as Pdf #[arg(long)] pub pdf: Option, - /// Traverse sublinks when processing URLs #[arg(long)] pub traverse_links: bool, - /// Maximum depth to traverse sublinks (default: 1) #[arg(long)] pub link_depth: Option, - /// Output in XML format for better LLM compatibility #[arg(short = 'x', long)] pub xml: bool, } @@ -128,14 +261,12 @@ impl Cli { pub fn parse_with_config(config: &Config) -> anyhow::Result { let mut cli = Self::parse(); - // 
Apply config defaults if CLI args aren't specified cli.max_size = cli.max_size.or(Some(config.max_size)); cli.max_depth = cli.max_depth.or(Some(config.max_depth)); - cli.output = cli.output.or(Some(OutputFormat::from( - config.default_output_format.clone(), - ))); + cli.output = cli + .output + .or(Some(config.default_output_format.clone().into())); - // Merge excludes from config and CLI if let Some(mut excludes) = cli.exclude.take() { excludes.extend(config.default_excludes.clone()); cli.exclude = Some(excludes); @@ -143,19 +274,17 @@ impl Cli { cli.exclude = Some(config.default_excludes.clone()); } - // Set default tokenizer if none specified but token counting is enabled if !cli.no_tokens && cli.tokenizer.is_none() { cli.tokenizer = Some(match config.default_tokenizer.as_str() { - "huggingface" => TokenizerType::HuggingFace, - _ => TokenizerType::Tiktoken, + "huggingface" => CliTokenizerType::HuggingFace, + _ => CliTokenizerType::Tiktoken, }); } - // Set default model for HuggingFace if none specified if cli .tokenizer .as_ref() - .is_some_and(|t| matches!(t, TokenizerType::HuggingFace)) + .is_some_and(|t| matches!(t, CliTokenizerType::HuggingFace)) && cli.model.is_none() && cli.tokenizer_file.is_none() { @@ -172,7 +301,6 @@ impl Cli { } pub fn validate_args(&self, is_url: bool) -> anyhow::Result<()> { - // Validate that both include and only_include are not used together if self.include.is_some() && self.only_include.is_some() { return Err(anyhow::anyhow!( "Cannot use both --include and --only-include flags together. Use --include for additive behavior (add to source files) or --only-include for replacement behavior (only specified patterns)." 
@@ -195,6 +323,14 @@ impl Cli { } Ok(()) } + + pub fn get_output_format(&self) -> Option { + self.output.clone().map(|f| f.into()) + } + + pub fn get_tokenizer_type(&self) -> Option { + self.tokenizer.clone().map(|t| t.into()) + } } fn parse_exclude(value: &str) -> Result { @@ -205,3 +341,43 @@ fn parse_exclude(value: &str) -> Result { Ok(Exclude::Pattern(value.to_string())) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_function_target_parse_with_file() { + let target = FunctionTarget::parse("src/main.rs:main").unwrap(); + assert_eq!(target.file, Some(PathBuf::from("src/main.rs"))); + assert_eq!(target.function, "main"); + } + + #[test] + fn test_function_target_parse_without_file() { + let target = FunctionTarget::parse(":main").unwrap(); + assert_eq!(target.file, None); + assert_eq!(target.function, "main"); + } + + #[test] + fn test_function_target_parse_function_only() { + let target = FunctionTarget::parse("main").unwrap(); + assert_eq!(target.file, None); + assert_eq!(target.function, "main"); + } + + #[test] + fn test_function_target_parse_nested_path() { + let target = FunctionTarget::parse("src/code/graph.rs:build").unwrap(); + assert_eq!(target.file, Some(PathBuf::from("src/code/graph.rs"))); + assert_eq!(target.function, "build"); + } + + #[test] + fn test_function_target_parse_windows_path() { + let target = FunctionTarget::parse("C:\\src\\main.rs:main").unwrap(); + assert_eq!(target.file, Some(PathBuf::from("C:\\src\\main.rs"))); + assert_eq!(target.function, "main"); + } +} diff --git a/src/code/extract.rs b/src/code/extract.rs new file mode 100644 index 0000000..0bd5c16 --- /dev/null +++ b/src/code/extract.rs @@ -0,0 +1,305 @@ +use std::path::Path; + +use anyhow::{Context, Result}; +use tree_sitter::{Language, Node, Query, QueryCursor, StreamingIterator, Tree}; + +use super::grammar::{LanguageEntry, Registry}; +use super::index::{Call, Definition, DefinitionKind, Import, Span}; + +pub struct QuerySet { + pub definitions: 
Query, + pub calls: Query, + pub imports: Option, + def_name_idx: u32, + def_kind_indices: Vec<(u32, DefinitionKind)>, + call_name_idx: u32, + call_qualifier_idx: Option, + import_path_indices: Vec, + import_alias_idx: Option, +} + +impl QuerySet { + pub fn load(language: Language, entry: &LanguageEntry) -> Result { + let definitions = Query::new(&language, &entry.definition_query) + .with_context(|| format!("failed to compile definition query for {}", entry.name))?; + + let calls = Query::new(&language, &entry.call_query) + .with_context(|| format!("failed to compile call query for {}", entry.name))?; + + let imports = + if entry.import_query.trim().is_empty() { + None + } else { + Some(Query::new(&language, &entry.import_query).with_context(|| { + format!("failed to compile import query for {}", entry.name) + })?) + }; + + let def_name_idx = definitions + .capture_index_for_name("name") + .unwrap_or(u32::MAX); + + let def_kind_indices = Self::build_definition_kind_indices(&definitions); + + let call_name_idx = calls.capture_index_for_name("name").unwrap_or(u32::MAX); + let call_qualifier_idx = calls.capture_index_for_name("qualifier"); + + let (import_path_indices, import_alias_idx) = if let Some(ref q) = imports { + let path_indices = ["path", "source", "system_path", "local_path", "module"] + .iter() + .filter_map(|name| q.capture_index_for_name(name)) + .collect(); + let alias = q.capture_index_for_name("alias"); + (path_indices, alias) + } else { + (vec![], None) + }; + + Ok(Self { + definitions, + calls, + imports, + def_name_idx, + def_kind_indices, + call_name_idx, + call_qualifier_idx, + import_path_indices, + import_alias_idx, + }) + } + + fn build_definition_kind_indices(query: &Query) -> Vec<(u32, DefinitionKind)> { + let mut indices = Vec::new(); + + let kind_mappings = [ + ("function.definition", DefinitionKind::Function), + ("method.definition", DefinitionKind::Method), + ("class.definition", DefinitionKind::Class), + ("struct.definition", 
DefinitionKind::Struct), + ("enum.definition", DefinitionKind::Enum), + ("trait.definition", DefinitionKind::Trait), + ("interface.definition", DefinitionKind::Interface), + ("module.definition", DefinitionKind::Module), + ("object.definition", DefinitionKind::Other("object".into())), + ]; + + for (name, kind) in kind_mappings { + if let Some(idx) = query.capture_index_for_name(name) { + indices.push((idx, kind)); + } + } + + indices + } +} + +pub struct Extractor { + language: Language, + queries: QuerySet, +} + +impl Extractor { + pub fn new(lang_name: &str) -> Result { + let registry = Registry::global(); + let entry = registry + .get(lang_name) + .with_context(|| format!("unknown language: {}", lang_name))?; + + let language = super::grammar::load_language(lang_name)?; + let queries = QuerySet::load(language.clone(), entry)?; + + Ok(Self { language, queries }) + } + + pub fn from_extension(ext: &str) -> Result { + let registry = Registry::global(); + let entry = registry + .get_by_extension(ext) + .with_context(|| format!("no language for extension: {}", ext))?; + + let language = super::grammar::load_language(&entry.name)?; + let queries = QuerySet::load(language.clone(), entry)?; + + Ok(Self { language, queries }) + } + + pub fn language(&self) -> &Language { + &self.language + } + + pub fn extract_definitions(&self, tree: &Tree, source: &[u8], path: &Path) -> Vec { + let mut cursor = QueryCursor::new(); + let mut definitions = Vec::new(); + let mut matches = cursor.matches(&self.queries.definitions, tree.root_node(), source); + + while let Some(m) = matches.next() { + let mut name: Option<&str> = None; + let mut kind: Option = None; + let mut span_node: Option = None; + + for capture in m.captures { + if capture.index == self.queries.def_name_idx { + name = capture.node.utf8_text(source).ok(); + } + + for (kind_idx, kind_type) in &self.queries.def_kind_indices { + if capture.index == *kind_idx { + kind = Some(kind_type.clone()); + span_node = 
Some(capture.node); + break; + } + } + } + + if let (Some(name), Some(kind), Some(node)) = (name, kind, span_node) { + definitions.push(Definition { + name: name.to_string(), + kind, + span: node_to_span(&node), + file: path.to_path_buf(), + signature: None, + }); + } + } + + definitions + } + + pub fn extract_calls(&self, tree: &Tree, source: &[u8], path: &Path) -> Vec { + let definitions = self.extract_definitions(tree, source, path); + let mut cursor = QueryCursor::new(); + let mut calls = Vec::new(); + let mut matches = cursor.matches(&self.queries.calls, tree.root_node(), source); + + while let Some(m) = matches.next() { + let mut callee: Option<&str> = None; + let mut qualifier: Option<&str> = None; + let mut call_node: Option = None; + + for capture in m.captures { + if capture.index == self.queries.call_name_idx { + callee = capture.node.utf8_text(source).ok(); + call_node = Some(capture.node); + } + if Some(capture.index) == self.queries.call_qualifier_idx { + qualifier = capture.node.utf8_text(source).ok(); + } + } + + if let (Some(callee), Some(node)) = (callee, call_node) { + let caller = find_enclosing_definition(&definitions, node.start_byte()); + + calls.push(Call { + callee: callee.to_string(), + qualifier: qualifier.map(|s| s.to_string()), + span: node_to_span(&node), + file: path.to_path_buf(), + caller, + resolved: None, + }); + } + } + + calls + } + + pub fn extract_imports(&self, tree: &Tree, source: &[u8], path: &Path) -> Vec { + let Some(ref import_query) = self.queries.imports else { + return Vec::new(); + }; + + let mut cursor = QueryCursor::new(); + let mut imports = Vec::new(); + let mut seen_ranges = std::collections::HashSet::new(); + let mut matches = cursor.matches(import_query, tree.root_node(), source); + + while let Some(m) = matches.next() { + let mut module_path: Option<&str> = None; + let mut alias: Option<&str> = None; + let mut import_node: Option = None; + + for capture in m.captures { + if 
self.queries.import_path_indices.contains(&capture.index) + && module_path.is_none() + { + module_path = capture.node.utf8_text(source).ok(); + import_node = Some(capture.node); + } + + if self.queries.import_alias_idx == Some(capture.index) { + alias = capture.node.utf8_text(source).ok(); + } + } + + if let (Some(module_path), Some(node)) = (module_path, import_node) { + let range = (node.start_byte(), node.end_byte()); + if seen_ranges.contains(&range) { + continue; + } + seen_ranges.insert(range); + + let cleaned_path = clean_import_path(module_path); + + imports.push(Import { + module_path: cleaned_path, + alias: alias.map(|s| s.to_string()), + span: node_to_span(&node), + file: path.to_path_buf(), + }); + } + } + + imports + } +} + +fn node_to_span(node: &Node) -> Span { + Span { + start_byte: node.start_byte(), + end_byte: node.end_byte(), + start_line: node.start_position().row + 1, + end_line: node.end_position().row + 1, + } +} + +fn find_enclosing_definition(definitions: &[Definition], byte_offset: usize) -> Option { + definitions + .iter() + .filter(|d| d.span.start_byte <= byte_offset && byte_offset < d.span.end_byte) + .min_by_key(|d| d.span.end_byte - d.span.start_byte) + .map(|d| d.name.clone()) +} + +fn clean_import_path(path: &str) -> String { + path.trim_matches('"') + .trim_matches('\'') + .trim_matches('`') + .to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_clean_import_path() { + assert_eq!(clean_import_path("\"std::fs\""), "std::fs"); + assert_eq!(clean_import_path("'./module'"), "./module"); + assert_eq!(clean_import_path("std::path"), "std::path"); + } + + #[test] + fn test_span_fields() { + let span = Span { + start_byte: 10, + end_byte: 50, + start_line: 2, + end_line: 5, + }; + + assert_eq!(span.start_byte, 10); + assert_eq!(span.end_byte, 50); + assert_eq!(span.start_line, 2); + assert_eq!(span.end_line, 5); + } +} diff --git a/src/code/grammar.rs b/src/code/grammar.rs new file mode 100644 index 
0000000..085175c --- /dev/null +++ b/src/code/grammar.rs @@ -0,0 +1,478 @@ +use std::collections::HashMap; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::{Mutex, OnceLock}; + +use anyhow::{bail, Context, Result}; +use git2::Repository; +use libloading::{Library, Symbol}; +use once_cell::sync::Lazy; +use serde::Deserialize; +use tree_sitter::ffi::TSLanguage; +use tree_sitter::Language; + +type LanguageFn = unsafe extern "C" fn() -> *const TSLanguage; + +static REGISTRY: OnceLock = OnceLock::new(); +static LOADED_LANGUAGES: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::new())); +static LOADED_LIBRARIES: Lazy>> = Lazy::new(|| Mutex::new(Vec::new())); + +#[derive(Debug, Clone, Deserialize)] +pub struct LspConfig { + pub binary: String, + #[serde(default)] + pub args: Vec, + pub version: Option, + pub url_template: Option, + pub archive: Option, + pub binary_path: Option, + #[serde(default)] + pub targets: std::collections::HashMap, + pub npm_package: Option, + pub go_package: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct LanguageEntry { + pub name: String, + pub extensions: Vec, + pub repo: String, + pub branch: String, + pub symbol: String, + pub subpath: Option, + pub definition_query: String, + pub call_query: String, + pub import_query: String, + pub lsp: Option, +} + +#[derive(Debug, Deserialize)] +struct RegistryFile { + language: Vec, +} + +pub struct Registry { + languages: Vec, + by_name: HashMap, + by_extension: HashMap, +} + +impl Registry { + pub fn load() -> Result { + let registry_toml = include_str!("../../registry.toml"); + Self::from_str(registry_toml) + } + + fn from_str(content: &str) -> Result { + let file: RegistryFile = + toml::from_str(content).context("failed to parse registry.toml")?; + + let mut by_name = HashMap::new(); + let mut by_extension = HashMap::new(); + + for (idx, lang) in file.language.iter().enumerate() { + by_name.insert(lang.name.clone(), idx); + for ext in 
&lang.extensions { + by_extension.insert(ext.clone(), idx); + } + } + + Ok(Self { + languages: file.language, + by_name, + by_extension, + }) + } + + pub fn global() -> &'static Registry { + REGISTRY.get_or_init(|| Self::load().expect("failed to load registry")) + } + + pub fn get(&self, name: &str) -> Option<&LanguageEntry> { + self.by_name.get(name).map(|&idx| &self.languages[idx]) + } + + pub fn get_by_extension(&self, ext: &str) -> Option<&LanguageEntry> { + self.by_extension.get(ext).map(|&idx| &self.languages[idx]) + } + + pub fn languages(&self) -> &[LanguageEntry] { + &self.languages + } +} + +fn sources_dir() -> PathBuf { + cache_dir().join("sources") +} + +pub fn fetch_grammar(lang: &LanguageEntry) -> Result { + let sources = sources_dir(); + fs::create_dir_all(&sources)?; + + let dest = sources.join(&lang.name); + + if dest.exists() { + return Ok(dest); + } + + Repository::clone(&lang.repo, &dest) + .with_context(|| format!("failed to clone grammar repo: {}", lang.repo))?; + + let repo = Repository::open(&dest)?; + let (object, reference) = repo.revparse_ext(&lang.branch)?; + repo.checkout_tree(&object, None)?; + match reference { + Some(r) => repo.set_head(r.name().unwrap())?, + None => repo.set_head_detached(object.id())?, + } + + Ok(dest) +} + +pub fn compile_grammar(lang: &LanguageEntry, grammar_dir: &Path) -> Result { + let output_dir = cache_dir(); + fs::create_dir_all(&output_dir)?; + + let lib_name = format!("tree-sitter-{}", lang.name); + let output_path = output_dir.join(lib_filename(&lib_name)); + + if output_path.exists() { + return Ok(output_path); + } + + let src_dir = match &lang.subpath { + Some(subpath) => grammar_dir.join(subpath).join("src"), + None => grammar_dir.join("src"), + }; + + let parser_c = src_dir.join("parser.c"); + if !parser_c.exists() { + bail!("parser.c not found at: {}", parser_c.display()); + } + + let temp_dir = tempfile::tempdir()?; + let mut objects = Vec::new(); + + objects.push(compile_c_file(&parser_c, &src_dir, 
temp_dir.path())?);

    let scanner_c = src_dir.join("scanner.c");
    if scanner_c.exists() {
        objects.push(compile_c_file(&scanner_c, &src_dir, temp_dir.path())?);
    }

    let scanner_cc = src_dir.join("scanner.cc");
    if scanner_cc.exists() {
        objects.push(compile_cpp_file(&scanner_cc, &src_dir, temp_dir.path())?);
    }

    link_shared_library(&objects, &output_path)?;

    Ok(output_path)
}

/// Compile a single C source into an object file with `cc`.
fn compile_c_file(source: &Path, include_dir: &Path, out_dir: &Path) -> Result<PathBuf> {
    compile_object("cc", source, include_dir, out_dir)
}

/// Compile a single C++ source into an object file with `c++`.
fn compile_cpp_file(source: &Path, include_dir: &Path, out_dir: &Path) -> Result<PathBuf> {
    compile_object("c++", source, include_dir, out_dir)
}

/// Shared driver for both compilers:
/// `<compiler> -c -O3 -fPIC -w -I <include_dir> -o <out_dir>/<stem>.o <source>`.
fn compile_object(
    compiler: &str,
    source: &Path,
    include_dir: &Path,
    out_dir: &Path,
) -> Result<PathBuf> {
    let obj_name = source.file_stem().unwrap().to_string_lossy();
    let obj_path = out_dir.join(format!("{}.o", obj_name));

    let status = Command::new(compiler)
        .args(["-c", "-O3", "-fPIC", "-w"])
        .arg("-I")
        .arg(include_dir)
        .arg("-o")
        .arg(&obj_path)
        .arg(source)
        .status()
        .with_context(|| format!("failed to run {}", compiler))?;

    if !status.success() {
        bail!("failed to compile: {}", source.display());
    }

    Ok(obj_path)
}

/// Link the object files into a platform-appropriate shared library
/// (cc -shared on Linux, cc -dynamiclib on macOS, link /DLL on Windows).
fn link_shared_library(objects: &[PathBuf], output: &Path) -> Result<()> {
    let mut cmd = if cfg!(target_os = "macos") {
        let mut c = Command::new("cc");
        // dynamic_lookup defers resolution of tree-sitter runtime symbols
        // until the library is loaded into the host process.
        c.args(["-dynamiclib", "-undefined", "dynamic_lookup"]);
        c
    } else if cfg!(target_os = "windows") {
        let mut c = Command::new("link");
        c.arg("/DLL");
        c
    } else {
        let mut c = Command::new("cc");
        c.arg("-shared");
        c
    };

    for obj in objects {
        cmd.arg(obj);
    }

    if cfg!(target_os = "windows") {
        cmd.arg(format!("/OUT:{}", output.display()));
    } else {
        cmd.arg("-o").arg(output);
    }

    let status = cmd.status().context("failed to link shared library")?;

    if !status.success() {
        bail!("failed to link shared library: {}", output.display());
    }

    Ok(())
}

/// Platform naming convention for a compiled grammar library.
fn lib_filename(name: &str) -> String {
    if cfg!(target_os = "macos") {
        format!("lib{}.dylib", name)
    } else if cfg!(target_os = "windows") {
        format!("{}.dll", name)
    } else {
        format!("lib{}.so", name)
    }
}

/// Cache directory for compiled grammar libraries.
pub fn cache_dir() -> PathBuf {
    dirs::data_local_dir()
        .unwrap_or_else(|| PathBuf::from("."))
        .join("glimpse")
        .join("grammars")
}

/// Install directory for downloaded LSP servers.
pub fn lsp_dir() -> PathBuf {
    dirs::data_local_dir()
        .unwrap_or_else(|| PathBuf::from("."))
        .join("glimpse")
        .join("lsp")
}

/// Load a grammar by language name, fetching and compiling it on demand.
/// Results are memoized in `LOADED_LANGUAGES`.
pub fn load_language(name: &str) -> Result<Language> {
    {
        let cache = LOADED_LANGUAGES.lock().unwrap();
        if let Some(lang) = cache.get(name) {
            return Ok(lang.clone());
        }
    }

    let registry = Registry::global();
    let entry = registry
        .get(name)
        .with_context(|| format!("unknown language: {}", name))?;

    load_language_entry(entry)
}

/// Load a grammar by file extension (e.g. "rs" -> rust).
pub fn load_language_by_extension(ext: &str) -> Result<Language> {
    let registry = Registry::global();
    let entry = registry
        .get_by_extension(ext)
        .with_context(|| format!("no language for extension: {}", ext))?;

    {
        let cache = LOADED_LANGUAGES.lock().unwrap();
        if let Some(lang) = cache.get(&entry.name) {
            return Ok(lang.clone());
        }
    }

    load_language_entry(entry)
}

/// Ensure the grammar is fetched and compiled, dlopen it, and cache the
/// resulting `Language`.
fn load_language_entry(entry: &LanguageEntry) -> Result<Language> {
    let lib_path = compiled_lib_path(entry);

    if !lib_path.exists() {
        let grammar_dir = fetch_grammar(entry)?;
        compile_grammar(entry, &grammar_dir)?;
    }

    if !lib_path.exists() {
        bail!("compiled grammar not found: {}", lib_path.display());
    }

    // SAFETY: the library at lib_path was produced by compile_grammar from a
    // tree-sitter grammar repo, and entry.symbol is the registry-declared
    // `tree_sitter_<lang>` entry point — TODO confirm for user-edited registries.
    let language = unsafe { load_language_from_lib(&lib_path, &entry.symbol) }?;

    {
        let mut cache = LOADED_LANGUAGES.lock().unwrap();
        cache.insert(entry.name.clone(), language.clone());
    }

    Ok(language)
}

fn compiled_lib_path(entry: &LanguageEntry) -> PathBuf {
    let
lib_name = format!("tree-sitter-{}", entry.name); + cache_dir().join(lib_filename(&lib_name)) +} + +unsafe fn load_language_from_lib(lib_path: &PathBuf, symbol: &str) -> Result { + let lib = Library::new(lib_path) + .with_context(|| format!("failed to load library: {}", lib_path.display()))?; + + let func: Symbol = lib + .get(symbol.as_bytes()) + .with_context(|| format!("symbol not found: {}", symbol))?; + + let lang_ptr = func(); + let language = Language::from_raw(lang_ptr); + + LOADED_LIBRARIES.lock().unwrap().push(lib); + + Ok(language) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_load_registry() { + let registry = Registry::load().expect("failed to load registry"); + assert!(!registry.languages.is_empty()); + } + + #[test] + fn test_get_rust() { + let registry = Registry::load().unwrap(); + let rust = registry.get("rust").expect("rust language not found"); + assert_eq!(rust.name, "rust"); + assert!(rust.extensions.contains(&"rs".to_string())); + assert_eq!(rust.symbol, "tree_sitter_rust"); + } + + #[test] + fn test_get_by_extension() { + let registry = Registry::load().unwrap(); + let rust = registry + .get_by_extension("rs") + .expect("rs extension not found"); + assert_eq!(rust.name, "rust"); + } + + #[test] + fn test_typescript_subpath() { + let registry = Registry::load().unwrap(); + let ts = registry.get("typescript").expect("typescript not found"); + assert_eq!(ts.subpath, Some("typescript".to_string())); + } + + #[test] + fn test_all_languages_have_queries() { + let registry = Registry::load().unwrap(); + for lang in registry.languages() { + assert!( + !lang.definition_query.is_empty(), + "{} missing definition_query", + lang.name + ); + assert!( + !lang.call_query.is_empty(), + "{} missing call_query", + lang.name + ); + assert!( + !lang.import_query.is_empty(), + "{} missing import_query", + lang.name + ); + } + } + + #[test] + fn test_cache_dir() { + let dir = cache_dir(); + 
assert!(dir.to_string_lossy().contains("glimpse")); + assert!(dir.ends_with("grammars")); + } + + #[test] + fn test_sources_dir() { + let dir = sources_dir(); + assert!(dir.ends_with("grammars/sources")); + } + + #[test] + fn test_lib_filename() { + let name = "tree-sitter-rust"; + let filename = lib_filename(name); + if cfg!(target_os = "macos") { + assert_eq!(filename, "libtree-sitter-rust.dylib"); + } else if cfg!(target_os = "windows") { + assert_eq!(filename, "tree-sitter-rust.dll"); + } else { + assert_eq!(filename, "libtree-sitter-rust.so"); + } + } + + #[test] + fn test_compiled_lib_path() { + let registry = Registry::global(); + let rust = registry.get("rust").unwrap(); + let path = compiled_lib_path(rust); + assert!(path.to_string_lossy().contains("tree-sitter-rust")); + } + + #[test] + #[ignore] + fn test_load_rust_grammar() { + let language = load_language("rust").expect("failed to load rust grammar"); + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&language) + .expect("failed to set language"); + + let source = "fn main() { println!(\"Hello\"); }"; + let tree = parser.parse(source, None).expect("failed to parse"); + let root = tree.root_node(); + + assert_eq!(root.kind(), "source_file"); + assert!(root.child_count() > 0); + } + + #[test] + #[ignore] + fn test_load_by_extension() { + let language = load_language_by_extension("rs").expect("failed to load by extension"); + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&language) + .expect("failed to set language"); + + let tree = parser.parse("fn foo() {}", None).expect("failed to parse"); + assert_eq!(tree.root_node().kind(), "source_file"); + } + + #[test] + #[ignore] + fn test_language_caching() { + let lang1 = load_language("rust").expect("first load failed"); + let lang2 = load_language("rust").expect("second load failed"); + assert_eq!(lang1.node_kind_count(), lang2.node_kind_count()); + } +} diff --git a/src/code/graph.rs b/src/code/graph.rs new file 
mode 100644 index 0000000..b6ce631 --- /dev/null +++ b/src/code/graph.rs @@ -0,0 +1,704 @@ +use std::collections::{HashMap, HashSet, VecDeque}; +use std::path::Path; + +use super::index::{Definition, Index}; +use super::resolve::Resolver; + +pub type NodeId = usize; + +#[derive(Debug, Clone)] +pub struct CallGraphNode { + pub definition: Definition, + pub callees: HashSet, + pub callers: HashSet, +} + +#[derive(Debug, Default)] +pub struct CallGraph { + pub nodes: HashMap, + name_to_id: HashMap, + file_name_to_id: HashMap<(String, String), NodeId>, + next_id: NodeId, +} + +impl CallGraph { + pub fn new() -> Self { + Self { + nodes: HashMap::new(), + name_to_id: HashMap::new(), + file_name_to_id: HashMap::new(), + next_id: 0, + } + } + + pub fn build(index: &Index) -> Self { + Self::build_with_options(index, false) + } + + pub fn build_with_options(index: &Index, strict: bool) -> Self { + let resolver = Resolver::with_strict(index, strict); + let mut graph = CallGraph::new(); + + for def in index.definitions() { + graph.add_definition(def.clone()); + } + + for call in index.calls() { + let caller_id = call + .caller + .as_ref() + .and_then(|name| graph.find_node_by_file_and_name(&call.file, name)); + + let Some(caller_id) = caller_id else { + continue; + }; + + let callee_def = if let Some(ref resolved) = call.resolved { + index + .get(&resolved.target_file) + .and_then(|r| { + r.definitions + .iter() + .find(|d| d.name == resolved.target_name) + }) + .cloned() + } else { + resolver.resolve(&call.callee, call.qualifier.as_deref(), &call.file) + }; + + let callee_id = if let Some(def) = callee_def { + graph + .find_node_by_file_and_name(&def.file, &def.name) + .unwrap_or_else(|| graph.add_definition(def)) + } else { + continue; + }; + + graph.add_edge(caller_id, callee_id); + } + + graph + } + + fn add_definition(&mut self, definition: Definition) -> NodeId { + let file_key = definition.file.to_string_lossy().to_string(); + let composite_key = (file_key, 
definition.name.clone());

        if let Some(&existing_id) = self.file_name_to_id.get(&composite_key) {
            return existing_id;
        }

        let id = self.next_id;
        self.next_id += 1;

        let node = CallGraphNode {
            definition: definition.clone(),
            callees: HashSet::new(),
            callers: HashSet::new(),
        };

        self.nodes.insert(id, node);
        // The first definition registered under a bare name wins the shortcut
        // map; the (file, name) map keeps every definition addressable.
        self.name_to_id.entry(definition.name.clone()).or_insert(id);
        self.file_name_to_id.insert(composite_key, id);

        id
    }

    /// Record a caller -> callee edge; self-recursion is deliberately dropped.
    fn add_edge(&mut self, caller: NodeId, callee: NodeId) {
        if caller == callee {
            return;
        }

        if let Some(caller_node) = self.nodes.get_mut(&caller) {
            caller_node.callees.insert(callee);
        }

        if let Some(callee_node) = self.nodes.get_mut(&callee) {
            callee_node.callers.insert(caller);
        }
    }

    /// Node id for a bare name (first definition registered under it).
    pub fn find_node(&self, name: &str) -> Option<NodeId> {
        self.name_to_id.get(name).copied()
    }

    /// Node id for a (file, name) pair, disambiguating same-named definitions
    /// in different files.
    pub fn find_node_by_file_and_name(&self, file: &Path, name: &str) -> Option<NodeId> {
        let file_key = file.to_string_lossy().to_string();
        self.file_name_to_id
            .get(&(file_key, name.to_string()))
            .copied()
    }

    pub fn get_node(&self, id: NodeId) -> Option<&CallGraphNode> {
        self.nodes.get(&id)
    }

    /// Direct callees of `node_id`; empty when the node is unknown.
    pub fn get_callees(&self, node_id: NodeId) -> Vec<&CallGraphNode> {
        self.nodes
            .get(&node_id)
            .map(|node| {
                node.callees
                    .iter()
                    .filter_map(|id| self.nodes.get(id))
                    .collect()
            })
            .unwrap_or_default()
    }

    /// Direct callers of `node_id`; empty when the node is unknown.
    pub fn get_callers(&self, node_id: NodeId) -> Vec<&CallGraphNode> {
        self.nodes
            .get(&node_id)
            .map(|node| {
                node.callers
                    .iter()
                    .filter_map(|id| self.nodes.get(id))
                    .collect()
            })
            .unwrap_or_default()
    }

    /// Every node reachable over callee edges (BFS, cycle-safe, excludes the
    /// start node itself).
    pub fn get_transitive_callees(&self, node_id: NodeId) -> Vec<&CallGraphNode> {
        let mut visited = HashSet::new();
        let mut result = Vec::new();
        let mut queue = VecDeque::new();

        if let Some(node) = self.nodes.get(&node_id) {
            for &callee_id in &node.callees {
                queue.push_back(callee_id);
            }
        }

        while let Some(current_id) = queue.pop_front() {
            if !visited.insert(current_id) {
                continue;
            }

            if let Some(node) = self.nodes.get(&current_id) {
                result.push(node);

                for &callee_id in &node.callees {
                    if !visited.contains(&callee_id) {
                        queue.push_back(callee_id);
                    }
                }
            }
        }

        result
    }

    /// Every node reachable over caller edges (BFS, cycle-safe, excludes the
    /// start node itself).
    pub fn get_transitive_callers(&self, node_id: NodeId) -> Vec<&CallGraphNode> {
        let mut visited = HashSet::new();
        let mut result = Vec::new();
        let mut queue = VecDeque::new();

        if let Some(node) = self.nodes.get(&node_id) {
            for &caller_id in &node.callers {
                queue.push_back(caller_id);
            }
        }

        while let Some(current_id) = queue.pop_front() {
            if !visited.insert(current_id) {
                continue;
            }

            if let Some(node) = self.nodes.get(&current_id) {
                result.push(node);

                for &caller_id in &node.callers {
                    if !visited.contains(&caller_id) {
                        queue.push_back(caller_id);
                    }
                }
            }
        }

        result
    }

    /// Post-order traversal (callees listed before their callers), starting
    /// at `node_id` and including it.
    pub fn post_order(&self, node_id: NodeId) -> Vec<NodeId> {
        let mut visited = HashSet::new();
        let mut result = Vec::new();
        self.post_order_dfs(node_id, &mut visited, &mut result);
        result
    }

    // NOTE(review): recursive DFS — assumes call graphs are shallow enough
    // not to overflow the stack; confirm for very deep call chains.
    fn post_order_dfs(
        &self,
        node_id: NodeId,
        visited: &mut HashSet<NodeId>,
        result: &mut Vec<NodeId>,
    ) {
        if !visited.insert(node_id) {
            return;
        }

        if let Some(node) = self.nodes.get(&node_id) {
            for &callee_id in &node.callees {
                self.post_order_dfs(callee_id, visited, result);
            }
        }

        result.push(node_id);
    }

    pub fn post_order_definitions(&self, node_id: NodeId) -> Vec<&Definition> {
        self.post_order(node_id)
            .into_iter()
            .filter_map(|id| self.nodes.get(&id).map(|n| &n.definition))
            .collect()
    }

    /// BFS over callee edges bounded by `max_depth`; includes the start node.
    pub fn get_callees_to_depth(&self, node_id: NodeId, max_depth: usize) -> Vec<NodeId> {
        let mut visited = HashSet::new();
        let mut result = Vec::new();
        let mut queue = VecDeque::new();

        queue.push_back((node_id, 0));
        visited.insert(node_id);

        while let Some((current_id, depth)) = queue.pop_front() {
            result.push(current_id);

            if depth >= max_depth {
                continue;
            }

            if let Some(node) = self.nodes.get(&current_id) {
                for &callee_id in &node.callees {
                    if visited.insert(callee_id) {
                        queue.push_back((callee_id, depth + 1));
                    }
                }
            }
        }

        result
    }

    /// BFS over caller edges bounded by `max_depth`; includes the start node.
    pub fn get_callers_to_depth(&self, node_id: NodeId, max_depth: usize) -> Vec<NodeId> {
        let mut visited = HashSet::new();
        let mut result = Vec::new();
        let mut queue = VecDeque::new();

        queue.push_back((node_id, 0));
        visited.insert(node_id);

        while let Some((current_id, depth)) = queue.pop_front() {
            result.push(current_id);

            if depth >= max_depth {
                continue;
            }

            if let Some(node) = self.nodes.get(&current_id) {
                for &caller_id in &node.callers {
                    if visited.insert(caller_id) {
                        queue.push_back((caller_id, depth + 1));
                    }
                }
            }
        }

        result
    }

    pub fn definitions_to_depth(&self, node_id: NodeId, max_depth: usize) -> Vec<&Definition> {
        self.get_callees_to_depth(node_id, max_depth)
            .into_iter()
            .filter_map(|id| self.nodes.get(&id).map(|n| &n.definition))
            .collect()
    }

    pub fn node_count(&self) -> usize {
        self.nodes.len()
    }

    pub fn edge_count(&self) -> usize {
        self.nodes.values().map(|n| n.callees.len()).sum()
    }

    /// Nodes with no callers (entry points).
    pub fn roots(&self) -> Vec<NodeId> {
        self.nodes
            .iter()
            .filter(|(_, node)| node.callers.is_empty())
            .map(|(&id, _)| id)
            .collect()
    }

    /// Nodes with no callees (leaf functions).
    pub fn leaves(&self) -> Vec<NodeId> {
        self.nodes
            .iter()
            .filter(|(_, node)| node.callees.is_empty())
            .map(|(&id, _)| id)
            .collect()
    }
}

#[cfg(test)]
mod tests {
    use super::super::index::{Call, DefinitionKind, FileRecord, Span};
    use super::*;
    use std::path::PathBuf;

    fn make_span() -> Span {
        Span {
            start_byte: 0,
            end_byte: 100,
            start_line: 1,
            end_line: 10,
        }
    }

    fn make_definition(name: &str, file: &str) -> Definition {
        Definition {
            name: name.to_string(),
            kind: DefinitionKind::Function,
            span: make_span(),
            file: PathBuf::from(file),
            signature: None,
        }
    }

    fn make_call(callee: &str, caller: Option<&str>, file: &str) -> Call {
        Call {
            callee:
callee.to_string(), + qualifier: None, + span: make_span(), + file: PathBuf::from(file), + caller: caller.map(|s| s.to_string()), + resolved: None, + } + } + + #[test] + fn test_build_empty_index() { + let index = Index::new(); + let graph = CallGraph::build(&index); + assert_eq!(graph.node_count(), 0); + assert_eq!(graph.edge_count(), 0); + } + + #[test] + fn test_build_definitions_only() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/main.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("main", "src/main.rs"), + make_definition("helper", "src/main.rs"), + ], + calls: vec![], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + assert_eq!(graph.node_count(), 2); + assert_eq!(graph.edge_count(), 0); + } + + #[test] + fn test_build_with_calls() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/main.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("main", "src/main.rs"), + make_definition("helper", "src/main.rs"), + ], + calls: vec![make_call("helper", Some("main"), "src/main.rs")], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + assert_eq!(graph.node_count(), 2); + assert_eq!(graph.edge_count(), 1); + + let main_id = graph.find_node("main").unwrap(); + let callees = graph.get_callees(main_id); + assert_eq!(callees.len(), 1); + assert_eq!(callees[0].definition.name, "helper"); + } + + #[test] + fn test_get_callees_and_callers() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("a", "src/lib.rs"), + make_definition("b", "src/lib.rs"), + make_definition("c", "src/lib.rs"), + ], + calls: vec![ + make_call("b", Some("a"), "src/lib.rs"), + make_call("c", Some("a"), "src/lib.rs"), + make_call("c", Some("b"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + + let a_id = 
graph.find_node("a").unwrap(); + let c_id = graph.find_node("c").unwrap(); + + let a_callees = graph.get_callees(a_id); + assert_eq!(a_callees.len(), 2); + + let c_callers = graph.get_callers(c_id); + assert_eq!(c_callers.len(), 2); + + let a_callers = graph.get_callers(a_id); + assert!(a_callers.is_empty()); + + let c_callees = graph.get_callees(c_id); + assert!(c_callees.is_empty()); + + assert_eq!(graph.roots(), vec![a_id]); + assert_eq!(graph.leaves(), vec![c_id]); + } + + #[test] + fn test_transitive_callees() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("a", "src/lib.rs"), + make_definition("b", "src/lib.rs"), + make_definition("c", "src/lib.rs"), + make_definition("d", "src/lib.rs"), + ], + calls: vec![ + make_call("b", Some("a"), "src/lib.rs"), + make_call("c", Some("b"), "src/lib.rs"), + make_call("d", Some("c"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + let a_id = graph.find_node("a").unwrap(); + + let transitive = graph.get_transitive_callees(a_id); + assert_eq!(transitive.len(), 3); + + let names: HashSet<_> = transitive + .iter() + .map(|n| n.definition.name.as_str()) + .collect(); + assert!(names.contains("b")); + assert!(names.contains("c")); + assert!(names.contains("d")); + } + + #[test] + fn test_transitive_callees_with_cycle() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("a", "src/lib.rs"), + make_definition("b", "src/lib.rs"), + make_definition("c", "src/lib.rs"), + ], + calls: vec![ + make_call("b", Some("a"), "src/lib.rs"), + make_call("c", Some("b"), "src/lib.rs"), + make_call("a", Some("c"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + let a_id = graph.find_node("a").unwrap(); + + let transitive = 
graph.get_transitive_callees(a_id); + assert_eq!(transitive.len(), 3); + } + + #[test] + fn test_post_order() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("a", "src/lib.rs"), + make_definition("b", "src/lib.rs"), + make_definition("c", "src/lib.rs"), + ], + calls: vec![ + make_call("b", Some("a"), "src/lib.rs"), + make_call("c", Some("b"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + let a_id = graph.find_node("a").unwrap(); + let b_id = graph.find_node("b").unwrap(); + let c_id = graph.find_node("c").unwrap(); + + let order = graph.post_order(a_id); + + let c_pos = order.iter().position(|&id| id == c_id).unwrap(); + let b_pos = order.iter().position(|&id| id == b_id).unwrap(); + let a_pos = order.iter().position(|&id| id == a_id).unwrap(); + + assert!(c_pos < b_pos); + assert!(b_pos < a_pos); + } + + #[test] + fn test_post_order_with_cycle() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("a", "src/lib.rs"), + make_definition("b", "src/lib.rs"), + ], + calls: vec![ + make_call("b", Some("a"), "src/lib.rs"), + make_call("a", Some("b"), "src/lib.rs"), + ], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + let a_id = graph.find_node("a").unwrap(); + + let order = graph.post_order(a_id); + assert_eq!(order.len(), 2); + } + + #[test] + fn test_post_order_definitions() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![ + make_definition("main", "src/lib.rs"), + make_definition("init", "src/lib.rs"), + ], + calls: vec![make_call("init", Some("main"), "src/lib.rs")], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + let main_id = graph.find_node("main").unwrap(); + + let defs 
= graph.post_order_definitions(main_id); + assert_eq!(defs.len(), 2); + assert_eq!(defs[0].name, "init"); + assert_eq!(defs[1].name, "main"); + } + + #[test] + fn test_no_self_loops() { + let mut index = Index::new(); + index.update(FileRecord { + path: PathBuf::from("src/lib.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("recursive", "src/lib.rs")], + calls: vec![make_call("recursive", Some("recursive"), "src/lib.rs")], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + let id = graph.find_node("recursive").unwrap(); + let node = graph.get_node(id).unwrap(); + + assert!(node.callees.is_empty()); + assert!(node.callers.is_empty()); + } + + #[test] + fn test_cross_file_calls() { + let mut index = Index::new(); + + index.update(FileRecord { + path: PathBuf::from("src/main.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("main", "src/main.rs")], + calls: vec![make_call("helper", Some("main"), "src/main.rs")], + imports: vec![], + }); + + index.update(FileRecord { + path: PathBuf::from("src/utils.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("helper", "src/utils.rs")], + calls: vec![], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + + let main_id = graph.find_node("main").unwrap(); + let callees = graph.get_callees(main_id); + + assert_eq!(callees.len(), 1); + assert_eq!(callees[0].definition.name, "helper"); + assert_eq!(callees[0].definition.file, PathBuf::from("src/utils.rs")); + } + + #[test] + fn test_find_node_by_file_and_name() { + let mut index = Index::new(); + + index.update(FileRecord { + path: PathBuf::from("src/a.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("foo", "src/a.rs")], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: PathBuf::from("src/b.rs"), + mtime: 0, + size: 0, + definitions: vec![make_definition("foo", "src/b.rs")], + calls: vec![], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + 
+ let a_id = graph.find_node_by_file_and_name(Path::new("src/a.rs"), "foo"); + let b_id = graph.find_node_by_file_and_name(Path::new("src/b.rs"), "foo"); + + assert!(a_id.is_some()); + assert!(b_id.is_some()); + assert_ne!(a_id, b_id); + + let a_node = graph.get_node(a_id.unwrap()).unwrap(); + let b_node = graph.get_node(b_id.unwrap()).unwrap(); + + assert_eq!(a_node.definition.file, PathBuf::from("src/a.rs")); + assert_eq!(b_node.definition.file, PathBuf::from("src/b.rs")); + } +} diff --git a/src/code/index.rs b/src/code/index.rs new file mode 100644 index 0000000..2c54639 --- /dev/null +++ b/src/code/index.rs @@ -0,0 +1,339 @@ +use std::collections::hash_map::DefaultHasher; +use std::collections::HashMap; +use std::fs::{self, File}; +use std::hash::{Hash, Hasher}; +use std::io::{BufReader, BufWriter}; +use std::path::{Path, PathBuf}; +use std::time::SystemTime; + +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; + +pub const INDEX_FILE: &str = "index.bin"; +pub const INDEX_VERSION: u32 = 3; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Span { + pub start_byte: usize, + pub end_byte: usize, + pub start_line: usize, + pub end_line: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Definition { + pub name: String, + pub kind: DefinitionKind, + pub span: Span, + pub file: PathBuf, + pub signature: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ResolvedCall { + pub target_file: PathBuf, + pub target_name: String, + pub target_span: Span, + pub signature: Option, + pub receiver_type: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum DefinitionKind { + Function, + Method, + Class, + Struct, + Enum, + Trait, + Interface, + Module, + Other(String), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Call { + pub callee: String, + pub qualifier: Option, + pub span: Span, + pub file: PathBuf, + pub caller: Option, + pub resolved: Option, +} + 
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Import {
    // Imported module path with surrounding quote characters stripped.
    pub module_path: String,
    // Local alias, when the import renames the module.
    pub alias: Option<String>,
    pub span: Span,
    pub file: PathBuf,
}

/// Everything extracted from one source file, plus the fingerprint
/// (mtime + size) used for staleness checks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRecord {
    pub path: PathBuf,
    pub mtime: u64,
    pub size: u64,
    pub definitions: Vec<Definition>,
    pub calls: Vec<Call>,
    pub imports: Vec<Import>,
}

/// The persisted code index: one `FileRecord` per indexed file, tagged with
/// the format version so stale on-disk indices are discarded.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Index {
    pub files: HashMap<PathBuf, FileRecord>,
    pub version: u32,
}

impl Index {
    pub fn new() -> Self {
        Self {
            files: HashMap::new(),
            version: INDEX_VERSION,
        }
    }

    /// True when the file is unknown to the index or its fingerprint changed.
    pub fn is_stale(&self, path: &Path, mtime: u64, size: u64) -> bool {
        match self.files.get(path) {
            Some(record) => record.mtime != mtime || record.size != size,
            None => true,
        }
    }

    /// Insert or replace the record for `record.path`.
    pub fn update(&mut self, record: FileRecord) {
        self.files.insert(record.path.clone(), record);
    }

    pub fn remove(&mut self, path: &Path) {
        self.files.remove(path);
    }

    pub fn get(&self, path: &Path) -> Option<&FileRecord> {
        self.files.get(path)
    }

    /// All definitions across all files (unordered).
    pub fn definitions(&self) -> impl Iterator<Item = &Definition> {
        self.files.values().flat_map(|f| &f.definitions)
    }

    /// All call sites across all files (unordered).
    pub fn calls(&self) -> impl Iterator<Item = &Call> {
        self.files.values().flat_map(|f| &f.calls)
    }

    /// All imports across all files (unordered).
    pub fn imports(&self) -> impl Iterator<Item = &Import> {
        self.files.values().flat_map(|f| &f.imports)
    }
}

/// Return the (mtime seconds, size bytes) fingerprint for a file; mtime
/// falls back to 0 when the platform cannot report it.
pub fn file_fingerprint(path: &Path) -> Result<(u64, u64)> {
    let meta = fs::metadata(path).with_context(|| format!("failed to stat {}", path.display()))?;
    let mtime = meta
        .modified()
        .unwrap_or(SystemTime::UNIX_EPOCH)
        .duration_since(SystemTime::UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);
    let size = meta.len();
    Ok((mtime, size))
}

/// Stable hex digest of a path, used to name per-project index directories.
fn hash_path(path: &Path) -> String {
    let mut hasher = DefaultHasher::new();
    path.hash(&mut hasher);
    format!("{:016x}", hasher.finish())
}

fn index_dir() -> Result<PathBuf> {
    dirs::data_local_dir()
        .map(|d| d.join("glimpse").join("indices"))
        .context("could not determine local data directory")
}

/// Index file location for a project root. The root is canonicalized when
/// possible so different spellings of the same path share one index.
pub fn index_path(root: &Path) -> Result<PathBuf> {
    let canonical = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
    let hash = hash_path(&canonical);
    Ok(index_dir()?.join(hash).join(INDEX_FILE))
}

/// Serialize the index to its on-disk location, creating directories as needed.
pub fn save_index(index: &Index, root: &Path) -> Result<()> {
    let path = index_path(root)?;
    // index_path always returns a file inside index_dir, so a parent exists.
    let dir = path.parent().unwrap();
    fs::create_dir_all(dir).with_context(|| format!("failed to create {}", dir.display()))?;

    let file =
        File::create(&path).with_context(|| format!("failed to create {}", path.display()))?;
    let writer = BufWriter::new(file);

    bincode::serialize_into(writer, index).context("failed to serialize index")?;
    Ok(())
}

/// Load the index for `root`. Returns `None` when the file is missing,
/// fails to deserialize (deliberately treated as a cache miss, not an
/// error), or was written by a different `INDEX_VERSION`.
pub fn load_index(root: &Path) -> Result<Option<Index>> {
    let path = index_path(root)?;
    if !path.exists() {
        return Ok(None);
    }

    let file = File::open(&path).with_context(|| format!("failed to open {}", path.display()))?;
    let reader = BufReader::new(file);

    let index: Index = match bincode::deserialize_from(reader) {
        Ok(idx) => idx,
        Err(_) => return Ok(None),
    };

    if index.version != INDEX_VERSION {
        return Ok(None);
    }

    Ok(Some(index))
}

/// Delete the on-disk index directory for `root`, if present.
pub fn clear_index(root: &Path) -> Result<()> {
    let path = index_path(root)?;
    if let Some(dir) = path.parent() {
        if dir.exists() {
            fs::remove_dir_all(dir)
                .with_context(|| format!("failed to remove {}", dir.display()))?;
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    fn make_test_record(name: &str) -> FileRecord {
        FileRecord {
            path: PathBuf::from(format!("src/{}.rs", name)),
            mtime: 1234567890,
            size: 1024,
            definitions: vec![Definition {
                name: format!("{}_fn", name),
                kind: DefinitionKind::Function,
                span: Span {
                    start_byte: 0,
                    end_byte: 100,
                    start_line: 1,
                    end_line: 10,
                },
                file: PathBuf::from(format!("src/{}.rs", name)),
                signature: None,
            }],
            calls: vec![Call {
                callee: "other_fn".to_string(),
                qualifier: None,
                span: Span {
                    start_byte: 50,
                    end_byte: 60,
                    start_line: 5,
end_line: 5, + }, + file: PathBuf::from(format!("src/{}.rs", name)), + caller: Some(format!("{}_fn", name)), + resolved: None, + }], + imports: vec![Import { + module_path: "std::fs".to_string(), + alias: None, + span: Span { + start_byte: 0, + end_byte: 15, + start_line: 1, + end_line: 1, + }, + file: PathBuf::from(format!("src/{}.rs", name)), + }], + } + } + + #[test] + fn test_index_update_and_get() { + let mut index = Index::new(); + let record = make_test_record("main"); + + index.update(record.clone()); + let got = index.get(Path::new("src/main.rs")).unwrap(); + + assert_eq!(got.path, record.path); + assert_eq!(got.definitions.len(), 1); + assert_eq!(got.calls.len(), 1); + assert_eq!(got.imports.len(), 1); + } + + #[test] + fn test_index_is_stale() { + let mut index = Index::new(); + let record = make_test_record("lib"); + index.update(record); + + assert!(!index.is_stale(Path::new("src/lib.rs"), 1234567890, 1024)); + assert!(index.is_stale(Path::new("src/lib.rs"), 1234567891, 1024)); + assert!(index.is_stale(Path::new("src/lib.rs"), 1234567890, 2048)); + assert!(index.is_stale(Path::new("src/other.rs"), 1234567890, 1024)); + } + + #[test] + fn test_index_remove() { + let mut index = Index::new(); + index.update(make_test_record("foo")); + index.update(make_test_record("bar")); + + assert!(index.get(Path::new("src/foo.rs")).is_some()); + index.remove(Path::new("src/foo.rs")); + assert!(index.get(Path::new("src/foo.rs")).is_none()); + assert!(index.get(Path::new("src/bar.rs")).is_some()); + } + + #[test] + fn test_index_iterators() { + let mut index = Index::new(); + index.update(make_test_record("a")); + index.update(make_test_record("b")); + + assert_eq!(index.definitions().count(), 2); + assert_eq!(index.calls().count(), 2); + assert_eq!(index.imports().count(), 2); + } + + #[test] + fn test_index_path_uses_data_dir() { + let path = index_path(Path::new("/some/project")).unwrap(); + let data_dir = dirs::data_local_dir().unwrap(); + 
assert!(path.starts_with(data_dir.join("glimpse").join("indices"))); + assert!(path.ends_with(INDEX_FILE)); + } + + #[test] + fn test_index_path_different_projects() { + let path1 = index_path(Path::new("/project/a")).unwrap(); + let path2 = index_path(Path::new("/project/b")).unwrap(); + assert_ne!(path1, path2); + } + + #[test] + fn test_save_and_load_index() { + let project_dir = tempfile::tempdir().unwrap(); + let mut index = Index::new(); + index.update(make_test_record("main")); + index.update(make_test_record("lib")); + + save_index(&index, project_dir.path()).unwrap(); + + let loaded = load_index(project_dir.path()).unwrap().unwrap(); + assert_eq!(loaded.version, INDEX_VERSION); + assert_eq!(loaded.files.len(), 2); + assert!(loaded.get(Path::new("src/main.rs")).is_some()); + assert!(loaded.get(Path::new("src/lib.rs")).is_some()); + + clear_index(project_dir.path()).unwrap(); + } + + #[test] + fn test_load_index_nonexistent() { + let dir = tempfile::tempdir().unwrap(); + let result = load_index(dir.path()).unwrap(); + assert!(result.is_none()); + } +} diff --git a/src/code/lsp.rs b/src/code/lsp.rs new file mode 100644 index 0000000..0c097bc --- /dev/null +++ b/src/code/lsp.rs @@ -0,0 +1,1283 @@ +use std::collections::{HashMap, HashSet}; +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; +use std::path::{Path, PathBuf}; +use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; +use std::sync::atomic::{AtomicI32, Ordering}; + +use anyhow::{bail, Context, Result}; +use tracing::{debug, trace, warn}; +use flate2::read::GzDecoder; +use indicatif::{ProgressBar, ProgressStyle}; +use lsp_types::{ + ClientCapabilities, DidOpenTextDocumentParams, GotoDefinitionParams, GotoDefinitionResponse, + InitializeParams, InitializedParams, Position, TextDocumentIdentifier, + TextDocumentPositionParams, Uri, WorkspaceFolder, +}; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; + +use super::grammar::{lsp_dir, 
LspConfig, Registry}; +use super::index::{Call, Definition, Index, ResolvedCall}; + +fn current_target() -> &'static str { + #[cfg(all(target_os = "linux", target_arch = "x86_64"))] + { + "x86_64-unknown-linux-gnu" + } + #[cfg(all(target_os = "linux", target_arch = "aarch64"))] + { + "aarch64-unknown-linux-gnu" + } + #[cfg(all(target_os = "macos", target_arch = "x86_64"))] + { + "x86_64-apple-darwin" + } + #[cfg(all(target_os = "macos", target_arch = "aarch64"))] + { + "aarch64-apple-darwin" + } + #[cfg(all(target_os = "windows", target_arch = "x86_64"))] + { + "x86_64-pc-windows-msvc" + } + #[cfg(not(any( + all(target_os = "linux", target_arch = "x86_64"), + all(target_os = "linux", target_arch = "aarch64"), + all(target_os = "macos", target_arch = "x86_64"), + all(target_os = "macos", target_arch = "aarch64"), + all(target_os = "windows", target_arch = "x86_64"), + )))] + { + "unknown" + } +} + +fn binary_extension() -> &'static str { + if cfg!(target_os = "windows") { + ".exe" + } else { + "" + } +} + +fn lsp_binary_path(lsp: &LspConfig) -> PathBuf { + let dir = lsp_dir(); + dir.join(format!("{}{}", lsp.binary, binary_extension())) +} + +fn path_to_uri(path: &Path) -> Result { + let url = url::Url::from_file_path(path) + .map_err(|_| anyhow::anyhow!("invalid path: {}", path.display()))?; + url.as_str().parse().context("failed to convert URL to URI") +} + +fn uri_to_path(uri: &Uri) -> Option { + let url = url::Url::parse(uri.as_str()).ok()?; + url.to_file_path().ok() +} + +fn detect_zig_version_from_zon(root: &Path) -> Option { + let zon_path = root.join("build.zig.zon"); + let content = fs::read_to_string(zon_path).ok()?; + let re = regex::Regex::new(r#"\.minimum_zig_version\s*=\s*"([^"]+)""#).ok()?; + let caps = re.captures(&content)?; + Some(caps.get(1)?.as_str().to_string()) +} + +fn detect_zig_version(root: &Path) -> Option { + let zig_version = if let Ok(output) = Command::new("zig").arg("version").output() { + if output.status.success() { + let version_str 
= String::from_utf8_lossy(&output.stdout); + let version = version_str.trim(); + version.split('-').next().map(|s| s.to_string()) + } else { + None + } + } else { + None + }; + + let zig_version = zig_version.or_else(|| detect_zig_version_from_zon(root))?; + + // zls releases may lag behind zig - try to find matching major.minor + // e.g., zig 0.15.2 -> try 0.15.2, 0.15.1, 0.15.0 + let parts: Vec<&str> = zig_version.split('.').collect(); + if parts.len() >= 2 { + let major_minor = format!("{}.{}", parts[0], parts[1]); + // Try decreasing patch versions + for patch in (0..=10).rev() { + let version = format!("{}.{}", major_minor, patch); + let url = format!( + "https://github.com/zigtools/zls/releases/download/{}/zls-x86_64-linux.tar.xz", + version + ); + if let Ok(resp) = reqwest::blocking::Client::new() + .head(&url) + .send() + { + if resp.status().is_success() || resp.status().as_u16() == 302 { + debug!(zig_version = %zig_version, zls_version = %version, "found matching zls version"); + return Some(version); + } + } + } + } + + Some(zig_version) +} + +#[allow(clippy::literal_string_with_formatting_args)] +fn download_and_extract(lsp: &LspConfig, root: &Path) -> Result { + let Some(ref url_template) = lsp.url_template else { + bail!("no download URL configured for {}", lsp.binary); + }; + + let version = if lsp.binary == "zls" { + detect_zig_version(root) + .with_context(|| "failed to detect zig version. Install zig or install zls manually")? + } else { + lsp.version + .clone() + .with_context(|| format!("no version configured for {}", lsp.binary))? 
+ }; + + let target = current_target(); + let Some(target_name) = lsp.targets.get(target) else { + bail!( + "no pre-built binary available for {} on {}", + lsp.binary, + target + ); + }; + + let url = url_template + .replace("{version}", &version) + .replace("{target}", target_name); + + let dir = lsp_dir(); + fs::create_dir_all(&dir)?; + + let response = + reqwest::blocking::get(&url).with_context(|| format!("failed to download {}", url))?; + + if !response.status().is_success() { + bail!("download failed with status: {}", response.status()); + } + + let total_size = response.content_length().unwrap_or(0); + let pb = ProgressBar::new(total_size); + pb.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {bytes}/{total_bytes} ({eta}) downloading {msg}") + .expect("valid template") + .progress_chars("#>-"), + ); + pb.set_message(lsp.binary.clone()); + + let mut bytes = Vec::new(); + let mut reader = response; + let mut buffer = [0u8; 8192]; + loop { + let n = reader.read(&mut buffer)?; + if n == 0 { + break; + } + bytes.extend_from_slice(&buffer[..n]); + pb.set_position(bytes.len() as u64); + } + pb.finish_and_clear(); + let archive_type = lsp.archive.as_deref().unwrap_or("gz"); + + let final_path = lsp_binary_path(lsp); + + match archive_type { + "gz" => { + let mut decoder = GzDecoder::new(&bytes[..]); + let mut output = File::create(&final_path)?; + std::io::copy(&mut decoder, &mut output)?; + } + "tar.xz" => { + let decoder = xz2::read::XzDecoder::new(&bytes[..]); + let mut archive = tar::Archive::new(decoder); + + let binary_name = format!("{}{}", lsp.binary, binary_extension()); + let mut found = false; + + for entry in archive.entries()? 
{ + let mut entry = entry?; + let path = entry.path()?; + if let Some(name) = path.file_name() { + if name == binary_name.as_str() { + let mut output = File::create(&final_path)?; + std::io::copy(&mut entry, &mut output)?; + found = true; + break; + } + } + } + + if !found { + bail!("binary {} not found in tar.xz archive", binary_name); + } + } + "zip" => { + let cursor = std::io::Cursor::new(&bytes); + let mut archive = zip::ZipArchive::new(cursor)?; + + let binary_path = if let Some(ref path) = lsp.binary_path { + path.replace("{version}", &version) + } else { + lsp.binary.clone() + }; + + let mut found = false; + for i in 0..archive.len() { + let mut file = archive.by_index(i)?; + let name = file.name().to_string(); + + if name.ends_with(&binary_path) + || name.ends_with(&format!("{}{}", binary_path, binary_extension())) + { + let mut output = File::create(&final_path)?; + std::io::copy(&mut file, &mut output)?; + found = true; + break; + } + } + + if !found { + bail!("binary {} not found in archive", binary_path); + } + } + other => bail!("unsupported archive type: {}", other), + } + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(&final_path)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(&final_path, perms)?; + } + + eprintln!("Installed {} to {}", lsp.binary, final_path.display()); + Ok(final_path) +} + +fn install_npm_package(lsp: &LspConfig) -> Result { + let Some(ref package) = lsp.npm_package else { + bail!("no npm package configured for {}", lsp.binary); + }; + + let (pkg_manager, pkg_manager_path) = if let Ok(bun) = which::which("bun") { + ("bun", bun) + } else if let Ok(npm) = which::which("npm") { + ("npm", npm) + } else { + bail!("neither bun nor npm found. 
Install one of them or install the LSP manually"); + }; + + let pkg_dir = lsp_dir().join(format!("{}-pkg", &lsp.binary)); + fs::create_dir_all(&pkg_dir)?; + + let init_status = Command::new(&pkg_manager_path) + .args(["init", "--yes"]) + .current_dir(&pkg_dir) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .with_context(|| format!("failed to run {} init", pkg_manager))?; + + if !init_status.success() { + bail!("{} init failed", pkg_manager); + } + + let packages: Vec<&str> = package.split_whitespace().collect(); + let mut install_args = vec!["add"]; + install_args.extend(packages.iter()); + + let install_status = Command::new(&pkg_manager_path) + .args(&install_args) + .current_dir(&pkg_dir) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .with_context(|| format!("failed to run {} add", pkg_manager))?; + + if !install_status.success() { + bail!("{} add failed for {}", pkg_manager, package); + } + + let bin_path = pkg_dir.join("node_modules").join(".bin").join(&lsp.binary); + if !bin_path.exists() { + bail!( + "installed {} but binary not found at {}", + package, + bin_path.display() + ); + } + + let wrapper_path = lsp_binary_path(lsp); + create_wrapper_script(&wrapper_path, &bin_path)?; + + Ok(wrapper_path) +} + +fn create_wrapper_script(wrapper_path: &Path, target_path: &Path) -> Result<()> { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + + let script = format!("#!/bin/sh\nexec \"{}\" \"$@\"\n", target_path.display()); + fs::write(wrapper_path, script)?; + + let mut perms = fs::metadata(wrapper_path)?.permissions(); + perms.set_mode(0o755); + fs::set_permissions(wrapper_path, perms)?; + } + + #[cfg(windows)] + { + let script = format!("@echo off\r\n\"{}\" %*\r\n", target_path.display()); + let wrapper_cmd = wrapper_path.with_extension("cmd"); + fs::write(&wrapper_cmd, script)?; + } + + Ok(()) +} + +fn install_go_package(lsp: &LspConfig) -> Result { + let Some(ref package) = lsp.go_package else { + bail!("no go 
package configured for {}", lsp.binary); + }; + + let go_path = + which::which("go").context("go not found. Install Go or install the LSP manually")?; + + let install_dir = lsp_dir(); + fs::create_dir_all(&install_dir)?; + + let status = Command::new(&go_path) + .args(["install", package]) + .env("GOBIN", &install_dir) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .context("failed to run go install")?; + + if !status.success() { + bail!("go install failed for {}", package); + } + + let binary_path = install_dir.join(&lsp.binary); + if binary_path.exists() { + return Ok(binary_path); + } + + bail!( + "go install succeeded but binary {} not found at {}", + lsp.binary, + binary_path.display() + ); +} + +fn find_lsp_binary(lsp: &LspConfig, root: &Path) -> Result { + let local_path = lsp_binary_path(lsp); + if local_path.exists() { + debug!(binary = %lsp.binary, path = %local_path.display(), "using cached LSP binary"); + return Ok(local_path); + } + + if let Ok(system_path) = which::which(&lsp.binary) { + debug!(binary = %lsp.binary, path = %system_path.display(), "using system LSP binary"); + return Ok(system_path); + } + + debug!(binary = %lsp.binary, "LSP not found, attempting install"); + + if lsp.url_template.is_some() { + return download_and_extract(lsp, root); + } + + if lsp.npm_package.is_some() { + return install_npm_package(lsp); + } + + if lsp.go_package.is_some() { + return install_go_package(lsp); + } + + bail!( + "LSP server '{}' not found. 
Install it manually.", + lsp.binary + ); +} + +#[derive(Debug)] +struct LspClient { + process: Child, + stdin: BufWriter, + stdout: BufReader, + request_id: AtomicI32, + root_uri: Uri, + opened_files: HashMap, + is_ready: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +struct LspMessage { + jsonrpc: String, + #[serde(skip_serializing_if = "Option::is_none")] + id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + method: Option, + #[serde(skip_serializing_if = "Option::is_none")] + params: Option, + #[serde(skip_serializing_if = "Option::is_none")] + result: Option, + #[serde(skip_serializing_if = "Option::is_none")] + error: Option, +} + +impl LspClient { + fn new(lsp: &LspConfig, root: &Path) -> Result { + let binary_path = find_lsp_binary(lsp, root)?; + + let mut process = Command::new(&binary_path) + .args(&lsp.args) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + .with_context(|| format!("failed to spawn {}", binary_path.display()))?; + + let stdin = process.stdin.take().context("failed to get stdin")?; + let stdout = process.stdout.take().context("failed to get stdout")?; + + let root_uri = path_to_uri(&root.canonicalize().unwrap_or_else(|_| root.to_path_buf()))?; + + Ok(Self { + process, + stdin: BufWriter::new(stdin), + stdout: BufReader::new(stdout), + request_id: AtomicI32::new(1), + root_uri, + opened_files: HashMap::new(), + is_ready: false, + }) + } + + fn next_id(&self) -> i32 { + self.request_id.fetch_add(1, Ordering::SeqCst) + } + + fn send_message(&mut self, msg: &LspMessage) -> Result<()> { + let content = serde_json::to_string(msg)?; + let header = format!("Content-Length: {}\r\n\r\n", content.len()); + + self.stdin.write_all(header.as_bytes())?; + self.stdin.write_all(content.as_bytes())?; + self.stdin.flush()?; + + Ok(()) + } + + fn read_message(&mut self) -> Result { + let mut content_length: Option = None; + let mut header_line = String::new(); + + loop { + header_line.clear(); + 
self.stdout.read_line(&mut header_line)?; + + if header_line == "\r\n" || header_line.is_empty() { + break; + } + + if let Some(len_str) = header_line.strip_prefix("Content-Length: ") { + content_length = Some(len_str.trim().parse()?); + } + } + + let len = content_length.context("missing Content-Length header")?; + let mut body = vec![0u8; len]; + self.stdout.read_exact(&mut body)?; + + let msg: LspMessage = serde_json::from_slice(&body)?; + Ok(msg) + } + + fn send_request(&mut self, method: &str, params: Value) -> Result { + let id = self.next_id(); + let msg = LspMessage { + jsonrpc: "2.0".to_string(), + id: Some(id), + method: Some(method.to_string()), + params: Some(params), + result: None, + error: None, + }; + + self.send_message(&msg)?; + + loop { + let response = self.read_message()?; + + if response.id == Some(id) { + if let Some(error) = response.error { + bail!("LSP error: {}", error); + } + return Ok(response.result.unwrap_or(Value::Null)); + } + } + } + + fn send_notification(&mut self, method: &str, params: Value) -> Result<()> { + let msg = LspMessage { + jsonrpc: "2.0".to_string(), + id: None, + method: Some(method.to_string()), + params: Some(params), + result: None, + error: None, + }; + + self.send_message(&msg) + } + + fn wait_for_ready( + &mut self, + path: &Path, + max_attempts: u32, + pb: Option<&ProgressBar>, + server_name: Option<&str>, + ) -> Result { + use std::thread; + use std::time::Duration; + + let uri = path_to_uri(path)?; + let name = server_name.unwrap_or("LSP"); + + debug!(server = %name, "waiting for LSP to be ready"); + + if let Some(pb) = pb { + pb.set_message(format!("{}: waiting for indexing...", name)); + } + + for i in 0..10 { + let params = lsp_types::DocumentSymbolParams { + text_document: TextDocumentIdentifier { uri: uri.clone() }, + work_done_progress_params: Default::default(), + partial_result_params: Default::default(), + }; + + match self.send_request("textDocument/documentSymbol", serde_json::to_value(params)?) 
{ + Ok(Value::Array(arr)) if !arr.is_empty() => { + trace!(server = %name, attempt = i, "syntax analysis ready"); + break; + } + _ => thread::sleep(Duration::from_millis(200)), + } + } + + if let Some(pb) = pb { + pb.set_message(format!("{}: ready", name)); + } + + for attempt in 0..max_attempts { + let hover_params = json!({ + "textDocument": { "uri": uri.as_str() }, + "position": { "line": 0, "character": 4 } + }); + + match self.send_request("textDocument/hover", hover_params) { + Ok(result) if !result.is_null() => { + debug!(server = %name, attempts = attempt + 1, "LSP ready"); + return Ok(true); + } + _ => {} + } + + if attempt < max_attempts - 1 { + thread::sleep(Duration::from_millis(500)); + } + } + + warn!(server = %name, "LSP did not become ready after {} attempts", max_attempts); + Ok(false) + } + + fn initialize(&mut self) -> Result<()> { + let text_document_caps = lsp_types::TextDocumentClientCapabilities { + definition: Some(lsp_types::GotoCapability { + dynamic_registration: Some(false), + link_support: Some(true), + }), + synchronization: Some(lsp_types::TextDocumentSyncClientCapabilities { + dynamic_registration: Some(false), + will_save: Some(false), + will_save_wait_until: Some(false), + did_save: Some(false), + }), + ..Default::default() + }; + + let capabilities = ClientCapabilities { + text_document: Some(text_document_caps), + ..Default::default() + }; + + let params = InitializeParams { + capabilities, + workspace_folders: Some(vec![WorkspaceFolder { + uri: self.root_uri.clone(), + name: "root".to_string(), + }]), + ..Default::default() + }; + + self.send_request("initialize", serde_json::to_value(params)?)?; + self.send_notification("initialized", serde_json::to_value(InitializedParams {})?)?; + + Ok(()) + } + + fn open_file(&mut self, path: &Path, content: &str, language_id: &str) -> Result<()> { + if self.opened_files.contains_key(path) { + return Ok(()); + } + + let uri = path_to_uri(path)?; + + let version = 1; + 
self.opened_files.insert(path.to_path_buf(), version); + + let params = DidOpenTextDocumentParams { + text_document: lsp_types::TextDocumentItem { + uri, + language_id: language_id.to_string(), + version, + text: content.to_string(), + }, + }; + + self.send_notification("textDocument/didOpen", serde_json::to_value(params)?) + } + + fn goto_definition( + &mut self, + path: &Path, + line: u32, + character: u32, + ) -> Result> { + let uri = path_to_uri(path)?; + + let params = GotoDefinitionParams { + text_document_position_params: TextDocumentPositionParams { + text_document: TextDocumentIdentifier { uri }, + position: Position { line, character }, + }, + work_done_progress_params: Default::default(), + partial_result_params: Default::default(), + }; + + let result = self.send_request("textDocument/definition", serde_json::to_value(params)?)?; + + if result.is_null() { + return Ok(None); + } + + let response: GotoDefinitionResponse = serde_json::from_value(result)?; + + match response { + GotoDefinitionResponse::Scalar(loc) => Ok(Some(loc)), + GotoDefinitionResponse::Array(locs) => Ok(locs.into_iter().next()), + GotoDefinitionResponse::Link(links) => { + Ok(links.into_iter().next().map(|l| lsp_types::Location { + uri: l.target_uri, + range: l.target_selection_range, + })) + } + } + } + + fn hover(&mut self, path: &Path, line: u32, character: u32) -> Result> { + let uri = path_to_uri(path)?; + + let params = json!({ + "textDocument": { "uri": uri.as_str() }, + "position": { "line": line, "character": character } + }); + + let result = self.send_request("textDocument/hover", params)?; + + if result.is_null() { + return Ok(None); + } + + let hover: lsp_types::Hover = serde_json::from_value(result)?; + + let content = match hover.contents { + lsp_types::HoverContents::Scalar(marked) => extract_marked_string(&marked), + lsp_types::HoverContents::Array(arr) => arr + .into_iter() + .map(|m| extract_marked_string(&m)) + .collect::>() + .join("\n"), + 
lsp_types::HoverContents::Markup(markup) => markup.value, + }; + + Ok(Some(content)) + } + + fn shutdown(&mut self) -> Result<()> { + self.send_request("shutdown", json!(null))?; + self.send_notification("exit", json!(null))?; + let _ = self.process.wait(); + Ok(()) + } +} + +fn extract_marked_string(marked: &lsp_types::MarkedString) -> String { + match marked { + lsp_types::MarkedString::String(s) => s.clone(), + lsp_types::MarkedString::LanguageString(ls) => ls.value.clone(), + } +} + +impl Drop for LspClient { + fn drop(&mut self) { + let _ = self.shutdown(); + } +} + +#[derive(Debug, Default, Clone)] +pub struct LspServerStats { + pub resolved: usize, + pub no_definition: usize, + pub external: usize, + pub not_indexed: usize, + pub no_match: usize, +} + +#[derive(Debug, Default)] +pub struct LspStats { + pub by_server: HashMap, +} + +impl LspStats { + pub fn total_resolved(&self) -> usize { + self.by_server.values().map(|s| s.resolved).sum() + } +} + +impl std::fmt::Display for LspStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut servers: Vec<_> = self.by_server.iter().collect(); + servers.sort_by_key(|(name, _)| name.as_str()); + + let parts: Vec = servers + .iter() + .map(|(name, stats)| { + let total = stats.resolved + stats.external + stats.no_definition + stats.not_indexed + stats.no_match; + format!( + "{}: {}/{} resolved ({} external, {} no-def, {} not-indexed, {} no-match)", + name, stats.resolved, total, stats.external, stats.no_definition, stats.not_indexed, stats.no_match + ) + }) + .collect(); + + write!(f, "{}", parts.join("\n ")) + } +} + +pub struct LspResolver { + clients: HashMap, + failed_servers: HashSet, + root: PathBuf, + file_cache: HashMap, + progress: Option, + stats: LspStats, +} + +impl LspResolver { + pub fn new(root: &Path) -> Self { + Self { + clients: HashMap::new(), + failed_servers: HashSet::new(), + root: root.to_path_buf(), + file_cache: HashMap::new(), + progress: None, + stats: 
LspStats::default(), + } + } + + pub fn with_progress(root: &Path, pb: ProgressBar) -> Self { + Self { + clients: HashMap::new(), + failed_servers: HashSet::new(), + root: root.to_path_buf(), + file_cache: HashMap::new(), + progress: Some(pb), + stats: LspStats::default(), + } + } + + pub fn stats(&self) -> &LspStats { + &self.stats + } + + pub fn set_progress(&mut self, pb: Option) { + self.progress = pb; + } + + fn get_or_create_client(&mut self, ext: &str) -> Result<&mut LspClient> { + let registry = Registry::global(); + let lang_entry = registry + .get_by_extension(ext) + .with_context(|| format!("no language for extension: {}", ext))?; + + let lsp_config = lang_entry + .lsp + .as_ref() + .with_context(|| format!("no LSP config for language: {}", lang_entry.name))?; + + let key = lsp_config.binary.clone(); + + if self.failed_servers.contains(&key) { + bail!("{} previously failed to initialize", key); + } + + if !self.clients.contains_key(&key) { + if let Some(ref pb) = self.progress { + pb.set_message(format!("starting {}...", lsp_config.binary)); + } + + let client = match LspClient::new(lsp_config, &self.root) { + Ok(mut c) => { + if let Err(e) = c.initialize() { + self.failed_servers.insert(key.clone()); + warn!(server = %lsp_config.binary, error = ?e, "LSP initialization failed"); + return Err(e); + } + c + } + Err(e) => { + self.failed_servers.insert(key.clone()); + warn!(server = %lsp_config.binary, error = ?e, "LSP server failed to start"); + return Err(e); + } + }; + + self.clients.insert(key.clone(), client); + } + + Ok(self.clients.get_mut(&key).unwrap()) + } + + fn read_file(&mut self, path: &Path) -> Result { + if let Some(content) = self.file_cache.get(path) { + return Ok(content.clone()); + } + + let content = fs::read_to_string(path) + .with_context(|| format!("failed to read {}", path.display()))?; + + self.file_cache.insert(path.to_path_buf(), content.clone()); + Ok(content) + } + + fn language_id_for_ext(ext: &str) -> &'static str { + match 
ext { + "rs" => "rust", + "ts" | "tsx" | "mts" | "cts" => "typescript", + "js" | "jsx" | "mjs" | "cjs" => "javascript", + "py" | "pyi" => "python", + "go" => "go", + "c" | "h" => "c", + "cpp" | "cc" | "cxx" | "hpp" | "hxx" => "cpp", + "java" => "java", + "zig" => "zig", + "sh" | "bash" => "shellscript", + "scala" | "sc" => "scala", + _ => "text", + } + } + + fn server_name_for_ext(&self, ext: &str) -> Option { + let registry = Registry::global(); + let lang_entry = registry.get_by_extension(ext)?; + lang_entry.lsp.as_ref().map(|l| l.binary.clone()) + } + + fn get_server_stats(&mut self, server: &str) -> &mut LspServerStats { + self.stats + .by_server + .entry(server.to_string()) + .or_default() + } + + pub fn resolve_call_full(&mut self, call: &Call, index: &Index) -> Option { + let ext = call.file.extension().and_then(|e| e.to_str())?.to_string(); + let server_name = self.server_name_for_ext(&ext)?; + let abs_path = self.root.join(&call.file); + let language_id = Self::language_id_for_ext(&ext); + let callee = call.callee.clone(); + let start_line_idx = call.span.start_line.saturating_sub(1); + + let content = self.read_file(&abs_path).ok()?; + + let lines: Vec<&str> = content.lines().collect(); + if start_line_idx >= lines.len() { + return None; + } + + let line_content = lines[start_line_idx]; + let col = line_content.find(&callee).unwrap_or(0) as u32; + + let pb = self.progress.clone(); + let client = self.get_or_create_client(&ext).ok()?; + client.open_file(&abs_path, &content, language_id).ok()?; + + if !client.is_ready { + let ready = client + .wait_for_ready(&abs_path, 60, pb.as_ref(), Some(&server_name)) + .unwrap_or(false); + client.is_ready = true; + if let Some(ref pb) = pb { + if ready { + pb.set_message("resolving..."); + } else { + pb.set_message(format!("{}: indexing (may be slow)", server_name)); + } + } + } + + let signature = client + .hover(&abs_path, start_line_idx as u32, col) + .ok() + .flatten() + .and_then(|h| extract_signature(&h)); + + 
let receiver_type = call.qualifier.as_ref().and_then(|_| { + let qualifier_col = line_content.find(call.qualifier.as_deref()?)?; + client + .hover(&abs_path, start_line_idx as u32, qualifier_col as u32) + .ok() + .flatten() + .and_then(|h| extract_type(&h)) + }); + + let location = client + .goto_definition(&abs_path, start_line_idx as u32, col) + .ok() + .flatten(); + + let location = match location { + Some(loc) => loc, + None => { + trace!(callee = %callee, file = %call.file.display(), "no definition found"); + self.get_server_stats(&server_name).no_definition += 1; + return None; + } + }; + + let def_path = uri_to_path(&location.uri)?; + + let root = self.root.clone(); + let rel_path = match def_path.strip_prefix(&root) { + Ok(p) => p.to_path_buf(), + Err(_) => { + trace!(callee = %callee, path = %def_path.display(), "definition is external"); + self.get_server_stats(&server_name).external += 1; + return None; + } + }; + + let start_line = location.range.start.line as usize + 1; + let end_line = location.range.end.line as usize + 1; + + let record = match index.get(&rel_path) { + Some(r) => r, + None => { + self.get_server_stats(&server_name).not_indexed += 1; + return None; + } + }; + + let def = match record + .definitions + .iter() + .find(|d| d.span.start_line <= start_line && d.span.end_line >= end_line) + { + Some(d) => d, + None => { + self.get_server_stats(&server_name).no_match += 1; + return None; + } + }; + + self.get_server_stats(&server_name).resolved += 1; + Some(ResolvedCall { + target_file: rel_path, + target_name: def.name.clone(), + target_span: def.span.clone(), + signature, + receiver_type, + }) + } + + pub fn resolve_call(&mut self, call: &Call, index: &Index) -> Option { + if let Some(ref resolved) = call.resolved { + return index + .get(&resolved.target_file)? + .definitions + .iter() + .find(|d| d.name == resolved.target_name) + .cloned(); + } + + let resolved = self.resolve_call_full(call, index)?; + index + .get(&resolved.target_file)? 
+ .definitions + .iter() + .find(|d| d.name == resolved.target_name) + .cloned() + } + + pub fn resolve_calls_batch( + &mut self, + calls: &[&Call], + index: &Index, + ) -> HashMap { + let mut results = HashMap::new(); + + for (i, call) in calls.iter().enumerate() { + if let Some(def) = self.resolve_call(call, index) { + results.insert(i, def); + } + } + + results + } +} + +fn extract_signature(hover_content: &str) -> Option { + let lines: Vec<&str> = hover_content.lines().collect(); + for line in &lines { + let trimmed = line.trim(); + if trimmed.starts_with("fn ") + || trimmed.starts_with("pub fn ") + || trimmed.starts_with("async fn ") + || trimmed.starts_with("pub async fn ") + || trimmed.starts_with("def ") + || trimmed.starts_with("function ") + || trimmed.starts_with("func ") + { + return Some(trimmed.to_string()); + } + if trimmed.contains("->") || trimmed.contains("=>") { + return Some(trimmed.to_string()); + } + } + lines.first().map(|s| s.trim().to_string()) +} + +fn extract_type(hover_content: &str) -> Option { + let content = hover_content.trim(); + if content.is_empty() { + return None; + } + + for line in content.lines() { + let trimmed = line.trim(); + if trimmed.starts_with("let ") || trimmed.starts_with("const ") { + if let Some(colon_pos) = trimmed.find(':') { + let type_part = trimmed[colon_pos + 1..].trim(); + let type_end = type_part.find('=').unwrap_or(type_part.len()); + return Some(type_part[..type_end].trim().to_string()); + } + } + if !trimmed.starts_with("fn ") && !trimmed.starts_with("def ") { + if let Some(first_line) = trimmed.split('\n').next() { + return Some(first_line.to_string()); + } + } + } + Some(content.lines().next()?.to_string()) +} + +#[derive(Debug, Clone)] +pub struct LspAvailability { + pub available: bool, + pub location: Option, + pub can_auto_install: bool, + pub install_method: Option, +} + +pub fn check_lsp_availability() -> HashMap { + let registry = Registry::global(); + let mut result = HashMap::new(); + + for 
lang in registry.languages() { + if let Some(ref lsp) = lang.lsp { + let local_path = lsp_binary_path(lsp); + let system_available = which::which(&lsp.binary).is_ok(); + let local_available = local_path.exists(); + let available = system_available || local_available; + + let location = if local_available { + Some(local_path.display().to_string()) + } else if system_available { + which::which(&lsp.binary) + .ok() + .map(|p| p.display().to_string()) + } else { + None + }; + + let (can_auto_install, install_method) = if lsp.url_template.is_some() { + (true, Some("download".to_string())) + } else if lsp.npm_package.is_some() { + let bun_available = which::which("bun").is_ok(); + let npm_available = which::which("npm").is_ok(); + if bun_available { + (true, Some("bun".to_string())) + } else if npm_available { + (true, Some("npm".to_string())) + } else { + (false, Some("npm/bun".to_string())) + } + } else if lsp.go_package.is_some() { + let go_available = which::which("go").is_ok(); + (go_available, Some("go".to_string())) + } else { + (false, None) + }; + + result.insert( + lang.name.clone(), + LspAvailability { + available, + location, + can_auto_install: can_auto_install && !available, + install_method, + }, + ); + } + } + + result +} + +pub fn ensure_lsp_for_extension(ext: &str, root: &Path) -> Result { + let registry = Registry::global(); + let lang_entry = registry + .get_by_extension(ext) + .with_context(|| format!("no language for extension: {}", ext))?; + + let lsp_config = lang_entry + .lsp + .as_ref() + .with_context(|| format!("no LSP config for language: {}", lang_entry.name))?; + + find_lsp_binary(lsp_config, root) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_current_target() { + let target = current_target(); + assert!(!target.is_empty()); + assert_ne!(target, "unknown"); + } + + #[test] + fn test_lsp_binary_path() { + let lsp = LspConfig { + binary: "rust-analyzer".to_string(), + args: vec![], + version: None, + url_template: 
None, + archive: None, + binary_path: None, + targets: HashMap::new(), + npm_package: None, + go_package: None, + }; + + let path = lsp_binary_path(&lsp); + assert!(path.to_string_lossy().contains("rust-analyzer")); + assert!(path.to_string_lossy().contains("lsp")); + } + + #[test] + fn test_language_id_for_ext() { + assert_eq!(LspResolver::language_id_for_ext("rs"), "rust"); + assert_eq!(LspResolver::language_id_for_ext("ts"), "typescript"); + assert_eq!(LspResolver::language_id_for_ext("py"), "python"); + assert_eq!(LspResolver::language_id_for_ext("go"), "go"); + assert_eq!(LspResolver::language_id_for_ext("c"), "c"); + assert_eq!(LspResolver::language_id_for_ext("cpp"), "cpp"); + } + + #[test] + fn test_check_lsp_availability() { + let availability = check_lsp_availability(); + assert!(!availability.is_empty()); + } +} + +#[cfg(test)] +mod integration_tests { + use super::*; + use std::env; + use std::thread; + use std::time::Duration; + + #[test] + #[ignore] // Run with: cargo test --release -- --ignored test_lsp_client_rust + fn test_lsp_client_rust() { + let root = env::current_dir().expect("failed to get current dir"); + let registry = Registry::global(); + let rust_entry = registry.get("rust").expect("rust not in registry"); + let lsp_config = rust_entry.lsp.as_ref().expect("rust has no LSP config"); + + let mut client = LspClient::new(lsp_config, &root).expect("failed to create LSP client"); + client.initialize().expect("failed to initialize LSP"); + + let test_file = root.join("src/main.rs"); + let content = fs::read_to_string(&test_file).expect("failed to read test file"); + + client + .open_file(&test_file, &content, "rust") + .expect("failed to open file"); + + client + .wait_for_ready(&test_file, 30, None, None) + .expect("wait_for_ready failed"); + + // Line 61: ".filter(|path| is_url_or_git(path))" + let line = content.lines().nth(60).unwrap(); + let col = line.find("is_url_or_git").unwrap_or(0); + + // Retry a few times in case of "content 
modified" errors + for _ in 0..5 { + match client.goto_definition(&test_file, 60, col as u32) { + Ok(Some(loc)) => { + let path = uri_to_path(&loc.uri).expect("invalid uri"); + assert!(path.ends_with("main.rs")); + assert_eq!(loc.range.start.line, 25); // fn is_url_or_git definition + return; + } + Ok(None) | Err(_) => thread::sleep(Duration::from_secs(2)), + } + } + panic!("Failed to resolve definition after all attempts"); + } +} diff --git a/src/code/mod.rs b/src/code/mod.rs new file mode 100644 index 0000000..702c16d --- /dev/null +++ b/src/code/mod.rs @@ -0,0 +1,6 @@ +pub mod extract; +pub mod grammar; +pub mod graph; +pub mod index; +pub mod lsp; +pub mod resolve; diff --git a/src/code/resolve.rs b/src/code/resolve.rs new file mode 100644 index 0000000..9d46da6 --- /dev/null +++ b/src/code/resolve.rs @@ -0,0 +1,427 @@ +use std::path::Path; + +use super::index::{Definition, Index}; + +fn import_to_file_patterns(module_path: &str, lang: &str) -> Vec { + let clean = module_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); + + match lang { + "rs" => { + let stripped = clean + .trim_start_matches("crate::") + .trim_start_matches("self::") + .trim_start_matches("super::"); + let parts: Vec<&str> = stripped.split("::").filter(|p| !p.is_empty()).collect(); + if parts.is_empty() { + return vec![]; + } + let file_path = parts.join("/"); + vec![ + format!("{}.rs", file_path), + format!("{}/mod.rs", file_path), + format!("src/{}.rs", file_path), + format!("src/{}/mod.rs", file_path), + ] + } + "py" => { + if clean.starts_with('.') { + return vec![]; + } + let parts: Vec<&str> = clean.split('.').collect(); + if parts.is_empty() { + return vec![]; + } + let file_path = parts.join("/"); + vec![ + format!("{}.py", file_path), + format!("{}/__init__.py", file_path), + format!("src/{}.py", file_path), + ] + } + "go" => { + let parts: Vec<&str> = clean.split('/').collect(); + let local_parts: Vec<&str> = if parts.len() >= 3 && parts[0].contains('.') { + 
parts[3..].to_vec() + } else { + parts + }; + if local_parts.is_empty() { + return vec![]; + } + let dir_path = local_parts.join("/"); + vec![dir_path] + } + "ts" | "tsx" | "js" | "jsx" | "mjs" | "cjs" => { + let base = clean + .trim_start_matches("./") + .trim_start_matches("../") + .trim_start_matches("@/") + .trim_start_matches('@'); + vec![ + format!("{}.ts", base), + format!("{}.tsx", base), + format!("{}.js", base), + format!("{}/index.ts", base), + format!("{}/index.tsx", base), + format!("{}/index.js", base), + ] + } + "java" => { + let file_path = clean.replace('.', "/"); + vec![ + format!("{}.java", file_path), + format!("src/{}.java", file_path), + format!("src/main/java/{}.java", file_path), + ] + } + "scala" | "sc" => { + let trimmed = clean.trim_end_matches("._").trim_end_matches(".*"); + let file_path = trimmed.replace('.', "/"); + vec![format!("{}.scala", file_path), format!("{}.sc", file_path)] + } + "c" | "cpp" | "cc" | "cxx" | "h" | "hpp" | "hxx" => { + vec![ + clean.to_string(), + format!("include/{}", clean), + format!("src/{}", clean), + ] + } + "zig" => { + if clean.ends_with(".zig") || clean.contains('/') { + vec![clean.to_string(), format!("src/{}", clean)] + } else { + vec![format!("{}.zig", clean), format!("src/{}.zig", clean)] + } + } + _ => vec![clean.to_string()], + } +} + +fn import_matches_callee(module_path: &str, callee: &str, lang: &str) -> bool { + let clean = module_path.trim_matches(|c| c == '"' || c == '\'' || c == '<' || c == '>'); + + match lang { + "rs" => { + let parts: Vec<&str> = clean.split("::").collect(); + parts.last().map(|s| *s == callee).unwrap_or(false) + } + "py" => { + let parts: Vec<&str> = clean.split('.').collect(); + parts.last().map(|s| *s == callee).unwrap_or(false) + } + "go" => { + let parts: Vec<&str> = clean.split('/').collect(); + parts.last().map(|s| *s == callee).unwrap_or(false) + } + "java" | "scala" | "sc" => { + let parts: Vec<&str> = clean.split('.').collect(); + parts + .last() + .map(|s| *s 
== callee || *s == "*" || *s == "_") + .unwrap_or(false) + } + _ => true, + } +} + +fn file_matches_pattern(file_path: &Path, pattern: &str) -> bool { + let file_str = file_path.to_string_lossy(); + + if pattern.contains('/') { + file_str.ends_with(pattern) || file_str.contains(&format!("/{}", pattern)) + } else { + file_path + .file_name() + .and_then(|n| n.to_str()) + .map(|n| n == pattern) + .unwrap_or(false) + } +} + +pub struct Resolver<'a> { + index: &'a Index, + strict: bool, +} + +impl<'a> Resolver<'a> { + pub fn new(index: &'a Index) -> Self { + Self { + index, + strict: false, + } + } + + pub fn with_strict(index: &'a Index, strict: bool) -> Self { + Self { index, strict } + } + + /// Resolve a callee to its definition. + /// + /// Resolution order: + /// 1. Same file - check if callee is defined in the calling file + /// 2. Via imports - use import statements to find the defining file + /// 3. Global fallback (unless strict mode) - search entire index by name + /// + /// Note: Global fallback can produce false positives when multiple functions + /// share the same name (e.g., `parse`). Use strict mode to disable it. 
+ pub fn resolve( + &self, + callee: &str, + _qualifier: Option<&str>, + from_file: &Path, + ) -> Option { + if let Some(def) = self.resolve_same_file(callee, from_file) { + return Some(def); + } + + if let Some(def) = self.resolve_via_imports(callee, from_file) { + return Some(def); + } + + if !self.strict { + return self.resolve_by_index(callee); + } + + None + } + + fn resolve_same_file(&self, callee: &str, file: &Path) -> Option { + let record = self.index.get(file)?; + record + .definitions + .iter() + .find(|d| d.name == callee) + .cloned() + } + + fn resolve_by_index(&self, callee: &str) -> Option { + self.index.definitions().find(|d| d.name == callee).cloned() + } + + fn resolve_via_imports(&self, callee: &str, from_file: &Path) -> Option { + let record = self.index.get(from_file)?; + let ext = from_file.extension().and_then(|e| e.to_str()).unwrap_or(""); + + for import in &record.imports { + if !import_matches_callee(&import.module_path, callee, ext) { + continue; + } + + let patterns = import_to_file_patterns(&import.module_path, ext); + + for indexed_file in self.index.files.keys() { + for pattern in &patterns { + if file_matches_pattern(indexed_file, pattern) { + if let Some(def) = self.find_def_in_file(indexed_file, callee) { + return Some(def); + } + } + } + } + } + + None + } + + fn find_def_in_file(&self, file: &Path, name: &str) -> Option { + let record = self.index.get(file)?; + record.definitions.iter().find(|d| d.name == name).cloned() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::code::index::{Definition, DefinitionKind, FileRecord, Import, Span}; + use std::path::PathBuf; + + fn make_def(name: &str, file: &str) -> Definition { + Definition { + name: name.to_string(), + kind: DefinitionKind::Function, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 3, + }, + file: PathBuf::from(file), + signature: None, + } + } + + fn make_import(module_path: &str, file: &str) -> Import { + Import { + module_path: 
module_path.to_string(), + alias: None, + span: Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 1, + }, + file: PathBuf::from(file), + } + } + + #[test] + fn test_resolve_same_file() { + let mut index = Index::new(); + let file = PathBuf::from("src/main.rs"); + + index.update(FileRecord { + path: file.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("foo", "src/main.rs")], + calls: vec![], + imports: vec![], + }); + + let resolver = Resolver::new(&index); + let found = resolver.resolve("foo", None, &file); + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "foo"); + + let not_found = resolver.resolve("bar", None, &file); + assert!(not_found.is_none()); + } + + #[test] + fn test_resolve_prefers_same_file() { + let mut index = Index::new(); + let file_a = PathBuf::from("src/a.rs"); + let file_b = PathBuf::from("src/b.rs"); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("foo", "src/a.rs")], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("foo", "src/b.rs")], + calls: vec![], + imports: vec![], + }); + + let resolver = Resolver::new(&index); + + let found = resolver.resolve("foo", None, &file_a); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, file_a); + + let found = resolver.resolve("foo", None, &file_b); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, file_b); + } + + #[test] + fn test_resolve_via_imports() { + let mut index = Index::new(); + let main_file = PathBuf::from("src/main.rs"); + let helper_file = PathBuf::from("src/utils/helper.rs"); + + index.update(FileRecord { + path: helper_file.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("helper", "src/utils/helper.rs")], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: main_file.clone(), + mtime: 0, + size: 0, + definitions: vec![], + calls: 
vec![], + imports: vec![make_import("crate::utils::helper", "src/main.rs")], + }); + + let resolver = Resolver::new(&index); + let found = resolver.resolve("helper", None, &main_file); + + assert!(found.is_some()); + assert_eq!(found.unwrap().name, "helper"); + } + + #[test] + fn test_resolve_falls_back_to_index() { + let mut index = Index::new(); + let main_file = PathBuf::from("src/main.rs"); + + index.update(FileRecord { + path: PathBuf::from("src/parse.rs"), + mtime: 0, + size: 0, + definitions: vec![make_def("parse", "src/parse.rs")], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: main_file.clone(), + mtime: 0, + size: 0, + definitions: vec![], + calls: vec![], + imports: vec![], + }); + + let resolver = Resolver::new(&index); + + // Should find via global index lookup + let found = resolver.resolve("parse", None, &main_file); + assert!(found.is_some()); + assert_eq!(found.unwrap().file, PathBuf::from("src/parse.rs")); + } + + #[test] + fn test_file_matches_pattern() { + assert!(file_matches_pattern( + Path::new("src/utils/helper.rs"), + "utils/helper.rs" + )); + assert!(file_matches_pattern( + Path::new("src/utils/helper.rs"), + "helper.rs" + )); + assert!(file_matches_pattern(Path::new("helper.rs"), "helper.rs")); + assert!(!file_matches_pattern( + Path::new("src/other.rs"), + "helper.rs" + )); + } + + #[test] + fn test_import_to_file_patterns_rust() { + let patterns = import_to_file_patterns("crate::utils::helper", "rs"); + assert!(patterns.iter().any(|p| p.contains("utils/helper.rs"))); + assert!(patterns.iter().any(|p| p.contains("utils/helper/mod.rs"))); + } + + #[test] + fn test_import_to_file_patterns_python() { + let patterns = import_to_file_patterns("mypackage.utils.helper", "py"); + assert!(patterns + .iter() + .any(|p| p.contains("mypackage/utils/helper.py"))); + } + + #[test] + fn test_import_to_file_patterns_js() { + let patterns = import_to_file_patterns("./components/Button", "ts"); + 
assert!(patterns.iter().any(|p| p.contains("components/Button.ts"))); + assert!(patterns + .iter() + .any(|p| p.contains("components/Button/index.ts"))); + } +} diff --git a/src/config.rs b/src/core/config.rs similarity index 64% rename from src/config.rs rename to src/core/config.rs index e9817e2..6c51781 100644 --- a/src/config.rs +++ b/src/core/config.rs @@ -2,60 +2,7 @@ use std::path::{Path, PathBuf}; use serde::{Deserialize, Serialize}; -use crate::cli::{Exclude, OutputFormat}; - -#[derive(Debug, Serialize, Clone)] -#[serde(into = "String")] -pub struct BackwardsCompatOutputFormat(OutputFormat); - -impl From for String { - fn from(format: BackwardsCompatOutputFormat) -> Self { - match format.0 { - OutputFormat::Tree => "tree".to_string(), - OutputFormat::Files => "files".to_string(), - OutputFormat::Both => "both".to_string(), - } - } -} - -impl<'de> Deserialize<'de> for BackwardsCompatOutputFormat { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - #[derive(Deserialize)] - #[serde(untagged)] - enum FormatOrString { - Format(OutputFormat), - String(String), - } - - match FormatOrString::deserialize(deserializer)? 
{ - FormatOrString::Format(format) => Ok(BackwardsCompatOutputFormat(format)), - FormatOrString::String(s) => { - let format = match s.to_lowercase().as_str() { - "tree" => OutputFormat::Tree, - "files" => OutputFormat::Files, - "both" => OutputFormat::Both, - _ => OutputFormat::Both, // Default to Both for unknown values - }; - Ok(BackwardsCompatOutputFormat(format)) - } - } - } -} - -impl From for BackwardsCompatOutputFormat { - fn from(format: OutputFormat) -> Self { - BackwardsCompatOutputFormat(format) - } -} - -impl From for OutputFormat { - fn from(format: BackwardsCompatOutputFormat) -> Self { - format.0 - } -} +use super::types::{Exclude, OutputFormat}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct Config { @@ -66,7 +13,7 @@ pub struct Config { pub max_depth: usize, #[serde(default = "default_output_format")] - pub default_output_format: BackwardsCompatOutputFormat, + pub default_output_format: OutputFormat, #[serde(default)] pub default_excludes: Vec, @@ -83,9 +30,6 @@ pub struct Config { #[serde(default)] pub traverse_links: bool, - /// List of canonical project directories for which the user has already declined to - /// save a local `.glimpse` configuration file. When a directory is present in this - /// list Glimpse will not prompt the user again. 
#[serde(default)] pub skipped_prompt_repos: Vec, } @@ -122,17 +66,15 @@ fn default_max_depth() -> usize { 20 } -fn default_output_format() -> BackwardsCompatOutputFormat { - BackwardsCompatOutputFormat(OutputFormat::Both) +fn default_output_format() -> OutputFormat { + OutputFormat::Both } fn default_excludes() -> Vec { vec![ - // Version control Exclude::Pattern("**/.git/**".to_string()), Exclude::Pattern("**/.svn/**".to_string()), Exclude::Pattern("**/.hg/**".to_string()), - // Build artifacts and dependencies Exclude::Pattern("**/target/**".to_string()), Exclude::Pattern("**/node_modules/**".to_string()), Exclude::Pattern("**/dist/**".to_string()), @@ -175,7 +117,6 @@ pub fn get_config_path() -> anyhow::Result { } } - // Fall back to platform-specific directory let config_dir = dirs::config_dir() .ok_or_else(|| anyhow::anyhow!("Could not determine config directory"))? .join("glimpse"); @@ -188,7 +129,7 @@ pub struct RepoConfig { pub exclude: Option>, pub max_size: Option, pub max_depth: Option, - pub output: Option, + pub output: Option, pub file: Option, pub hidden: Option, pub no_ignore: Option, @@ -210,7 +151,6 @@ pub fn load_repo_config(path: &Path) -> anyhow::Result { } } -/// Persist the provided global configuration to disk, overriding any existing config file. 
pub fn save_config(config: &Config) -> anyhow::Result<()> { let config_path = get_config_path()?; diff --git a/src/core/mod.rs b/src/core/mod.rs new file mode 100644 index 0000000..62fc8ee --- /dev/null +++ b/src/core/mod.rs @@ -0,0 +1,12 @@ +pub mod config; +pub mod source_detection; +pub mod tokenizer; +pub mod types; + +pub use config::{ + get_config_path, load_config, load_repo_config, save_config, save_repo_config, Config, + RepoConfig, +}; +pub use source_detection::is_source_file; +pub use tokenizer::{TokenCount, TokenCounter, TokenizerBackend}; +pub use types::{Exclude, FileEntry, OutputFormat, TokenizerType}; diff --git a/src/source_detection.rs b/src/core/source_detection.rs similarity index 84% rename from src/source_detection.rs rename to src/core/source_detection.rs index c65bd24..d9218f3 100644 --- a/src/source_detection.rs +++ b/src/core/source_detection.rs @@ -1,10 +1,8 @@ use std::fs; use std::path::Path; -// Include the generated code include!(concat!(env!("OUT_DIR"), "/languages.rs")); -/// Extract interpreter from shebang line and exec pattern fn extract_interpreter(data: &str) -> Option { let lines: Vec<&str> = data.lines().take(2).collect(); @@ -35,7 +33,6 @@ fn extract_interpreter(data: &str) -> Option { let mut script = first_part.split('/').next_back()?.to_string(); - // Handle /usr/bin/env if script == "env" { for part in path.split_whitespace().skip(1) { if !part.starts_with('-') && !part.contains('=') { @@ -43,13 +40,11 @@ fn extract_interpreter(data: &str) -> Option { break; } } - // If we only found env with no valid interpreter, return None if script == "env" { return None; } } - // Strip version numbers (python2.7 -> python2) if let Some(idx) = script.find(|c: char| c.is_ascii_digit()) { if let Some(dot_idx) = script[idx..].find('.') { script.truncate(idx + dot_idx); @@ -59,30 +54,25 @@ fn extract_interpreter(data: &str) -> Option { Some(script) } -/// Detect language from shebang fn detect_by_shebang(data: &str) -> bool { 
extract_interpreter(data) .map(|script| INTERPRETER_NAMES.contains(script.as_str())) .unwrap_or(false) } -/// Checks if a given path is a source file pub fn is_source_file(path: &Path) -> bool { - // Check known filenames first if let Some(name) = path.file_name().and_then(|n| n.to_str()) { if KNOWN_FILENAMES.contains(name) { return true; } } - // Then check extension if let Some(ext) = path.extension().and_then(|e| e.to_str()) { if SOURCE_EXTENSIONS.contains(ext.to_lowercase().as_str()) { return true; } } - // Finally check shebang match fs::read_to_string(path) { Ok(content) => detect_by_shebang(&content), Err(_) => false, @@ -99,19 +89,14 @@ mod tests { #[test] fn test_interpreter_extraction() { let cases = vec![ - // Basic cases ("#!/usr/bin/python", Some("python")), ("#!/bin/bash", Some("bash")), - // env cases with flags and vars ("#!/usr/bin/env python", Some("python")), ("#!/usr/bin/env -S python3 -u", Some("python3")), ("#!/usr/bin/env FOO=bar python", Some("python")), - // Version stripping ("#!/usr/bin/python2.7", Some("python2")), ("#!/usr/bin/ruby1.9.3", Some("ruby1")), - // exec patterns ("#!/bin/sh\nexec python \"$0\" \"$@\"", Some("python")), - // Invalid cases ("no shebang", None), ("#!/", None), ("", None), @@ -130,19 +115,14 @@ mod tests { fn test_source_detection() { let dir = tempdir().unwrap(); - // Test cases: (filename, content, expected) let test_cases = vec![ - // Extensions ("test.rs", "", true), ("test.py", "", true), ("test.js", "", true), - // Known filenames ("Makefile", "", true), ("Dockerfile", "", true), - // Shebangs ("script", "#!/usr/bin/env python\nprint('hi')", true), ("run", "#!/usr/bin/node\nconsole.log()", true), - // Non-source files ("random.xyz", "", false), ("not-script", "just some text", false), ]; diff --git a/src/tokenizer.rs b/src/core/tokenizer.rs similarity index 89% rename from src/tokenizer.rs rename to src/core/tokenizer.rs index b68df15..e70572a 100644 --- a/src/tokenizer.rs +++ b/src/core/tokenizer.rs @@ 
-1,8 +1,11 @@ -use anyhow::{anyhow, Result}; use std::path::PathBuf; + +use anyhow::{anyhow, Result}; use tiktoken_rs::get_bpe_from_model; use tokenizers::Tokenizer as HfTokenizer; +use super::types::FileEntry; + pub enum TokenizerBackend { Tiktoken(tiktoken_rs::CoreBPE), HuggingFace(Box), @@ -10,7 +13,7 @@ pub enum TokenizerBackend { pub struct TokenCount { pub total_tokens: usize, - pub breakdown: Vec<(PathBuf, usize)>, // (file_path, token_count) + pub breakdown: Vec<(PathBuf, usize)>, } pub struct TokenCounter { @@ -57,10 +60,7 @@ impl TokenCounter { pub fn count_tokens(&self, text: &str) -> Result { match &self.backend { - TokenizerBackend::Tiktoken(bpe) => { - // tiktoken's encode_with_special_tokens is infallible - Ok(bpe.encode_with_special_tokens(text).len()) - } + TokenizerBackend::Tiktoken(bpe) => Ok(bpe.encode_with_special_tokens(text).len()), TokenizerBackend::HuggingFace(tokenizer) => tokenizer .encode(text, false) .map_err(|e| anyhow!("Failed to encode text with HuggingFace tokenizer: {}", e)) @@ -68,7 +68,7 @@ impl TokenCounter { } } - pub fn count_files(&self, entries: &[super::output::FileEntry]) -> Result { + pub fn count_files(&self, entries: &[FileEntry]) -> Result { let mut total_tokens = 0; let mut breakdown = Vec::new(); diff --git a/src/core/types.rs b/src/core/types.rs new file mode 100644 index 0000000..24e7da8 --- /dev/null +++ b/src/core/types.rs @@ -0,0 +1,31 @@ +use std::path::PathBuf; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum OutputFormat { + Tree, + Files, + Both, +} + +#[derive(Debug, Clone)] +pub enum TokenizerType { + Tiktoken, + HuggingFace, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum Exclude { + File(PathBuf), + Pattern(String), +} + +#[derive(Debug, Clone)] +pub struct FileEntry { + pub path: PathBuf, + pub content: String, + pub size: u64, +} diff --git a/src/git_processor.rs 
b/src/fetch/git.rs similarity index 81% rename from src/git_processor.rs rename to src/fetch/git.rs index 4fe34f7..45faf6f 100644 --- a/src/git_processor.rs +++ b/src/fetch/git.rs @@ -1,6 +1,7 @@ +use std::path::PathBuf; + use anyhow::Result; use git2::Repository; -use std::path::PathBuf; use tempfile::TempDir; use url::Url; @@ -49,12 +50,6 @@ impl GitProcessor { } } -impl Drop for GitProcessor { - fn drop(&mut self) { - // Temp directory will be automatically cleaned up when dropped - } -} - #[cfg(test)] mod tests { use super::*; @@ -109,7 +104,7 @@ mod tests { temp_path.exists(), "Temp directory should exist during processor lifetime" ); - } // processor is dropped here + } assert!( !temp_path.exists(), "Temp directory should be cleaned up after drop" @@ -138,20 +133,4 @@ mod tests { assert_eq!(repo_name, expected_name, "Failed for URL: {url}"); } } - - #[test] - fn test_process_repo_creates_directory() { - let test_repo = "https://github.com/rust-lang/rust-analyzer.git"; - if let Ok(processor) = GitProcessor::new() { - match processor.process_repo(test_repo) { - Ok(path) => { - assert!(path.exists(), "Repository directory should exist"); - assert!(path.join(".git").exists(), "Git directory should exist"); - // Check for some common files that should be present - assert!(path.join("Cargo.toml").exists(), "Cargo.toml should exist"); - } - Err(e) => println!("Skipping clone test due to error: {e}"), - } - } - } } diff --git a/src/fetch/mod.rs b/src/fetch/mod.rs new file mode 100644 index 0000000..eea2448 --- /dev/null +++ b/src/fetch/mod.rs @@ -0,0 +1,5 @@ +pub mod git; +pub mod url; + +pub use git::GitProcessor; +pub use url::UrlProcessor; diff --git a/src/url_processor.rs b/src/fetch/url.rs similarity index 76% rename from src/url_processor.rs rename to src/fetch/url.rs index 707f39a..a09e1da 100644 --- a/src/url_processor.rs +++ b/src/fetch/url.rs @@ -1,9 +1,10 @@ +use std::collections::HashSet; + use anyhow::Result; use arboard::Clipboard; use 
indicatif::{ProgressBar, ProgressStyle}; use reqwest::blocking::Client; use scraper::{ElementRef, Html, Node, Selector}; -use std::collections::HashSet; use url::Url; pub struct UrlProcessor { @@ -229,7 +230,6 @@ impl UrlProcessor { #[cfg(test)] mod tests { use super::*; - use mockito::Server; #[test] fn test_new_processor() { @@ -293,116 +293,9 @@ mod tests { "#; let links = processor.extract_links(html, &base_url).unwrap(); - assert_eq!(links.len(), 3); // Only http(s) links should be included + assert_eq!(links.len(), 3); assert!(links.contains(&"https://example.com/page1".to_string())); assert!(links.contains(&"https://other.com/page2".to_string())); assert!(links.contains(&"https://example.com/#section".to_string())); } - - #[test] - fn test_process_url_with_mocked_server() { - let mut server = Server::new(); - let url = server.url(); - let _m = server - .mock("GET", "/") - .with_status(200) - .with_header("content-type", "text/html") - .with_body( - r#" - - -

Test Page

-

This is a test.

- Subpage - - - "#, - ) - .create(); - - let mut processor = UrlProcessor::new(1); - let result = processor.process_url(&url, false); - - assert!(result.is_ok()); - let content = result.unwrap(); - assert!(content.contains("# Test Page")); - assert!(content.contains("This is a test")); - } - - #[test] - fn test_process_url_with_link_traversal() { - let mut server = Server::new(); - let url = server.url(); - let _m1 = server - .mock("GET", "/") - .with_status(200) - .with_header("content-type", "text/html") - .with_body( - r#" - - -

Main Page

- Subpage - - - "#, - ) - .create(); - - let _m2 = server - .mock("GET", "/subpage") - .with_status(200) - .with_header("content-type", "text/html") - .with_body( - r#" - - -

Subpage

-

Subpage content.

- - - "#, - ) - .create(); - - let mut processor = UrlProcessor::new(1); - let result = processor.process_url(&url, true); - - assert!(result.is_ok()); - let content = result.unwrap(); - assert!(content.contains("# Main Page")); - assert!(content.contains("# Subpage")); - assert!(content.contains("Subpage content")); - } - - #[test] - fn test_process_node_formatting() { - let processor = UrlProcessor::new(1); - let base_url = Url::parse("https://example.com").unwrap(); - - let html = r#" - -

Heading 1

-

Heading 2

-

Normal paragraph

-
Quote text
- Code block -
    -
  • List item 1
  • -
  • List item 2
  • -
- - "#; - - let markdown = processor.html_to_markdown(html, &base_url); - - assert!(markdown.contains("# Heading 1")); - assert!(markdown.contains("# Heading 1")); - assert!(markdown.contains("## Heading 2")); - assert!(markdown.contains("Normal paragraph")); - assert!(markdown.contains("> Quote text")); - assert!(markdown.contains("Code block")); - assert!(markdown.contains("- List item 1")); - assert!(markdown.contains("- List item 2")); - } } diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..e133234 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,12 @@ +pub mod code; +pub mod core; +pub mod fetch; +pub mod tui; + +pub use core::{ + get_config_path, is_source_file, load_config, load_repo_config, save_config, save_repo_config, + Config, Exclude, FileEntry, OutputFormat, RepoConfig, TokenCount, TokenCounter, + TokenizerBackend, TokenizerType, +}; +pub use fetch::{GitProcessor, UrlProcessor}; +pub use tui::FilePicker; diff --git a/src/main.rs b/src/main.rs index adfc044..20d2925 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,24 +1,31 @@ mod analyzer; mod cli; -mod config; -mod file_picker; -mod git_processor; mod output; -mod source_detection; -mod tokenizer; -mod url_processor; -use crate::analyzer::process_directory; -use crate::cli::Cli; -use crate::config::{ - get_config_path, load_config, load_repo_config, save_config, save_repo_config, RepoConfig, -}; -use crate::git_processor::GitProcessor; -use crate::url_processor::UrlProcessor; use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; +use anyhow::{bail, Context, Result}; +use indicatif::{ProgressBar, ProgressStyle}; +use rayon::prelude::*; +use tracing::debug; +use tracing_subscriber::EnvFilter; + +use crate::analyzer::process_directory; +use crate::cli::{Cli, CodeArgs, Commands, FunctionTarget, IndexCommand}; +use glimpse::code::extract::Extractor; +use glimpse::code::graph::CallGraph; +use glimpse::code::index::{ + clear_index, file_fingerprint, load_index, save_index, 
FileRecord, Index, +}; +use glimpse::code::lsp::LspResolver; +use glimpse::fetch::{GitProcessor, UrlProcessor}; +use glimpse::{ + get_config_path, is_source_file, load_config, load_repo_config, save_config, save_repo_config, + RepoConfig, +}; + fn is_url_or_git(path: &str) -> bool { GitProcessor::is_git_url(path) || path.starts_with("http://") || path.starts_with("https://") } @@ -34,10 +41,25 @@ fn has_custom_options(args: &Cli) -> bool { || args.no_ignore } -fn main() -> anyhow::Result<()> { +fn main() -> Result<()> { + tracing_subscriber::fmt() + .with_env_filter(EnvFilter::from_default_env()) + .with_writer(std::io::stderr) + .without_time() + .init(); + let mut config = load_config()?; let mut args = Cli::parse_with_config(&config)?; + debug!("config loaded, args parsed"); + + if let Some(ref cmd) = args.command { + return match cmd { + Commands::Code(code_args) => handle_code_command(code_args), + Commands::Index(index_args) => handle_index_command(&index_args.command), + }; + } + if args.config_path { let path = get_config_path()?; println!("{}", path.display()); @@ -62,7 +84,6 @@ fn main() -> anyhow::Result<()> { save_repo_config(&glimpse_file, &repo_config)?; println!("Configuration saved to {}", glimpse_file.display()); - // If the user explicitly saved a config, remove this directory from the skipped list if let Ok(canonical_root) = std::fs::canonicalize(&root_dir) { let root_str = canonical_root.to_string_lossy().to_string(); if let Some(pos) = config @@ -79,7 +100,6 @@ fn main() -> anyhow::Result<()> { let repo_config = load_repo_config(&glimpse_file)?; apply_repo_config(&mut args, &repo_config); } else if has_custom_options(&args) { - // Determine canonical root directory path for consistent tracking let canonical_root = std::fs::canonicalize(&root_dir).unwrap_or(root_dir.clone()); let root_str = canonical_root.to_string_lossy().to_string(); @@ -96,7 +116,6 @@ fn main() -> anyhow::Result<()> { save_repo_config(&glimpse_file, &repo_config)?; 
println!("Configuration saved to {}", glimpse_file.display()); - // In case it was previously skipped, remove from skipped list if let Some(pos) = config .skipped_prompt_repos .iter() @@ -106,7 +125,6 @@ fn main() -> anyhow::Result<()> { save_config(&config)?; } } else { - // Record that user declined for this project config.skipped_prompt_repos.push(root_str); save_config(&config)?; } @@ -139,15 +157,12 @@ fn main() -> anyhow::Result<()> { } let process_args = if subpaths.is_empty() { - // No subpaths specified, process the whole repo args.with_path(repo_path.to_str().unwrap()) } else { - // Process only the specified subpaths inside the repo let mut new_args = args.clone(); new_args.paths = subpaths .iter() .map(|sub| { - // Join with repo_path let mut joined = std::path::PathBuf::from(&repo_path); joined.push(sub); joined.to_string_lossy().to_string() @@ -170,7 +185,6 @@ fn main() -> anyhow::Result<()> { } else if args.print { println!("{content}"); } else { - // Default behavior for URLs if no -f or --print: copy to clipboard match arboard::Clipboard::new() .and_then(|mut clipboard| clipboard.set_text(content)) { @@ -196,14 +210,12 @@ fn find_containing_dir_with_glimpse(path: &Path) -> anyhow::Result { path.to_path_buf() }; - // Try to find a .glimpse file or go up until we reach the root loop { if current.join(".glimpse").exists() { return Ok(current); } if !current.pop() { - // If we can't go up anymore, just use the original path return Ok(if path.is_file() { path.parent().unwrap_or(Path::new(".")).to_path_buf() } else { @@ -214,14 +226,12 @@ fn find_containing_dir_with_glimpse(path: &Path) -> anyhow::Result { } fn create_repo_config_from_args(args: &Cli) -> RepoConfig { - use crate::config::BackwardsCompatOutputFormat; - RepoConfig { include: args.include.clone(), exclude: args.exclude.clone(), max_size: args.max_size, max_depth: args.max_depth, - output: args.output.clone().map(BackwardsCompatOutputFormat::from), + output: args.get_output_format(), file: 
args.file.clone(), hidden: Some(args.hidden), no_ignore: Some(args.no_ignore), @@ -246,7 +256,7 @@ fn apply_repo_config(args: &mut Cli, repo_config: &RepoConfig) { } if let Some(ref output) = repo_config.output { - args.output = Some((*output).clone().into()); + args.output = Some(output.clone().into()); } if let Some(ref file) = repo_config.file { @@ -261,3 +271,376 @@ fn apply_repo_config(args: &mut Cli, repo_config: &RepoConfig) { args.no_ignore = no_ignore; } } + +fn handle_code_command(args: &CodeArgs) -> Result<()> { + let root = args + .root + .canonicalize() + .unwrap_or_else(|_| args.root.clone()); + let target = FunctionTarget::parse(&args.target)?; + + let mut index = load_index(&root)?.unwrap_or_else(Index::new); + let needs_update = index_directory(&root, &mut index, args.hidden, args.no_ignore)?; + let mut needs_save = needs_update > 0; + + // Only run LSP resolution if: + // 1. --precise is requested + // 2. Either files were updated OR no calls have been resolved yet (first --precise run) + let has_any_resolved = index.calls().any(|c| c.resolved.is_some()); + if args.precise && (needs_update > 0 || !has_any_resolved) { + let resolved = resolve_calls_with_lsp(&root, &mut index)?; + if resolved > 0 { + needs_save = true; + } + } + + if needs_save { + save_index(&index, &root)?; + } + + // After LSP resolution, use build_with_options which checks call.resolved first + // This avoids creating another LSP resolver and re-trying failed calls + let graph = CallGraph::build_with_options(&index, args.strict); + + let node_id = if let Some(ref file) = target.file { + let file_path = root.join(file); + let rel_path = file_path + .strip_prefix(&root) + .unwrap_or(&file_path) + .to_path_buf(); + graph + .find_node_by_file_and_name(&rel_path, &target.function) + .or_else(|| graph.find_node_by_file_and_name(&file_path, &target.function)) + } else { + graph.find_node(&target.function) + }; + + let Some(node_id) = node_id else { + bail!("function '{}' not found in 
index", target.function); + }; + + let depth = args.depth.unwrap_or(1); + + let definitions = if args.callers { + graph + .get_callers_to_depth(node_id, depth) + .into_iter() + .filter_map(|id| graph.get_node(id).map(|n| &n.definition)) + .collect() + } else { + graph.definitions_to_depth(node_id, depth) + }; + + let output = format_definitions(&definitions, &root)?; + + if let Some(ref file) = args.file { + fs::write(file, &output)?; + eprintln!("Output written to: {}", file.display()); + } else { + print!("{}", output); + } + + Ok(()) +} + +fn handle_index_command(cmd: &IndexCommand) -> Result<()> { + match cmd { + IndexCommand::Build { + path, + force, + precise, + hidden, + no_ignore, + } => { + let root = path.canonicalize().unwrap_or_else(|_| path.clone()); + + let mut index = if *force { + Index::new() + } else { + load_index(&root)?.unwrap_or_else(Index::new) + }; + + let updated = index_directory(&root, &mut index, *hidden, *no_ignore)?; + + // Only run LSP resolution if files were updated or no calls resolved yet + let has_any_resolved = index.calls().any(|c| c.resolved.is_some()); + if *precise && (updated > 0 || !has_any_resolved) { + let resolved = resolve_calls_with_lsp(&root, &mut index)?; + if resolved > 0 { + eprintln!("Resolved {} calls with LSP", resolved); + } + } + + save_index(&index, &root)?; + + let file_count = index.files.len(); + let def_count = index.definitions().count(); + let call_count = index.calls().count(); + let resolved_count = index.calls().filter(|c| c.resolved.is_some()).count(); + + if updated > 0 || *precise { + eprintln!( + "Index updated: {} files ({} updated), {} definitions, {} calls ({} resolved)", + file_count, updated, def_count, call_count, resolved_count + ); + } else { + eprintln!( + "Index up to date: {} files, {} definitions, {} calls ({} resolved)", + file_count, def_count, call_count, resolved_count + ); + } + } + IndexCommand::Clear { path } => { + let root = path.canonicalize().unwrap_or_else(|_| 
path.clone()); + clear_index(&root)?; + eprintln!("Index cleared for: {}", root.display()); + } + IndexCommand::Status { path } => { + let root = path.canonicalize().unwrap_or_else(|_| path.clone()); + + match load_index(&root)? { + Some(index) => { + let file_count = index.files.len(); + let def_count = index.definitions().count(); + let call_count = index.calls().count(); + let import_count = index.imports().count(); + + println!("Index status for: {}", root.display()); + println!(" Files: {}", file_count); + println!(" Definitions: {}", def_count); + println!(" Calls: {}", call_count); + println!(" Imports: {}", import_count); + } + None => { + println!("No index found for: {}", root.display()); + } + } + } + } + + Ok(()) +} + +const INDEX_CHUNK_SIZE: usize = 256; + +fn index_directory(root: &Path, index: &mut Index, hidden: bool, no_ignore: bool) -> Result { + let pb = ProgressBar::new_spinner(); + pb.set_style( + ProgressStyle::default_spinner() + .template("{spinner:.green} {msg}") + .expect("valid template"), + ); + pb.set_message("scanning files..."); + pb.enable_steady_tick(std::time::Duration::from_millis(100)); + + let source_files: Vec<_> = ignore::WalkBuilder::new(root) + .hidden(!hidden) + .git_ignore(!no_ignore) + .ignore(!no_ignore) + .build() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().map(|ft| ft.is_file()).unwrap_or(false)) + .filter(|e| is_source_file(e.path())) + .filter(|e| { + e.path() + .extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| !ext.is_empty()) + }) + .collect(); + + pb.set_message(format!( + "found {} source files, checking for changes...", + source_files.len() + )); + + let stale_files: Vec<_> = source_files + .into_iter() + .filter_map(|entry| { + let path = entry.path(); + let rel_path = path.strip_prefix(root).unwrap_or(path); + let ext = path.extension().and_then(|e| e.to_str())?; + if ext.is_empty() { + return None; + } + let (mtime, size) = file_fingerprint(path).ok()?; + if index.is_stale(rel_path, 
mtime, size) { + Some(( + path.to_path_buf(), + rel_path.to_path_buf(), + ext.to_string(), + mtime, + size, + )) + } else { + None + } + }) + .collect(); + + pb.finish_and_clear(); + + let total = stale_files.len(); + if total == 0 { + return Ok(0); + } + + let pb = ProgressBar::new(total as u64); + pb.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") + .expect("valid template") + .progress_chars("#>-"), + ); + pb.set_message("indexing..."); + + let mut updated = 0; + + for chunk in stale_files.chunks(INDEX_CHUNK_SIZE) { + let records: Vec = chunk + .par_iter() + .filter_map(|(path, rel_path, ext, mtime, size)| { + let extractor = match Extractor::from_extension(ext) { + Ok(e) => e, + Err(e) => { + debug!(ext = %ext, error = ?e, "no extractor for extension"); + return None; + } + }; + let source = fs::read(path).ok()?; + + let mut parser = tree_sitter::Parser::new(); + parser.set_language(extractor.language()).ok()?; + let tree = parser.parse(&source, None)?; + + let definitions = extractor.extract_definitions(&tree, &source, rel_path); + let calls = extractor.extract_calls(&tree, &source, rel_path); + let imports = extractor.extract_imports(&tree, &source, rel_path); + + pb.inc(1); + + Some(FileRecord { + path: rel_path.to_path_buf(), + mtime: *mtime, + size: *size, + definitions, + calls, + imports, + }) + }) + .collect(); + + updated += records.len(); + for record in records { + index.update(record); + } + } + + pb.finish_and_clear(); + Ok(updated) +} + +fn resolve_calls_with_lsp(root: &Path, index: &mut Index) -> Result { + let unresolved_count: usize = index + .files + .values() + .map(|r| r.calls.iter().filter(|c| c.resolved.is_none()).count()) + .sum(); + + if unresolved_count == 0 { + return Ok(0); + } + + let pb = ProgressBar::new(unresolved_count as u64); + pb.set_style( + ProgressStyle::default_bar() + .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} {msg}") + .expect("valid 
template") + .progress_chars("#>-"), + ); + pb.set_message("resolving calls with LSP..."); + pb.enable_steady_tick(std::time::Duration::from_millis(100)); + + let mut lsp_resolver = LspResolver::with_progress(root, pb.clone()); + let mut resolved = 0; + + let file_paths: Vec<_> = index.files.keys().cloned().collect(); + + for file_path in file_paths { + let Some(record) = index.files.get(&file_path) else { + continue; + }; + + let calls_to_resolve: Vec<_> = record + .calls + .iter() + .enumerate() + .filter(|(_, c)| c.resolved.is_none()) + .map(|(i, c)| (i, c.clone())) + .collect(); + + if calls_to_resolve.is_empty() { + continue; + } + + let mut resolutions = Vec::new(); + + for (call_idx, call) in &calls_to_resolve { + pb.inc(1); + + if let Some(resolved_call) = lsp_resolver.resolve_call_full(call, index) { + resolutions.push((*call_idx, resolved_call)); + } + } + + if let Some(record) = index.files.get_mut(&file_path) { + for (call_idx, resolved_call) in resolutions { + if call_idx < record.calls.len() { + record.calls[call_idx].resolved = Some(resolved_call); + resolved += 1; + } + } + } + } + + pb.finish_and_clear(); + + let stats = lsp_resolver.stats(); + if stats.by_server.is_empty() { + eprintln!("LSP: no servers responded (check if LSP binaries are working)"); + } else { + eprintln!("LSP: {}", stats); + } + + Ok(resolved) +} + +fn format_definitions( + definitions: &[&glimpse::code::index::Definition], + root: &Path, +) -> Result { + use std::fmt::Write; + + let mut output = String::new(); + + for def in definitions { + let file_path = root.join(&def.file); + let content = fs::read_to_string(&file_path) + .with_context(|| format!("failed to read: {}", file_path.display()))?; + + let lines: Vec<&str> = content.lines().collect(); + let start = def.span.start_line.saturating_sub(1); + let end = def.span.end_line.min(lines.len()); + + writeln!(output, "## {}:{}", def.file.display(), def.name)?; + writeln!(output)?; + writeln!(output, "```")?; + for line in 
&lines[start..end] { + writeln!(output, "{}", line)?; + } + writeln!(output, "```")?; + writeln!(output)?; + } + + Ok(output) +} diff --git a/src/output.rs b/src/output.rs index ee305e4..e4af9dc 100644 --- a/src/output.rs +++ b/src/output.rs @@ -1,19 +1,14 @@ -use crate::{ - cli::{Cli, OutputFormat}, - tokenizer::TokenCounter, -}; +use std::fs; +use std::io::BufWriter; + use anyhow::Result; use base64::Engine; use num_format::{Buffer, Locale}; use printpdf::*; -use std::{fs, io::BufWriter, path::PathBuf}; -#[derive(Debug, Clone)] -pub struct FileEntry { - pub path: PathBuf, - pub content: String, - pub size: u64, -} +use glimpse::{FileEntry, OutputFormat, TokenCounter}; + +use crate::cli::Cli; pub fn generate_output( entries: &[FileEntry], @@ -73,7 +68,6 @@ pub fn generate_output( } } - // Add summary if xml_format { output.push_str("\n"); output.push_str(&format!("Total files: {}\n", entries.len())); @@ -117,7 +111,6 @@ pub fn display_token_counts(token_counter: TokenCounter, entries: &[FileEntry]) println!("Total tokens: {}", buf.as_str()); println!("\nBreakdown by file:"); - // Sorting breakdown let mut breakdown = token_count.breakdown; breakdown.sort_by(|(_, a), (_, b)| b.cmp(a)); let top_files = breakdown.iter().take(15); @@ -134,7 +127,6 @@ fn generate_tree(entries: &[FileEntry]) -> Result { let mut output = String::new(); let mut current_path = vec![]; - // Sort entries by path to ensure consistent output let mut sorted_entries = entries.to_vec(); sorted_entries.sort_by(|a, b| a.path.cmp(&b.path)); @@ -144,7 +136,6 @@ fn generate_tree(entries: &[FileEntry]) -> Result { for (i, component) in components.iter().enumerate() { if i >= current_path.len() || component != ¤t_path[i] { let prefix = " ".repeat(i); - // Always use └── for the last component of a file path if i == components.len() - 1 { output.push_str(&format!( "{}└── {}\n", @@ -152,7 +143,6 @@ fn generate_tree(entries: &[FileEntry]) -> Result { component.as_os_str().to_string_lossy() )); } else { - 
// For directories, check if it's the last one at this level let is_last_dir = sorted_entries .iter() .filter_map(|e| e.path.components().nth(i)) @@ -204,7 +194,6 @@ fn generate_files(entries: &[FileEntry], xml_format: bool) -> Result { } fn try_copy_with_osc52(content: &str) -> Result<(), Box> { - // OSC 52 sequence to set clipboard for special cases (like SSH) print!( "\x1B]52;c;{}\x07", base64::engine::general_purpose::STANDARD.encode(content) @@ -213,12 +202,10 @@ fn try_copy_with_osc52(content: &str) -> Result<(), Box> } pub fn handle_output(content: String, args: &Cli) -> Result<()> { - // Print to stdout if no other output method is specified if args.print { println!("{content}"); } - // Copy to clipboard if requested if !args.print { match arboard::Clipboard::new().and_then(|mut clipboard| clipboard.set_text(content.clone())) { Ok(_) => println!("Context prepared! Paste into your LLM of choice + Profit."), @@ -231,7 +218,6 @@ pub fn handle_output(content: String, args: &Cli) -> Result<()> { } } - // Write to file if path provided if let Some(file_path) = &args.file { fs::write(file_path, content)?; println!("Output written to: {}", file_path.display()); @@ -247,7 +233,6 @@ pub fn generate_pdf(entries: &[FileEntry], format: OutputFormat) -> Result { current_layer.use_text( @@ -270,7 +255,6 @@ pub fn generate_pdf(entries: &[FileEntry], format: OutputFormat) -> Result Result Result Result Vec { - vec![ - FileEntry { - path: PathBuf::from("src/main.rs"), - content: "fn main() {}\n".to_string(), - size: 12, - }, - FileEntry { - path: PathBuf::from("src/lib/utils.rs"), - content: "pub fn helper() {}\n".to_string(), - size: 18, - }, - ] - } - - #[test] - fn test_tree_output() { - let entries = create_test_entries(); - let tree = generate_tree(&entries).unwrap(); - let expected = "└── src/\n ├── lib/\n └── utils.rs\n └── main.rs\n"; - assert_eq!( - tree, expected, - "Tree output doesn't match expected structure" - ); - } - - #[test] - fn test_files_output() { - let 
entries = create_test_entries(); - let files = generate_files(&entries, false).unwrap(); - let expected = format!( - "\nFile: {}\n{}\n{}\n\nFile: {}\n{}\n{}\n", - "src/main.rs", - "=".repeat(48), - "fn main() {}\n", - "src/lib/utils.rs", - "=".repeat(48), - "pub fn helper() {}\n" - ); - assert_eq!(files, expected); - } - - #[test] - fn test_generate_output() { - let entries = create_test_entries(); - - // Test tree format - let tree_output = generate_output(&entries, OutputFormat::Tree, false, None).unwrap(); - assert!(tree_output.contains("Directory Structure:")); - assert!(tree_output.contains("src/")); - assert!(tree_output.contains("main.rs")); - - // Test files format - let files_output = generate_output(&entries, OutputFormat::Files, false, None).unwrap(); - assert!(files_output.contains("File Contents:")); - assert!(files_output.contains("fn main()")); - assert!(files_output.contains("pub fn helper()")); - - // Test both format - let both_output = generate_output(&entries, OutputFormat::Both, false, None).unwrap(); - assert!(both_output.contains("Directory Structure:")); - assert!(both_output.contains("File Contents:")); - } - - #[test] - fn test_xml_output() { - let entries = create_test_entries(); - - // Test XML tree format - let xml_tree_output = generate_output( - &entries, - OutputFormat::Tree, - true, - Some("test_project".to_string()), - ) - .unwrap(); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - assert!(xml_tree_output.contains("")); - - // Test XML files format - let xml_files_output = generate_output( - &entries, - OutputFormat::Files, - true, - Some("test_project".to_string()), - ) - .unwrap(); - assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - 
assert!(xml_files_output.contains("")); - assert!(xml_files_output.contains("")); - - // Test XML both format - let xml_both_output = generate_output( - &entries, - OutputFormat::Both, - true, - Some("test_project".to_string()), - ) - .unwrap(); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - assert!(xml_both_output.contains("")); - } - - #[test] - fn test_handle_output() { - use tempfile::tempdir; - - let temp_dir = tempdir().unwrap(); - let temp_file = temp_dir.path().join("test_output.txt"); - - let content = "Test content".to_string(); - let args = Cli { - config: false, - paths: vec![".".to_string()], - include: None, - only_include: None, - exclude: None, - max_size: Some(1000), - max_depth: Some(10), - output: Some(OutputFormat::Both), - file: Some(temp_file.clone()), - print: false, - threads: None, - hidden: false, - no_ignore: false, - no_tokens: true, - model: None, - tokenizer: Some(crate::cli::TokenizerType::Tiktoken), - tokenizer_file: None, - interactive: false, - pdf: None, - traverse_links: false, - link_depth: None, - config_path: false, - xml: false, - }; - - handle_output(content.clone(), &args).unwrap(); - - // Verify file content - let file_content = std::fs::read_to_string(temp_file).unwrap(); - assert_eq!(file_content, content); - } -} diff --git a/src/file_picker.rs b/src/tui/file_picker.rs similarity index 95% rename from src/file_picker.rs rename to src/tui/file_picker.rs index 1314b1c..2592cae 100644 --- a/src/file_picker.rs +++ b/src/tui/file_picker.rs @@ -1,3 +1,9 @@ +use std::{ + io::{self, stdout}, + path::{Path, PathBuf}, + time::Duration, +}; + use anyhow::Result; use crossterm::{ event::{self, Event, KeyCode}, @@ -12,11 +18,6 @@ use ratatui::{ widgets::{Block, Borders, List, ListItem, ListState, Paragraph}, Terminal, }; -use std::{ - io::{self, stdout}, - path::{Path, 
PathBuf}, - time::Duration, -}; struct TerminalGuard; @@ -121,15 +122,14 @@ impl FilePicker { .direction(Direction::Vertical) .constraints( [ - Constraint::Length(1), // Current folder - Constraint::Percentage(80), // File list - Constraint::Percentage(20), // Selected files + Constraint::Length(1), + Constraint::Percentage(80), + Constraint::Percentage(20), ] .as_ref(), ) .split(f.area()); - // Current folder let current_path = self.get_relative_path(&self.current_dir); let folder = Paragraph::new(format!("📁 {}", current_path.display())).block( Block::default() @@ -139,7 +139,6 @@ impl FilePicker { f.render_widget(folder, chunks[0]); - // File list let items: Vec = self .files .iter() @@ -164,7 +163,6 @@ impl FilePicker { f.render_stateful_widget(list, chunks[1], &mut self.list_state.clone()); - // Selected files let selected_items: Vec = self .selected_files .iter() @@ -205,7 +203,6 @@ impl FilePicker { let entry = entry?; let path = entry.path(); - // skip hidden files if not showing them if !self.show_hidden && path .file_name() @@ -214,7 +211,6 @@ impl FilePicker { continue; } - // skip ignored files if respecting ignore if self.respect_ignore { let path_clone = path.clone(); let mut builder = ignore::gitignore::GitignoreBuilder::new(path_clone); @@ -227,7 +223,6 @@ impl FilePicker { } } - // skip files larger than max size if path.is_file() && entry.metadata()?.len() > self.max_size { continue; } @@ -235,7 +230,6 @@ impl FilePicker { files.push(path); } - // sort directories first, then files files.sort_by(|a, b| { if a.is_dir() && !b.is_dir() { std::cmp::Ordering::Less @@ -309,7 +303,6 @@ impl FilePicker { if let Some(selected) = self.selected_list_state.selected() { if selected < self.selected_files.len() { self.selected_files.remove(selected); - // Adjust the selection after removal if self.selected_files.is_empty() { self.selected_list_state.select(None); } else { @@ -342,7 +335,6 @@ impl FilePicker { } } - // If this was previously empty, move the buffer 
cursor to the first item if before == 0 && !self.selected_files.is_empty() { self.selected_list_state.select(Some(0)); } diff --git a/src/tui/mod.rs b/src/tui/mod.rs new file mode 100644 index 0000000..9dce8c9 --- /dev/null +++ b/src/tui/mod.rs @@ -0,0 +1,3 @@ +pub mod file_picker; + +pub use file_picker::FilePicker; diff --git a/test_project/src/lib.rs b/test_project/src/lib.rs deleted file mode 100644 index a61949e..0000000 --- a/test_project/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ -pub fn helper() { println!("Helper function"); } diff --git a/test_project/src/main.rs b/test_project/src/main.rs deleted file mode 100644 index a3e978a..0000000 --- a/test_project/src/main.rs +++ /dev/null @@ -1 +0,0 @@ -fn main() { println!("Hello, world!"); } diff --git a/tests/extraction.rs b/tests/extraction.rs new file mode 100644 index 0000000..066bfd7 --- /dev/null +++ b/tests/extraction.rs @@ -0,0 +1,639 @@ +use std::path::Path; + +use glimpse::code::extract::Extractor; +use glimpse::code::index::DefinitionKind; +use tree_sitter::Parser; + +fn parse_and_extract(lang: &str, source: &str) -> ExtractResult { + let extractor = Extractor::new(lang).expect(&format!("failed to load {}", lang)); + let mut parser = Parser::new(); + parser + .set_language(extractor.language()) + .expect("failed to set language"); + let tree = parser.parse(source, None).expect("failed to parse"); + let path = Path::new("test.src"); + + ExtractResult { + definitions: extractor.extract_definitions(&tree, source.as_bytes(), path), + calls: extractor.extract_calls(&tree, source.as_bytes(), path), + imports: extractor.extract_imports(&tree, source.as_bytes(), path), + } +} + +struct ExtractResult { + definitions: Vec, + calls: Vec, + imports: Vec, +} + +mod rust { + use super::*; + + const SAMPLE: &str = r#" +use std::fs; +use std::path::Path; +use crate::config::Config; + +fn main() { + let config = Config::load(); + helper(config); + println!("done"); +} + +fn helper(cfg: Config) { + cfg.validate(); + 
process(cfg); +} + +fn process(cfg: Config) { + fs::write("out.txt", cfg.data()); +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("rust", SAMPLE); + + assert_eq!(result.definitions.len(), 3); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"helper")); + assert!(names.contains(&"process")); + + for def in &result.definitions { + assert!(matches!(def.kind, DefinitionKind::Function)); + } + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("rust", SAMPLE); + + let callers: Vec<_> = result + .calls + .iter() + .filter_map(|c| { + c.caller + .as_ref() + .map(|caller| (caller.as_str(), c.callee.as_str())) + }) + .collect(); + + assert!(callers.contains(&("main", "helper"))); + assert!(callers.contains(&("helper", "process"))); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("rust", SAMPLE); + + assert!(!result.imports.is_empty()); + let paths: Vec<_> = result.imports.iter().map(|i| &i.module_path).collect(); + assert!(paths.iter().any(|p| p.contains("std"))); + } +} + +mod python { + use super::*; + + const SAMPLE: &str = r#" +import os +from pathlib import Path +from typing import Optional + +def main(): + config = load_config() + process(config) + +def load_config(): + return Config() + +def process(config): + save(config.data) + +class Config: + def __init__(self): + self.data = {} + + def validate(self): + return True +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("python", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"load_config")); + assert!(names.contains(&"process")); + assert!(names.contains(&"Config")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("python", SAMPLE); + + let callees: Vec<_> = 
result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"load_config")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("python", SAMPLE); + + assert!(!result.imports.is_empty()); + let paths: Vec<_> = result.imports.iter().map(|i| &i.module_path).collect(); + assert!(paths + .iter() + .any(|p| p.contains("os") || p.contains("pathlib"))); + } +} + +mod typescript { + use super::*; + + const SAMPLE: &str = r#" +import { readFile } from 'fs'; +import path from 'path'; + +function main() { + const config = loadConfig(); + process(config); +} + +function loadConfig(): Config { + return new Config(); +} + +const process = (config: Config) => { + config.validate(); + save(config); +}; + +class Config { + validate() { + return true; + } +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("typescript", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("typescript", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("typescript", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} + +mod javascript { + use super::*; + + const SAMPLE: &str = r#" +const fs = require('fs'); +import { join } from 'path'; + +function main() { + const data = loadData(); + process(data); +} + +function loadData() { + return fs.readFileSync('data.json'); +} + +const process = (data) => { + transform(data); +}; +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("javascript", SAMPLE); + + let 
names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadData")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("javascript", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadData")); + assert!(callees.contains(&"process")); + } +} + +mod go { + use super::*; + + const SAMPLE: &str = r#" +package main + +import ( + "fmt" + "os" +) + +func main() { + config := loadConfig() + process(config) +} + +func loadConfig() *Config { + return &Config{} +} + +func process(cfg *Config) { + cfg.Validate() + save(cfg) +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("go", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("go", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("go", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} + +mod c { + use super::*; + + const SAMPLE: &str = r#" +#include +#include "config.h" + +void process(Config* cfg); + +int main() { + Config* cfg = load_config(); + process(cfg); + return 0; +} + +Config* load_config() { + return malloc(sizeof(Config)); +} + +void process(Config* cfg) { + validate(cfg); + save(cfg); +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("c", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + 
assert!(names.contains(&"load_config")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("c", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"load_config")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("c", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} + +mod cpp { + use super::*; + + const SAMPLE: &str = r#" +#include +#include "config.hpp" + +class Processor { +public: + void run() { + process(); + } + + void process() { + helper(); + } +}; + +int main() { + Processor p; + p.run(); + return 0; +} + +void standalone() { + std::cout << "hello" << std::endl; +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("cpp", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"standalone")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("cpp", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"run")); + } +} + +mod java { + use super::*; + + const SAMPLE: &str = r#" +import java.util.List; +import com.example.Config; + +public class Main { + public static void main(String[] args) { + Config config = loadConfig(); + process(config); + } + + private static Config loadConfig() { + return new Config(); + } + + private static void process(Config cfg) { + cfg.validate(); + save(cfg); + } +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("java", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn 
calls() { + let result = parse_and_extract("java", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("java", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} + +mod bash { + use super::*; + + const SAMPLE: &str = r#" +#!/bin/bash + +source ./config.sh + +main() { + load_config + process "$1" +} + +load_config() { + echo "loading" +} + +process() { + validate "$1" + save "$1" +} + +main "$@" +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("bash", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"load_config")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("bash", SAMPLE); + + // Bash treats all commands as calls + assert!(!result.calls.is_empty()); + } +} + +mod zig { + use super::*; + + const SAMPLE: &str = r#" +const std = @import("std"); + +pub fn main() void { + const config = loadConfig(); + process(config); +} + +fn loadConfig() Config { + return Config{}; +} + +fn process(cfg: Config) void { + cfg.validate(); + save(cfg); +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("zig", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("zig", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = 
parse_and_extract("zig", SAMPLE); + + assert!(!result.imports.is_empty()); + assert!(result.imports.iter().any(|i| i.module_path == "std")); + } +} + +mod scala { + use super::*; + + const SAMPLE: &str = r#" +import scala.collection.mutable +import com.example.Config + +object Main { + def main(args: Array[String]): Unit = { + val config = loadConfig() + process(config) + } + + def loadConfig(): Config = { + new Config() + } + + def process(cfg: Config): Unit = { + cfg.validate() + save(cfg) + } +} + +class Processor { + def run(): Unit = { + helper() + } +} + +trait Validator { + def validate(): Boolean +} +"#; + + #[test] + #[ignore] + fn definitions() { + let result = parse_and_extract("scala", SAMPLE); + + let names: Vec<_> = result.definitions.iter().map(|d| d.name.as_str()).collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"loadConfig")); + assert!(names.contains(&"process")); + assert!(names.contains(&"Main")); + assert!(names.contains(&"Processor")); + assert!(names.contains(&"Validator")); + } + + #[test] + #[ignore] + fn calls() { + let result = parse_and_extract("scala", SAMPLE); + + let callees: Vec<_> = result.calls.iter().map(|c| c.callee.as_str()).collect(); + assert!(callees.contains(&"loadConfig")); + assert!(callees.contains(&"process")); + } + + #[test] + #[ignore] + fn imports() { + let result = parse_and_extract("scala", SAMPLE); + + assert!(!result.imports.is_empty()); + } +} diff --git a/tests/integration.rs b/tests/integration.rs new file mode 100644 index 0000000..66929ab --- /dev/null +++ b/tests/integration.rs @@ -0,0 +1,558 @@ +use std::collections::HashSet; +use std::fs; +use std::path::Path; + +use glimpse::code::extract::Extractor; +use glimpse::code::graph::CallGraph; +use glimpse::code::index::{file_fingerprint, FileRecord, Index}; +use tree_sitter::Parser; + +fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &str) { + let mut parser = Parser::new(); + 
parser.set_language(extractor.language()).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let (mtime, size) = file_fingerprint(path).unwrap_or((0, source.len() as u64)); + + let record = FileRecord { + path: path.to_path_buf(), + mtime, + size, + definitions: extractor.extract_definitions(&tree, source.as_bytes(), path), + calls: extractor.extract_calls(&tree, source.as_bytes(), path), + imports: extractor.extract_imports(&tree, source.as_bytes(), path), + }; + + index.update(record); +} + +mod call_graph_tests { + use super::*; + use glimpse::code::index::{Call, Definition, DefinitionKind, FileRecord, Span}; + use tempfile::TempDir; + + fn make_span() -> Span { + Span { + start_byte: 0, + end_byte: 10, + start_line: 1, + end_line: 1, + } + } + + fn make_def(name: &str, file: &Path) -> Definition { + Definition { + name: name.to_string(), + kind: DefinitionKind::Function, + span: make_span(), + file: file.to_path_buf(), + signature: None, + } + } + + #[test] + fn test_graph_resolves_cross_file_calls() { + let dir = TempDir::new().unwrap(); + let file_a = dir.path().join("a.rs"); + let file_b = dir.path().join("b.rs"); + + fs::write(&file_a, "fn caller() { callee(); }").unwrap(); + fs::write(&file_b, "pub fn callee() {}").unwrap(); + + let mut index = Index::new(); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("caller", &file_a)], + calls: vec![Call { + qualifier: None, + callee: "callee".to_string(), + caller: Some("caller".to_string()), + span: make_span(), + file: file_a.clone(), + resolved: None, + }], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("callee", &file_b)], + calls: vec![], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + + let caller_id = graph.find_node("caller").unwrap(); + let callees = graph.get_callees(caller_id); + + assert_eq!(callees.len(), 1); + 
assert_eq!(callees[0].definition.name, "callee"); + assert_eq!(callees[0].definition.file, file_b); + } + + #[test] + fn test_graph_same_name_different_files() { + let dir = TempDir::new().unwrap(); + let file_a = dir.path().join("a.rs"); + let file_b = dir.path().join("b.rs"); + let file_main = dir.path().join("main.rs"); + + fs::write(&file_a, "fn helper() {}").unwrap(); + fs::write(&file_b, "fn helper() {}").unwrap(); + fs::write(&file_main, "fn main() { helper(); }").unwrap(); + + let mut index = Index::new(); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("helper", &file_a)], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("helper", &file_b)], + calls: vec![], + imports: vec![], + }); + + index.update(FileRecord { + path: file_main.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("main", &file_main)], + calls: vec![Call { + qualifier: None, + callee: "helper".to_string(), + caller: Some("main".to_string()), + span: make_span(), + file: file_main.clone(), + resolved: None, + }], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + + assert_eq!(graph.node_count(), 3); + + let a_id = graph.find_node_by_file_and_name(&file_a, "helper"); + let b_id = graph.find_node_by_file_and_name(&file_b, "helper"); + assert!(a_id.is_some()); + assert!(b_id.is_some()); + assert_ne!(a_id, b_id); + } + + #[test] + fn test_graph_transitive_through_resolution() { + let dir = TempDir::new().unwrap(); + let file_a = dir.path().join("a.rs"); + let file_b = dir.path().join("b.rs"); + let file_c = dir.path().join("c.rs"); + + fs::write(&file_a, "fn entry() { middle(); }").unwrap(); + fs::write(&file_b, "fn middle() { leaf(); }").unwrap(); + fs::write(&file_c, "fn leaf() {}").unwrap(); + + let mut index = Index::new(); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + 
definitions: vec![make_def("entry", &file_a)], + calls: vec![Call { + qualifier: None, + callee: "middle".to_string(), + caller: Some("entry".to_string()), + span: make_span(), + file: file_a.clone(), + resolved: None, + }], + imports: vec![], + }); + + index.update(FileRecord { + path: file_b.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("middle", &file_b)], + calls: vec![Call { + qualifier: None, + callee: "leaf".to_string(), + caller: Some("middle".to_string()), + span: make_span(), + file: file_b.clone(), + resolved: None, + }], + imports: vec![], + }); + + index.update(FileRecord { + path: file_c.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("leaf", &file_c)], + calls: vec![], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + + let entry_id = graph.find_node("entry").unwrap(); + let transitive = graph.get_transitive_callees(entry_id); + + assert_eq!(transitive.len(), 2); + + let names: HashSet<_> = transitive + .iter() + .map(|n| n.definition.name.as_str()) + .collect(); + assert!(names.contains("middle")); + assert!(names.contains("leaf")); + + let order = graph.post_order_definitions(entry_id); + assert_eq!(order.len(), 3); + assert_eq!(order[0].name, "leaf"); + assert_eq!(order[1].name, "middle"); + assert_eq!(order[2].name, "entry"); + } + + #[test] + fn test_graph_unresolved_calls_ignored() { + let dir = TempDir::new().unwrap(); + let file_a = dir.path().join("a.rs"); + + fs::write(&file_a, "fn caller() { nonexistent(); }").unwrap(); + + let mut index = Index::new(); + + index.update(FileRecord { + path: file_a.clone(), + mtime: 0, + size: 0, + definitions: vec![make_def("caller", &file_a)], + calls: vec![Call { + qualifier: None, + callee: "nonexistent".to_string(), + caller: Some("caller".to_string()), + span: make_span(), + file: file_a.clone(), + resolved: None, + }], + imports: vec![], + }); + + let graph = CallGraph::build(&index); + + let caller_id = graph.find_node("caller").unwrap(); + let callees 
= graph.get_callees(caller_id); + + assert!(callees.is_empty()); + } +} + +mod language_extraction { + use super::*; + use tempfile::TempDir; + + #[test] + #[ignore] + fn test_rust_full_pipeline() { + let dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + fs::create_dir_all(&src).unwrap(); + + let main_rs = r#" +mod utils; + +fn main() { + let config = load_config(); + utils::process(config); +} + +fn load_config() -> Config { + Config::default() +} + +struct Config { + data: String, +} + +impl Default for Config { + fn default() -> Self { + Self { data: String::new() } + } +} +"#; + + let utils_rs = r#" +use crate::Config; + +pub fn process(cfg: Config) { + validate(&cfg); + save(&cfg); +} + +fn validate(cfg: &Config) { + check_data(cfg); +} + +fn check_data(_cfg: &Config) {} + +fn save(cfg: &Config) { + write_file(&cfg.data); +} + +fn write_file(_data: &str) {} +"#; + + fs::write(src.join("main.rs"), main_rs).unwrap(); + fs::write(src.join("utils.rs"), utils_rs).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("rust").unwrap(); + + index_file(&mut index, &extractor, &src.join("main.rs"), main_rs); + index_file(&mut index, &extractor, &src.join("utils.rs"), utils_rs); + + let graph = CallGraph::build(&index); + + assert!(graph.node_count() >= 5); + + if let Some(process_id) = graph.find_node("process") { + let callees = graph.get_callees(process_id); + let names: HashSet<_> = callees.iter().map(|n| n.definition.name.as_str()).collect(); + assert!(names.contains("validate") || names.contains("save")); + } + } + + #[test] + #[ignore] + fn test_python_full_pipeline() { + let dir = TempDir::new().unwrap(); + + let main_py = r#" +from utils import helper + +def main(): + data = load() + result = process(data) + helper(result) + +def load(): + return read_file() + +def read_file(): + return "data" + +def process(data): + return transform(data) + +def transform(x): + return x.upper() + +if __name__ == "__main__": + main() 
+"#; + + let utils_py = r#" +def helper(data): + print(data) + format_output(data) + +def format_output(s): + return s.strip() +"#; + + fs::write(dir.path().join("main.py"), main_py).unwrap(); + fs::write(dir.path().join("utils.py"), utils_py).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("python").unwrap(); + + index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); + index_file( + &mut index, + &extractor, + &dir.path().join("utils.py"), + utils_py, + ); + + let graph = CallGraph::build(&index); + + if let Some(main_id) = graph.find_node("main") { + let transitive = graph.get_transitive_callees(main_id); + assert!(!transitive.is_empty()); + } + } + + #[test] + #[ignore] + fn test_typescript_full_pipeline() { + let dir = TempDir::new().unwrap(); + + let main_ts = r#" +import { helper } from './utils'; + +function main() { + const result = processData(); + helper(result); +} + +function processData(): string { + return transform("input"); +} + +function transform(input: string): string { + return input.toUpperCase(); +} + +main(); +"#; + + let utils_ts = r#" +export function helper(data: string) { + console.log(data); + format(data); +} + +function format(s: string): string { + return s.trim(); +} +"#; + + fs::write(dir.path().join("main.ts"), main_ts).unwrap(); + fs::write(dir.path().join("utils.ts"), utils_ts).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("typescript").unwrap(); + + index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts); + index_file( + &mut index, + &extractor, + &dir.path().join("utils.ts"), + utils_ts, + ); + + let graph = CallGraph::build(&index); + + if let Some(main_id) = graph.find_node("main") { + let callees = graph.get_callees(main_id); + assert!(!callees.is_empty()); + } + } + + #[test] + #[ignore] + fn test_go_full_pipeline() { + let dir = TempDir::new().unwrap(); + + let main_go = r#" +package main + +func main() { + config := 
loadConfig() + process(config) +} + +func loadConfig() *Config { + return &Config{} +} + +func process(cfg *Config) { + validate(cfg) + save(cfg) +} + +func validate(cfg *Config) {} + +func save(cfg *Config) {} + +type Config struct { + Name string +} +"#; + + fs::write(dir.path().join("main.go"), main_go).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("go").unwrap(); + + index_file(&mut index, &extractor, &dir.path().join("main.go"), main_go); + + let graph = CallGraph::build(&index); + + if let Some(main_id) = graph.find_node("main") { + let transitive = graph.get_transitive_callees(main_id); + assert!(transitive.len() >= 2); + } + } +} + +mod index_persistence { + use super::*; + use glimpse::code::index::{clear_index, load_index, save_index}; + use tempfile::TempDir; + + #[test] + fn test_save_and_load_preserves_data() { + let dir = TempDir::new().unwrap(); + + let mut index = Index::new(); + index.update(FileRecord { + path: dir.path().join("test.rs"), + mtime: 12345, + size: 100, + definitions: vec![], + calls: vec![], + imports: vec![], + }); + + save_index(&index, dir.path()).unwrap(); + + let loaded = load_index(dir.path()).unwrap().unwrap(); + assert_eq!(loaded.files.len(), 1); + assert!(loaded.get(&dir.path().join("test.rs")).is_some()); + + clear_index(dir.path()).unwrap(); + assert!(load_index(dir.path()).unwrap().is_none()); + } + + #[test] + fn test_index_staleness_detection() { + let dir = TempDir::new().unwrap(); + let file = dir.path().join("test.rs"); + + fs::write(&file, "fn test() {}").unwrap(); + + let (mtime, size) = file_fingerprint(&file).unwrap(); + + let mut index = Index::new(); + index.update(FileRecord { + path: file.clone(), + mtime, + size, + definitions: vec![], + calls: vec![], + imports: vec![], + }); + + assert!(!index.is_stale(&file, mtime, size)); + assert!(index.is_stale(&file, mtime + 1, size)); + assert!(index.is_stale(&file, mtime, size + 1)); + 
assert!(index.is_stale(&dir.path().join("other.rs"), mtime, size)); + } +} diff --git a/tests/lsp_integration.rs b/tests/lsp_integration.rs new file mode 100644 index 0000000..b581235 --- /dev/null +++ b/tests/lsp_integration.rs @@ -0,0 +1,972 @@ +use std::fs; +use std::path::{Path, PathBuf}; +use std::thread; +use std::time::Duration; + +use glimpse::code::extract::Extractor; +use glimpse::code::index::{file_fingerprint, Call, FileRecord, Index}; +use glimpse::code::lsp::LspResolver; +use tempfile::TempDir; +use tree_sitter::Parser; + +fn index_file(index: &mut Index, extractor: &Extractor, path: &Path, source: &str) { + let mut parser = Parser::new(); + parser.set_language(extractor.language()).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let rel_path = path + .file_name() + .map(PathBuf::from) + .unwrap_or(path.to_path_buf()); + let (mtime, size) = file_fingerprint(path).unwrap_or((0, source.len() as u64)); + + let record = FileRecord { + path: rel_path, + mtime, + size, + definitions: extractor.extract_definitions(&tree, source.as_bytes(), path), + calls: extractor.extract_calls(&tree, source.as_bytes(), path), + imports: extractor.extract_imports(&tree, source.as_bytes(), path), + }; + + index.update(record); +} + +fn lsp_available(binary: &str) -> bool { + which::which(binary).is_ok() || { + let lsp_dir = dirs::data_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join("glimpse") + .join("lsp"); + lsp_dir.join(binary).exists() + } +} + +fn wait_for_lsp_ready(resolver: &mut LspResolver, calls: &[Call], index: &Index) { + for _ in 0..30 { + if let Some(call) = calls.first() { + if resolver.resolve_call(call, index).is_some() { + return; + } + } + thread::sleep(Duration::from_millis(500)); + } +} + +fn collect_calls(index: &Index) -> Vec { + index.calls().cloned().collect() +} + +mod rust_lsp { + use super::*; + + fn rust_analyzer_available() -> bool { + lsp_available("rust-analyzer") + } + + #[test] + #[ignore] + fn 
test_rust_same_file_definition() { + if !rust_analyzer_available() { + eprintln!("Skipping: rust-analyzer not available"); + return; + } + + let dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + fs::create_dir_all(&src).unwrap(); + + let main_rs = r#"fn main() { + helper(); +} + +fn helper() { + println!("hello"); +} +"#; + + let cargo_toml = r#"[package] +name = "test_project" +version = "0.1.0" +edition = "2021" +"#; + + fs::write(src.join("main.rs"), main_rs).unwrap(); + fs::write(dir.path().join("Cargo.toml"), cargo_toml).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("rust").unwrap(); + + let main_rs_path = src.join("main.rs"); + let rel_path = main_rs_path.strip_prefix(dir.path()).unwrap(); + + let mut parser = tree_sitter::Parser::new(); + parser.set_language(extractor.language()).unwrap(); + let tree = parser.parse(main_rs, None).unwrap(); + + let record = FileRecord { + path: rel_path.to_path_buf(), + mtime: 0, + size: main_rs.len() as u64, + definitions: extractor.extract_definitions(&tree, main_rs.as_bytes(), rel_path), + calls: extractor.extract_calls(&tree, main_rs.as_bytes(), rel_path), + imports: extractor.extract_imports(&tree, main_rs.as_bytes(), rel_path), + }; + + eprintln!("Index record path: {:?}", record.path); + eprintln!( + "Definitions: {:?}", + record + .definitions + .iter() + .map(|d| (&d.name, &d.file)) + .collect::>() + ); + eprintln!( + "Calls: {:?}", + record + .calls + .iter() + .map(|c| (&c.callee, &c.file, c.span.start_line)) + .collect::>() + ); + + index.update(record); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from Rust code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + eprintln!( + "Resolving call: 
callee={}, file={:?}, line={}", + call.callee, call.file, call.span.start_line + ); + let def = resolver.resolve_call(call, &index); + if def.is_none() { + eprintln!("Resolution failed! Check LSP logs."); + } + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + assert_eq!(def.span.start_line, 5); + } + } + + #[test] + #[ignore] + fn test_rust_cross_module_definition() { + if !rust_analyzer_available() { + eprintln!("Skipping: rust-analyzer not available"); + return; + } + + let dir = TempDir::new().unwrap(); + let src = dir.path().join("src"); + fs::create_dir_all(&src).unwrap(); + + let main_rs = r#"mod utils; + +fn main() { + utils::process(); +} +"#; + + let utils_rs = r#"pub fn process() { + println!("processing"); +} +"#; + + let cargo_toml = r#"[package] +name = "test_project" +version = "0.1.0" +edition = "2021" +"#; + + fs::write(src.join("main.rs"), main_rs).unwrap(); + fs::write(src.join("utils.rs"), utils_rs).unwrap(); + fs::write(dir.path().join("Cargo.toml"), cargo_toml).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("rust").unwrap(); + index_file(&mut index, &extractor, &src.join("main.rs"), main_rs); + index_file(&mut index, &extractor, &src.join("utils.rs"), utils_rs); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "process"); + assert!(process_call.is_some(), "Should find call to process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve utils::process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "process"); + } + } +} + +mod go_lsp { + use super::*; + + fn gopls_available() -> bool { + lsp_available("gopls") + } + + #[test] + #[ignore] + fn test_go_same_file_definition() { + if !gopls_available() { 
+ eprintln!("Skipping: gopls not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_go = r#"package main + +func main() { + helper() +} + +func helper() { + println("hello") +} +"#; + + let go_mod = "module test_project\n\ngo 1.21\n"; + + fs::write(dir.path().join("main.go"), main_go).unwrap(); + fs::write(dir.path().join("go.mod"), go_mod).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("go").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.go"), main_go); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from Go code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + } + } + + #[test] + #[ignore] + fn test_go_cross_package_definition() { + if !gopls_available() { + eprintln!("Skipping: gopls not available"); + return; + } + + let dir = TempDir::new().unwrap(); + let utils_dir = dir.path().join("utils"); + fs::create_dir_all(&utils_dir).unwrap(); + + let main_go = r#"package main + +import "test_project/utils" + +func main() { + utils.Process() +} +"#; + + let utils_go = r#"package utils + +func Process() { + println("processing") +} +"#; + + let go_mod = "module test_project\n\ngo 1.21\n"; + + fs::write(dir.path().join("main.go"), main_go).unwrap(); + fs::write(utils_dir.join("utils.go"), utils_go).unwrap(); + fs::write(dir.path().join("go.mod"), go_mod).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("go").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.go"), main_go); + index_file( + &mut index, + 
&extractor, + &utils_dir.join("utils.go"), + utils_go, + ); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "Process"); + assert!(process_call.is_some(), "Should find call to Process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve utils.Process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "Process"); + } + } +} + +mod python_lsp { + use super::*; + + fn pyright_available() -> bool { + lsp_available("pyright-langserver") || lsp_available("pyright") + } + + #[test] + #[ignore] + fn test_python_same_file_definition() { + if !pyright_available() { + eprintln!("Skipping: pyright not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_py = r#"def main(): + helper() + +def helper(): + print("hello") + +if __name__ == "__main__": + main() +"#; + + fs::write(dir.path().join("main.py"), main_py).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("python").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); + + let calls = collect_calls(&index); + assert!(!calls.is_empty(), "Should extract calls from Python code"); + + let helper_call = calls.iter().find(|c| c.callee == "helper"); + assert!(helper_call.is_some(), "Should find call to helper()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = helper_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve helper() call"); + let def = def.unwrap(); + assert_eq!(def.name, "helper"); + } + } + + #[test] + #[ignore] + fn test_python_cross_module_definition() { + if !pyright_available() { + eprintln!("Skipping: pyright not available"); + return; + } + + let dir = 
TempDir::new().unwrap(); + + let main_py = r#"from utils import process + +def main(): + process() + +if __name__ == "__main__": + main() +"#; + + let utils_py = r#"def process(): + print("processing") +"#; + + fs::write(dir.path().join("main.py"), main_py).unwrap(); + fs::write(dir.path().join("utils.py"), utils_py).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("python").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.py"), main_py); + index_file( + &mut index, + &extractor, + &dir.path().join("utils.py"), + utils_py, + ); + + let calls = collect_calls(&index); + let process_call = calls.iter().find(|c| c.callee == "process"); + assert!(process_call.is_some(), "Should find call to process()"); + + let mut resolver = LspResolver::new(dir.path()); + wait_for_lsp_ready(&mut resolver, &calls, &index); + + if let Some(call) = process_call { + let def = resolver.resolve_call(call, &index); + assert!(def.is_some(), "LSP should resolve process() call"); + let def = def.unwrap(); + assert_eq!(def.name, "process"); + } + } +} + +mod typescript_lsp { + use super::*; + + fn tsserver_available() -> bool { + lsp_available("typescript-language-server") || lsp_available("tsserver") + } + + #[test] + #[ignore] + fn test_typescript_same_file_definition() { + if !tsserver_available() { + eprintln!("Skipping: typescript-language-server not available"); + return; + } + + let dir = TempDir::new().unwrap(); + + let main_ts = r#"function main() { + helper(); +} + +function helper() { + console.log("hello"); +} + +main(); +"#; + + let tsconfig = r#"{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "strict": true + } +} +"#; + + fs::write(dir.path().join("main.ts"), main_ts).unwrap(); + fs::write(dir.path().join("tsconfig.json"), tsconfig).unwrap(); + + let mut index = Index::new(); + let extractor = Extractor::new("typescript").unwrap(); + index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts); + 
+        let calls = collect_calls(&index);
+        assert!(
+            !calls.is_empty(),
+            "Should extract calls from TypeScript code"
+        );
+
+        let helper_call = calls.iter().find(|c| c.callee == "helper");
+        assert!(helper_call.is_some(), "Should find call to helper()");
+
+        let mut resolver = LspResolver::new(dir.path());
+        wait_for_lsp_ready(&mut resolver, &calls, &index);
+
+        if let Some(call) = helper_call {
+            let def = resolver.resolve_call(call, &index);
+            assert!(def.is_some(), "LSP should resolve helper() call");
+            let def = def.unwrap();
+            assert_eq!(def.name, "helper");
+        }
+    }
+
+    #[test]
+    #[ignore]
+    fn test_typescript_cross_module_definition() {
+        if !tsserver_available() {
+            eprintln!("Skipping: typescript-language-server not available");
+            return;
+        }
+
+        let dir = TempDir::new().unwrap();
+
+        let main_ts = r#"import { process } from "./utils";
+
+function main() {
+    process();
+}
+
+main();
+"#;
+
+        let utils_ts = r#"export function process() {
+    console.log("processing");
+}
+"#;
+
+        let tsconfig = r#"{
+    "compilerOptions": {
+        "target": "ES2020",
+        "module": "commonjs",
+        "strict": true
+    }
+}
+"#;
+
+        fs::write(dir.path().join("main.ts"), main_ts).unwrap();
+        fs::write(dir.path().join("utils.ts"), utils_ts).unwrap();
+        fs::write(dir.path().join("tsconfig.json"), tsconfig).unwrap(); // tsconfig marks the project root for tsserver
+
+        let mut index = Index::new();
+        let extractor = Extractor::new("typescript").unwrap();
+        index_file(&mut index, &extractor, &dir.path().join("main.ts"), main_ts);
+        index_file(
+            &mut index,
+            &extractor,
+            &dir.path().join("utils.ts"),
+            utils_ts,
+        );
+
+        let calls = collect_calls(&index);
+        let process_call = calls.iter().find(|c| c.callee == "process");
+        assert!(process_call.is_some(), "Should find call to process()");
+
+        let mut resolver = LspResolver::new(dir.path());
+        wait_for_lsp_ready(&mut resolver, &calls, &index);
+
+        if let Some(call) = process_call {
+            let def = resolver.resolve_call(call, &index);
+            assert!(def.is_some(), "LSP should resolve process() call");
+            let def = def.unwrap();
+            assert_eq!(def.name, "process");
+        }
+    }
+}
+
+mod javascript_lsp {
+    use super::*;
+
+    fn tsserver_available() -> bool {
+        lsp_available("typescript-language-server") || lsp_available("tsserver")
+    }
+
+    #[test]
+    #[ignore]
+    fn test_javascript_same_file_definition() {
+        if !tsserver_available() {
+            eprintln!("Skipping: typescript-language-server not available");
+            return;
+        }
+
+        let dir = TempDir::new().unwrap();
+
+        let main_js = r#"function main() {
+    helper();
+}
+
+function helper() {
+    console.log("hello");
+}
+
+main();
+"#;
+
+        let jsconfig = r#"{
+    "compilerOptions": {
+        "target": "ES2020",
+        "module": "commonjs"
+    }
+}
+"#;
+
+        fs::write(dir.path().join("main.js"), main_js).unwrap();
+        fs::write(dir.path().join("jsconfig.json"), jsconfig).unwrap(); // jsconfig marks the project root for tsserver
+
+        let mut index = Index::new();
+        let extractor = Extractor::new("javascript").unwrap();
+        index_file(&mut index, &extractor, &dir.path().join("main.js"), main_js);
+
+        let calls = collect_calls(&index);
+        assert!(
+            !calls.is_empty(),
+            "Should extract calls from JavaScript code"
+        );
+
+        let helper_call = calls.iter().find(|c| c.callee == "helper");
+        assert!(helper_call.is_some(), "Should find call to helper()");
+
+        let mut resolver = LspResolver::new(dir.path());
+        wait_for_lsp_ready(&mut resolver, &calls, &index);
+
+        if let Some(call) = helper_call {
+            let def = resolver.resolve_call(call, &index);
+            assert!(def.is_some(), "LSP should resolve helper() call");
+            let def = def.unwrap();
+            assert_eq!(def.name, "helper");
+        }
+    }
+}
+
+mod c_lsp {
+    use super::*;
+
+    fn clangd_available() -> bool {
+        lsp_available("clangd")
+    }
+
+    #[test]
+    #[ignore]
+    fn test_c_same_file_definition() {
+        if !clangd_available() {
+            eprintln!("Skipping: clangd not available");
+            return;
+        }
+
+        let dir = TempDir::new().unwrap();
+
+        let main_c = r#"#include <stdio.h>
+
+void helper(void);
+
+int main(void) {
+    helper();
+    return 0;
+}
+
+void helper(void) {
+    printf("hello\n");
+}
+"#;
+
+        let compile_commands = r#"[
+    {
+        "directory": ".",
+        "command": "cc -c main.c",
+        "file": "main.c"
+    }
+]
+"#;
+
+        fs::write(dir.path().join("main.c"), main_c).unwrap();
+        fs::write(dir.path().join("compile_commands.json"), compile_commands).unwrap(); // clangd reads flags from the compilation database
+
+        let mut index = Index::new();
+        let extractor = Extractor::new("c").unwrap();
+        index_file(&mut index, &extractor, &dir.path().join("main.c"), main_c);
+
+        let calls = collect_calls(&index);
+        assert!(!calls.is_empty(), "Should extract calls from C code");
+
+        let helper_call = calls.iter().find(|c| c.callee == "helper");
+        assert!(helper_call.is_some(), "Should find call to helper()");
+
+        let mut resolver = LspResolver::new(dir.path());
+        wait_for_lsp_ready(&mut resolver, &calls, &index);
+
+        if let Some(call) = helper_call {
+            let def = resolver.resolve_call(call, &index);
+            assert!(def.is_some(), "LSP should resolve helper() call");
+            let def = def.unwrap();
+            assert_eq!(def.name, "helper");
+        }
+    }
+
+    #[test]
+    #[ignore]
+    fn test_c_cross_file_definition() {
+        if !clangd_available() {
+            eprintln!("Skipping: clangd not available");
+            return;
+        }
+
+        let dir = TempDir::new().unwrap();
+
+        let main_c = r#"#include "utils.h"
+
+int main(void) {
+    process();
+    return 0;
+}
+"#;
+
+        let utils_h = r#"#ifndef UTILS_H
+#define UTILS_H
+
+void process(void);
+
+#endif
+"#;
+
+        let utils_c = r#"#include "utils.h"
+#include <stdio.h>
+
+void process(void) {
+    printf("processing\n");
+}
+"#;
+
+        let compile_commands = r#"[
+    {
+        "directory": ".",
+        "command": "cc -c main.c",
+        "file": "main.c"
+    },
+    {
+        "directory": ".",
+        "command": "cc -c utils.c",
+        "file": "utils.c"
+    }
+]
+"#;
+
+        fs::write(dir.path().join("main.c"), main_c).unwrap();
+        fs::write(dir.path().join("utils.h"), utils_h).unwrap();
+        fs::write(dir.path().join("utils.c"), utils_c).unwrap();
+        fs::write(dir.path().join("compile_commands.json"), compile_commands).unwrap();
+
+        let mut index = Index::new();
+        let extractor = Extractor::new("c").unwrap();
+        index_file(&mut index, &extractor, &dir.path().join("main.c"), main_c);
+        index_file(&mut index, &extractor, &dir.path().join("utils.c"), utils_c);
+
+        let calls = collect_calls(&index);
+        let process_call = calls.iter().find(|c| c.callee == "process");
+        assert!(process_call.is_some(), "Should find call to process()");
+
+        let mut resolver = LspResolver::new(dir.path());
+        wait_for_lsp_ready(&mut resolver, &calls, &index);
+
+        if let Some(call) = process_call {
+            let def = resolver.resolve_call(call, &index);
+            assert!(def.is_some(), "LSP should resolve process() call");
+            let def = def.unwrap();
+            assert_eq!(def.name, "process");
+        }
+    }
+}
+
+mod cpp_lsp {
+    use super::*;
+
+    fn clangd_available() -> bool {
+        lsp_available("clangd")
+    }
+
+    #[test]
+    #[ignore]
+    fn test_cpp_same_file_definition() {
+        if !clangd_available() {
+            eprintln!("Skipping: clangd not available");
+            return;
+        }
+
+        let dir = TempDir::new().unwrap();
+
+        let main_cpp = r#"#include <iostream>
+
+void helper();
+
+int main() {
+    helper();
+    return 0;
+}
+
+void helper() {
+    std::cout << "hello" << std::endl;
+}
+"#;
+
+        let compile_commands = r#"[
+    {
+        "directory": ".",
+        "command": "c++ -std=c++17 -c main.cpp",
+        "file": "main.cpp"
+    }
+]
+"#;
+
+        fs::write(dir.path().join("main.cpp"), main_cpp).unwrap();
+        fs::write(dir.path().join("compile_commands.json"), compile_commands).unwrap(); // clangd reads flags from the compilation database
+
+        let mut index = Index::new();
+        let extractor = Extractor::new("cpp").unwrap();
+        index_file(
+            &mut index,
+            &extractor,
+            &dir.path().join("main.cpp"),
+            main_cpp,
+        );
+
+        let calls = collect_calls(&index);
+        assert!(!calls.is_empty(), "Should extract calls from C++ code");
+
+        let helper_call = calls.iter().find(|c| c.callee == "helper");
+        assert!(helper_call.is_some(), "Should find call to helper()");
+
+        let mut resolver = LspResolver::new(dir.path());
+        wait_for_lsp_ready(&mut resolver, &calls, &index);
+
+        if let Some(call) = helper_call {
+            let def = resolver.resolve_call(call, &index);
+            assert!(def.is_some(), "LSP should resolve helper() call");
+            let def = def.unwrap();
+            assert_eq!(def.name, "helper");
+        }
+    }
+
+    #[test]
+    #[ignore]
+    fn test_cpp_method_definition() {
+        if !clangd_available() {
+            eprintln!("Skipping: clangd not available");
+            return;
+        }
+
+        let dir = TempDir::new().unwrap();
+
+        let main_cpp = r#"#include <iostream>
+
+class Processor {
+public:
+    void process();
+};
+
+void Processor::process() {
+    std::cout << "processing" << std::endl;
+}
+
+int main() {
+    Processor p;
+    p.process();
+    return 0;
+}
+"#;
+
+        let compile_commands = r#"[
+    {
+        "directory": ".",
+        "command": "c++ -std=c++17 -c main.cpp",
+        "file": "main.cpp"
+    }
+]
+"#;
+
+        fs::write(dir.path().join("main.cpp"), main_cpp).unwrap();
+        fs::write(dir.path().join("compile_commands.json"), compile_commands).unwrap();
+
+        let mut index = Index::new();
+        let extractor = Extractor::new("cpp").unwrap();
+        index_file(
+            &mut index,
+            &extractor,
+            &dir.path().join("main.cpp"),
+            main_cpp,
+        );
+
+        let calls = collect_calls(&index);
+        let process_call = calls.iter().find(|c| c.callee == "process");
+        assert!(process_call.is_some(), "Should find call to process()");
+
+        let mut resolver = LspResolver::new(dir.path());
+        wait_for_lsp_ready(&mut resolver, &calls, &index);
+
+        if let Some(call) = process_call {
+            let def = resolver.resolve_call(call, &index);
+            assert!(def.is_some(), "LSP should resolve p.process() call");
+            let def = def.unwrap();
+            assert_eq!(def.name, "process");
+        }
+    }
+}
+
+mod lsp_availability {
+    use glimpse::code::lsp::check_lsp_availability;
+
+    #[test]
+    fn test_check_lsp_availability_returns_results() {
+        let availability = check_lsp_availability();
+
+        assert!(
+            !availability.is_empty(),
+            "Should return availability for at least one language"
+        );
+
+        for (lang, info) in &availability {
+            println!(
+                "  {}: available={}, location={:?}, can_install={}, method={:?}",
+                lang, info.available, info.location, info.can_auto_install, info.install_method
+            );
+        }
+    }
+
+    #[test]
+    fn test_rust_analyzer_detection() {
+        let availability = check_lsp_availability();
+
+        if let Some(info) = availability.get("rust") {
+            println!(
+                "rust-analyzer: available={}, location={:?}, can_install={}",
+                info.available, info.location, info.can_auto_install
+            );
+            if info.available {
+                assert!(
+                    info.location.is_some(),
+                    "If available, should have location"
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_npm_packages_can_be_installed() {
+        let availability = check_lsp_availability();
+
+        if let Some(info) = availability.get("typescript") {
+            println!(
+                "typescript-language-server: available={}, can_install={}, method={:?}",
+                info.available, info.can_auto_install, info.install_method
+            );
+            if !info.available {
+                assert_eq!(info.install_method.as_deref(), Some("bun"));
+            }
+        }
+
+        if let Some(info) = availability.get("python") {
+            println!(
+                "pyright: available={}, can_install={}, method={:?}",
+                info.available, info.can_auto_install, info.install_method
+            );
+            if !info.available {
+                assert_eq!(info.install_method.as_deref(), Some("bun"));
+            }
+        }
+    }
+
+    #[test]
+    fn test_go_package_can_be_installed() {
+        let availability = check_lsp_availability();
+
+        if let Some(info) = availability.get("go") {
+            println!(
+                "gopls: available={}, can_install={}, method={:?}",
+                info.available, info.can_auto_install, info.install_method
+            );
+            if !info.available {
+                assert_eq!(info.install_method.as_deref(), Some("go"));
+            }
+        }
+    }
+}