From ed2d8f9a0292f3c6ecf5de2468d18ee10853af69 Mon Sep 17 00:00:00 2001 From: Ram Nadella Date: Sun, 29 Jun 2025 19:27:14 -0400 Subject: [PATCH 1/5] feat: add ruff parser backend alongside tree-sitter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add parser trait abstraction to support multiple backends - Implement ruff-based Python parser as alternative to tree-sitter - Add --parser CLI flag to select backend (default: tree-sitter) - Update pylight_devtools with parser comparison tool - Both parsers extract identical symbols with minor column differences - All tests updated to use SymbolIndex::default() This enables experimenting with ruff's parser while maintaining full compatibility with the existing tree-sitter implementation. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- pylight/Cargo.lock | 318 +++++++++++++++++- pylight/Cargo.toml | 4 + pylight/benches/parallel_indexing.rs | 6 +- pylight/src/bin/pylight.rs | 28 +- pylight/src/bin/pylight_devtools.rs | 130 ++++++- pylight/src/index/symbol_index.rs | 20 +- pylight/src/index/updater.rs | 15 +- pylight/src/lsp/server.rs | 5 +- pylight/src/parser/mod.rs | 35 +- pylight/src/parser/ruff.rs | 206 ++++++++++++ pylight/src/parser/trait.rs | 13 + pylight/src/parser/tree_sitter.rs | 46 +++ pylight/static/devtools.html | 93 +++++ .../tests/integration/test_file_watcher.rs | 10 +- pylight/tests/integration/test_ignore_dirs.rs | 4 +- pylight/tests/integration/test_index.rs | 8 +- pylight/tests/integration/test_lsp.rs | 4 +- pylight/tests/parallel_indexing_test.rs | 4 +- 18 files changed, 905 insertions(+), 44 deletions(-) create mode 100644 pylight/src/parser/ruff.rs create mode 100644 pylight/src/parser/trait.rs create mode 100644 pylight/src/parser/tree_sitter.rs diff --git a/pylight/Cargo.lock b/pylight/Cargo.lock index 255543b..3e71acc 100644 --- a/pylight/Cargo.lock +++ b/pylight/Cargo.lock @@ -232,6 +232,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" dependencies = [ "memchr", + "regex-automata 0.4.9", "serde", ] @@ -253,6 +254,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.0.106" @@ -373,6 +383,20 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -418,7 +442,7 @@ dependencies = [ "clap", "criterion-plot", "is-terminal", - "itertools", + "itertools 0.10.5", "num-traits", "once_cell", "oorandom", @@ -439,7 +463,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -742,6 +766,26 @@ dependencies = [ "thread_local", ] +[[package]] +name = "getopts" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.3.2" @@ -1024,6 +1068,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "is-macro" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "is-terminal" version = "0.4.16" @@ -1050,6 +1106,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -1343,6 +1408,44 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1388,6 +1491,15 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "pretty_assertions" version = "1.4.1" @@ -1432,6 +1544,10 @@ dependencies = [ "pretty_assertions", "radix_trie", "rayon", + "ruff_python_ast", + "ruff_python_parser", + "ruff_source_file", + "ruff_text_size", "rustyline", "serde", "serde_json", @@ -1472,6 +1588,36 @@ dependencies = [ "nibble_vec", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.16", +] + [[package]] name = "rayon" version = "1.10.0" @@ -1565,12 +1711,80 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "ruff_python_ast" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff?tag=0.12.1#32c54189cb45a9d0409a1140265ce6d5fcec214d" +dependencies = [ + "aho-corasick", + "bitflags 2.9.0", + "compact_str", + "is-macro", + "itertools 0.14.0", + "memchr", + "ruff_python_trivia", + "ruff_source_file", + "ruff_text_size", + "rustc-hash", + "thiserror", +] + +[[package]] +name = "ruff_python_parser" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff?tag=0.12.1#32c54189cb45a9d0409a1140265ce6d5fcec214d" +dependencies = [ + "bitflags 2.9.0", + "bstr", + "compact_str", + "memchr", + "ruff_python_ast", + "ruff_python_trivia", + "ruff_text_size", + "rustc-hash", + "static_assertions", + "unicode-ident", + "unicode-normalization", + "unicode_names2", +] + +[[package]] +name = "ruff_python_trivia" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff?tag=0.12.1#32c54189cb45a9d0409a1140265ce6d5fcec214d" +dependencies = [ + "itertools 0.14.0", + "ruff_source_file", + "ruff_text_size", + "unicode-ident", +] + +[[package]] +name = "ruff_source_file" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff?tag=0.12.1#32c54189cb45a9d0409a1140265ce6d5fcec214d" +dependencies = [ + "memchr", + "ruff_text_size", +] + +[[package]] +name = "ruff_text_size" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff?tag=0.12.1#32c54189cb45a9d0409a1140265ce6d5fcec214d" + [[package]] name = "rustc-demangle" version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustix" version = "1.0.5" @@ -1715,6 +1929,12 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "smallvec" version = "1.14.0" @@ -1737,6 +1957,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -1772,12 +1998,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" dependencies = [ "fastrand 2.3.0", - "getrandom", + "getrandom 0.3.2", "once_cell", "rustix", "windows-sys 0.59.0", ] +[[package]] +name = "thiserror" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -1820,6 +2066,21 @@ dependencies = [ "serde_json", ] +[[package]] +name = "tinyvec" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.44.1" @@ -1936,6 +2197,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-segmentation" version = "1.12.0" @@ -1948,6 +2218,28 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "unicode_names2" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" +dependencies = [ + "phf", + "unicode_names2_generator", +] + +[[package]] +name = "unicode_names2_generator" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" +dependencies = [ + "getopts", + "log", + "phf_codegen", + "rand", +] + [[package]] name = "url" version = "2.5.4" @@ -2311,6 +2603,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" version = "0.1.6" diff --git a/pylight/Cargo.toml b/pylight/Cargo.toml index 5f0a30e..4b94617 100644 --- a/pylight/Cargo.toml +++ b/pylight/Cargo.toml @@ -26,6 +26,10 @@ futures-lite = "1.13" rayon = "1.9.0" tree-sitter = "0.20.10" tree-sitter-python = "0.20.4" +ruff_python_parser = { git = "https://github.com/astral-sh/ruff", tag = "0.12.1" } +ruff_python_ast = { git = "https://github.com/astral-sh/ruff", tag = "0.12.1" } +ruff_text_size = { git = "https://github.com/astral-sh/ruff", tag = "0.12.1" } +ruff_source_file = { git = "https://github.com/astral-sh/ruff", tag = "0.12.1" } fuzzy-matcher = "0.3" bincode = "1.3" flate2 = "1.0" diff --git a/pylight/benches/parallel_indexing.rs b/pylight/benches/parallel_indexing.rs index 85548f6..76a1bb1 100644 --- a/pylight/benches/parallel_indexing.rs +++ b/pylight/benches/parallel_indexing.rs @@ -69,7 +69,7 @@ fn benchmark_parallel_indexing(c: &mut Criterion) { pool.install(|| { // Create a fresh index for each iteration - let index = Arc::new(SymbolIndex::new()); + let index = Arc::new(SymbolIndex::default()); // Parse and index files // Return the result to prevent optimization @@ -107,7 +107,7 @@ fn benchmark_parallel_vs_sequential(c: &mut Criterion) { .unwrap(); pool.install(|| { - let index = Arc::new(SymbolIndex::new()); + let index = Arc::new(SymbolIndex::default()); index.parse_and_index_files(subset.clone()).unwrap() }); }); @@ -122,7 +122,7 @@ fn benchmark_parallel_vs_sequential(c: &mut Criterion) { .unwrap(); pool.install(|| { - let index = Arc::new(SymbolIndex::new()); + let index = Arc::new(SymbolIndex::default()); index.parse_and_index_files(subset.clone()).unwrap() }); }); diff --git a/pylight/src/bin/pylight.rs b/pylight/src/bin/pylight.rs index 386dfd2..93f7364 100644 --- a/pylight/src/bin/pylight.rs +++ b/pylight/src/bin/pylight.rs @@ -1,7 +1,7 @@ //! Pylight LSP server binary use clap::Parser; -use pylight::{LspServer, Result}; +use pylight::{parser::ParserBackend, LspServer, Result}; use tracing_subscriber::EnvFilter; #[derive(Parser, Debug)] @@ -18,6 +18,10 @@ struct Args { /// Search query in standalone mode #[arg(short, long, requires = "standalone")] query: Option, + + /// Parser backend to use (tree-sitter or ruff) + #[arg(long, default_value = "tree-sitter")] + parser: String, } fn main() -> Result<()> { @@ -53,18 +57,32 @@ fn main() -> Result<()> { let args = Args::parse(); + // Parse the parser backend + let parser_backend = ParserBackend::from_str(&args.parser).ok_or_else(|| { + pylight::Error::Parse(format!( + "Invalid parser backend: {}. Valid options: tree-sitter, ruff", + args.parser + )) + })?; + + tracing::info!("Using parser backend: {:?}", parser_backend); + if args.standalone { // Standalone mode for testing - run_standalone(args.directory, args.query) + run_standalone(args.directory, args.query, parser_backend) } else { // LSP server mode tracing::info!("Starting pylight LSP server"); - let server = LspServer::new()?; + let server = LspServer::new(parser_backend)?; server.run() } } -fn run_standalone(directory: Option, query: Option) -> Result<()> { +fn run_standalone( + directory: Option, + query: Option, + parser_backend: ParserBackend, +) -> Result<()> { use pylight::{SearchEngine, SymbolIndex}; use std::sync::Arc; @@ -73,7 +91,7 @@ fn run_standalone(directory: Option, query: Option) tracing::info!("Indexing directory: {}", dir.display()); // Create index and use the parallel index_workspace method - let index = Arc::new(SymbolIndex::new()); + let index = Arc::new(SymbolIndex::new(parser_backend)); index.clone().index_workspace(&dir)?; if let Some(query) = query { diff --git a/pylight/src/bin/pylight_devtools.rs b/pylight/src/bin/pylight_devtools.rs index 5fcd704..a9038c5 100644 --- a/pylight/src/bin/pylight_devtools.rs +++ b/pylight/src/bin/pylight_devtools.rs @@ -1,3 +1,4 @@ +use pylight::parser::{create_parser, ParserBackend}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::io::{BufRead, BufReader, Read, Write}; @@ -88,8 +89,8 @@ fn main() { .respond( response.with_header( tiny_http::Header::from_bytes( - &b"Content-Type"[..], - &b"application/json"[..], + b"Content-Type", + b"application/json", ) .unwrap(), ), @@ -180,6 +181,56 @@ fn main() { ) .unwrap(); } + ("POST", "/compare-parsers") => { + let mut content = String::new(); + request.as_reader().read_to_string(&mut content).unwrap(); + + let req: serde_json::Value = match serde_json::from_str(&content) { + Ok(req) => req, + Err(e) => { + error!("Failed to parse compare request: {}", e); + let response = Response::from_string( + json!({"status": "error", "message": format!("Invalid request: {e}")}) + .to_string(), + ) + .with_status_code(400); + request + .respond( + response.with_header( + tiny_http::Header::from_bytes( + b"Content-Type", + b"application/json", + ) + .unwrap(), + ), + ) + .unwrap(); + continue; + } + }; + + let test_code = req.get("code").and_then(|v| v.as_str()).unwrap_or( + r#" +def test_function(): + pass + +class TestClass: + def method(self): + pass +"#, + ); + + let response = compare_parsers(test_code); + request + .respond( + Response::from_string(serde_json::to_string(&response).unwrap()) + .with_header( + tiny_http::Header::from_bytes(b"Content-Type", b"application/json") + .unwrap(), + ), + ) + .unwrap(); + } _ => { warn!("404 Not Found: {} {}", method, url); request @@ -441,3 +492,78 @@ fn read_lsp_message(instance: &mut PylightInstance) -> Result { Ok(response) } + +fn compare_parsers(code: &str) -> Value { + use std::path::Path; + + let test_path = Path::new("test.py"); + + // Parse with tree-sitter + let ts_start = Instant::now(); + let ts_parser = create_parser(ParserBackend::TreeSitter).unwrap(); + let ts_symbols = ts_parser.parse_file(test_path, code).unwrap_or_default(); + let ts_duration = ts_start.elapsed(); + + // Parse with ruff + let ruff_start = Instant::now(); + let ruff_parser = create_parser(ParserBackend::Ruff).unwrap(); + let ruff_symbols = ruff_parser.parse_file(test_path, code).unwrap_or_default(); + let ruff_duration = ruff_start.elapsed(); + + // Compare results + let ts_symbol_info: Vec = ts_symbols + .iter() + .map(|s| { + json!({ + "name": s.name, + "kind": format!("{:?}", s.kind), + "line": s.line, + "column": s.column, + "container": s.container_name.as_ref(), + }) + }) + .collect(); + + let ruff_symbol_info: Vec = ruff_symbols + .iter() + .map(|s| { + json!({ + "name": s.name, + "kind": format!("{:?}", s.kind), + "line": s.line, + "column": s.column, + "container": s.container_name.as_ref(), + }) + }) + .collect(); + + let same_count = ts_symbols.len() == ruff_symbols.len(); + let same_symbols = ts_symbols + .iter() + .zip(ruff_symbols.iter()) + .all(|(ts, ruff)| { + ts.name == ruff.name && ts.kind == ruff.kind && ts.container_name == ruff.container_name + }); + + json!({ + "tree_sitter": { + "symbols": ts_symbol_info, + "count": ts_symbols.len(), + "duration_ms": ts_duration.as_secs_f64() * 1000.0, + }, + "ruff": { + "symbols": ruff_symbol_info, + "count": ruff_symbols.len(), + "duration_ms": ruff_duration.as_secs_f64() * 1000.0, + }, + "comparison": { + "same_count": same_count, + "same_symbols": same_symbols, + "differences": if !same_symbols { + Some("Symbols differ in name, kind, or container") + } else { + None + } + } + }) +} diff --git a/pylight/src/index/symbol_index.rs b/pylight/src/index/symbol_index.rs index 5d8540e..cc37a54 100644 --- a/pylight/src/index/symbol_index.rs +++ b/pylight/src/index/symbol_index.rs @@ -1,6 +1,7 @@ //! Symbol index implementation -use crate::{PythonParser, Result, Symbol}; +use crate::parser::{create_parser, ParserBackend}; +use crate::{Result, Symbol}; use parking_lot::RwLock; use rayon::prelude::*; use std::collections::HashMap; @@ -13,6 +14,7 @@ pub struct SymbolIndex { symbols: Arc>>>>, all_symbols: Arc>>>, file_metadata: Arc>>, + parser_backend: ParserBackend, } #[derive(Debug, Clone)] @@ -22,14 +24,20 @@ pub struct FileMetadata { } impl SymbolIndex { - pub fn new() -> Self { + pub fn new(parser_backend: ParserBackend) -> Self { Self { symbols: Arc::new(RwLock::new(HashMap::new())), all_symbols: Arc::new(RwLock::new(Vec::new())), file_metadata: Arc::new(RwLock::new(HashMap::new())), + parser_backend, } } + /// Get the parser backend used by this index + pub fn parser_backend(&self) -> ParserBackend { + self.parser_backend + } + pub fn add_file(&self, path: PathBuf, symbols: Vec) -> Result<()> { // Canonicalize the path for consistent comparison let canonical_path = path.canonicalize().unwrap_or(path.clone()); @@ -238,8 +246,8 @@ impl SymbolIndex { thread_id ); - // Each thread gets its own parser - let mut parser = match PythonParser::new() { + // Create parser instance for this thread + let parser = match create_parser(self.parser_backend) { Ok(p) => p, Err(e) => { tracing::warn!("Failed to create parser: {}", e); @@ -335,7 +343,7 @@ impl SymbolIndex { impl Default for SymbolIndex { fn default() -> Self { - Self::new() + Self::new(ParserBackend::TreeSitter) } } @@ -345,7 +353,7 @@ mod tests { #[test] fn test_index_creation() { - let index = SymbolIndex::new(); + let index = SymbolIndex::new(ParserBackend::TreeSitter); assert_eq!(index.get_all_symbols().len(), 0); } } diff --git a/pylight/src/index/updater.rs b/pylight/src/index/updater.rs index 802d437..10f8029 100644 --- a/pylight/src/index/updater.rs +++ b/pylight/src/index/updater.rs @@ -1,7 +1,8 @@ //! Index update coordinator that handles file change events +use crate::parser::create_parser; use crate::watcher::{FileEvent, FileEventHandler}; -use crate::{PythonParser, Result, SymbolIndex}; +use crate::{Result, SymbolIndex}; use parking_lot::RwLock; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -48,8 +49,8 @@ impl IndexUpdater { let start = Instant::now(); - // Create a parser for this file - let mut parser = PythonParser::new()?; + // Create a parser for this file using the index's backend + let parser = create_parser(self.index.parser_backend())?; // Read and parse the file match std::fs::read_to_string(path) { @@ -86,8 +87,8 @@ impl IndexUpdater { info!("Starting full workspace re-index"); let start = Instant::now(); - // Create a new temporary index - let new_index = Arc::new(SymbolIndex::new()); + // Create a new temporary index with the same parser backend + let new_index = Arc::new(SymbolIndex::new(self.index.parser_backend())); // Index the workspace into the new index new_index.clone().index_workspace(&self.workspace_root)?; @@ -208,7 +209,7 @@ mod tests { #[test] fn test_updater_creation() { let temp_dir = TempDir::new().unwrap(); - let index = Arc::new(SymbolIndex::new()); + let index = Arc::new(SymbolIndex::default()); let updater = IndexUpdater::new(index, temp_dir.path().to_path_buf()); assert_eq!(*updater.state.read(), UpdaterState::Idle); @@ -217,7 +218,7 @@ mod tests { #[test] fn test_should_watch_python_files() { let temp_dir = TempDir::new().unwrap(); - let index = Arc::new(SymbolIndex::new()); + let index = Arc::new(SymbolIndex::default()); let updater = IndexUpdater::new(index, temp_dir.path().to_path_buf()); assert!(updater.should_watch(Path::new("test.py"))); diff --git a/pylight/src/lsp/server.rs b/pylight/src/lsp/server.rs index 8577ee3..cf5854c 100644 --- a/pylight/src/lsp/server.rs +++ b/pylight/src/lsp/server.rs @@ -1,6 +1,7 @@ //! Core LSP server implementation use crate::index::updater::IndexUpdater; +use crate::parser::ParserBackend; use crate::watcher::{FileWatcher, WatcherConfig}; use crate::{Error, Result, SearchEngine, SymbolIndex}; use lsp_server::{Connection, Message, RequestId, Response}; @@ -21,12 +22,12 @@ pub struct LspServer { } impl LspServer { - pub fn new() -> Result { + pub fn new(parser_backend: ParserBackend) -> Result { let (connection, _io_threads) = Connection::stdio(); Ok(Self { connection, - index: Arc::new(SymbolIndex::new()), + index: Arc::new(SymbolIndex::new(parser_backend)), search_engine: Arc::new(SearchEngine::new()), workspace_root: None, cancelled_requests: Arc::new(Mutex::new(HashSet::new())), diff --git a/pylight/src/parser/mod.rs b/pylight/src/parser/mod.rs index bd56406..46900f6 100644 --- a/pylight/src/parser/mod.rs +++ b/pylight/src/parser/mod.rs @@ -1,9 +1,42 @@ -//! Python parsing module using tree-sitter +//! Python parsing module with multiple backend support pub mod extractor; pub mod python_parser; +pub mod ruff; +pub mod r#trait; +pub mod tree_sitter; pub use python_parser::PythonParser; +pub use r#trait::Parser; +pub use ruff::RuffParser; +pub use tree_sitter::TreeSitterParser; + +use crate::Result; +use std::sync::Arc; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ParserBackend { + TreeSitter, + Ruff, +} + +impl ParserBackend { + pub fn from_str(s: &str) -> Option { + match s.to_lowercase().as_str() { + "tree-sitter" | "treesitter" => Some(Self::TreeSitter), + "ruff" => Some(Self::Ruff), + _ => None, + } + } +} + +/// Create a parser instance based on the specified backend +pub fn create_parser(backend: ParserBackend) -> Result> { + match backend { + ParserBackend::TreeSitter => Ok(Arc::new(TreeSitterParser::new()?)), + ParserBackend::Ruff => Ok(Arc::new(RuffParser::new())), + } +} #[cfg(test)] mod tests; diff --git a/pylight/src/parser/ruff.rs b/pylight/src/parser/ruff.rs new file mode 100644 index 0000000..6d25b40 --- /dev/null +++ b/pylight/src/parser/ruff.rs @@ -0,0 +1,206 @@ +//! Ruff-based Python parser implementation + +use crate::{Error, Result, Symbol, SymbolKind}; +use ruff_python_ast::{ + visitor::{self, Visitor}, + Mod, Stmt, +}; +use ruff_python_parser::{parse, Mode}; +use ruff_source_file::{LineIndex, SourceCode}; +use std::path::{Path, PathBuf}; + +use super::r#trait::Parser; + +pub struct RuffParser; + +impl RuffParser { + pub fn new() -> Self { + Self + } +} + +impl Default for RuffParser { + fn default() -> Self { + Self::new() + } +} + +#[derive(Clone)] +enum Context { + Class(String), + Function(String), +} + +struct SymbolExtractor<'a> { + symbols: &'a mut Vec, + file_path: PathBuf, + context_stack: Vec, + source_code: SourceCode<'a, 'a>, +} + +impl<'a> SymbolExtractor<'a> { + fn new( + symbols: &'a mut Vec, + file_path: PathBuf, + source: &'a str, + line_index: &'a LineIndex, + ) -> Self { + Self { + symbols, + file_path, + context_stack: Vec::new(), + source_code: SourceCode::new(source, line_index), + } + } + + fn determine_function_kind(&self) -> SymbolKind { + // Check if we're inside a class context + for ctx in &self.context_stack { + if matches!(ctx, Context::Class(_)) { + return SymbolKind::Method; + } + } + + // Check if we're inside a function context (nested function) + if self + .context_stack + .iter() + .any(|ctx| matches!(ctx, Context::Function(_))) + { + SymbolKind::NestedFunction + } else { + SymbolKind::Function + } + } + + fn determine_class_kind(&self) -> SymbolKind { + if self.context_stack.is_empty() { + SymbolKind::Class + } else { + SymbolKind::NestedClass + } + } + + fn build_container_name(&self) -> Option { + if self.context_stack.is_empty() { + None + } else { + Some( + self.context_stack + .iter() + .map(|ctx| match ctx { + Context::Class(name) | Context::Function(name) => name.clone(), + }) + .collect::>() + .join("."), + ) + } + } + + fn get_line_column(&self, offset: u32) -> (usize, usize) { + let location = self + .source_code + .source_location(offset.into(), ruff_source_file::PositionEncoding::Utf8); + // Both line and column are 1-based in Ruff + (location.line.get(), location.character_offset.get()) + } +} + +impl<'a> Visitor<'a> for SymbolExtractor<'a> { + fn visit_stmt(&mut self, stmt: &'a Stmt) { + match stmt { + Stmt::FunctionDef(func_def) => { + let name_str = func_def.name.to_string(); + // Note: func_def.is_async is available to check if it's async + let kind = self.determine_function_kind(); + let container = self.build_container_name(); + // Use the name's range to get the line of the actual 'def' keyword + let (line, column) = self.get_line_column(func_def.name.range.start().to_u32()); + + // Get module name from file path + let module_path = self + .file_path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + .to_string(); + + let mut symbol = + Symbol::new(name_str.clone(), kind, self.file_path.clone(), line, column); + + if let Some(container) = container { + symbol = symbol.with_container(container); + } + + symbol = symbol.with_module(module_path); + + self.symbols.push(symbol); + + self.context_stack.push(Context::Function(name_str)); + visitor::walk_stmt(self, stmt); + self.context_stack.pop(); + } + Stmt::ClassDef(class_def) => { + let name_str = class_def.name.to_string(); + let kind = self.determine_class_kind(); + let container = self.build_container_name(); + // Use the name's range to get the line of the actual 'class' keyword + let (line, column) = self.get_line_column(class_def.name.range.start().to_u32()); + + // Get module name from file path + let module_path = self + .file_path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + .to_string(); + + let mut symbol = + Symbol::new(name_str.clone(), kind, self.file_path.clone(), line, column); + + if let Some(container) = container { + symbol = symbol.with_container(container); + } + + symbol = symbol.with_module(module_path); + + self.symbols.push(symbol); + + self.context_stack.push(Context::Class(name_str)); + visitor::walk_stmt(self, stmt); + self.context_stack.pop(); + } + _ => visitor::walk_stmt(self, stmt), + } + } +} + +impl Parser for RuffParser { + fn parse_file(&self, file_path: &Path, source: &str) -> Result> { + let line_index = LineIndex::from_source_text(source); + + let parsed = parse(source, Mode::Module.into()) + .map_err(|e| Error::Parse(format!("Ruff parse error: {e:?}")))?; + + let mut symbols = Vec::new(); + let mut extractor = + SymbolExtractor::new(&mut symbols, file_path.to_path_buf(), source, &line_index); + + match parsed.syntax() { + Mod::Module(module) => { + for stmt in &module.body { + extractor.visit_stmt(stmt); + } + } + Mod::Expression(_) => { + // Not handling expression mode + } + } + + Ok(symbols) + } + + fn backend_name(&self) -> &'static str { + "ruff" + } +} diff --git a/pylight/src/parser/trait.rs b/pylight/src/parser/trait.rs new file mode 100644 index 0000000..479aa7f --- /dev/null +++ b/pylight/src/parser/trait.rs @@ -0,0 +1,13 @@ +//! Parser trait abstraction for different Python parsing backends + +use crate::{Result, Symbol}; +use std::path::Path; + +/// Trait for Python parsers that can extract symbols from source code +pub trait Parser: Send + Sync { + /// Parse a Python file and extract symbols + fn parse_file(&self, path: &Path, content: &str) -> Result>; + + /// Get the name of the parser backend + fn backend_name(&self) -> &'static str; +} diff --git a/pylight/src/parser/tree_sitter.rs b/pylight/src/parser/tree_sitter.rs new file mode 100644 index 0000000..0dfa55e --- /dev/null +++ b/pylight/src/parser/tree_sitter.rs @@ -0,0 +1,46 @@ +//! Tree-sitter based Python parser implementation + +use crate::{Error, Result, Symbol}; +use std::path::Path; +use std::sync::Mutex; +use tree_sitter::Parser as TSParser; + +use super::extractor::SymbolExtractor; +use super::r#trait::Parser; + +pub struct TreeSitterParser { + parser: Mutex, +} + +impl TreeSitterParser { + pub fn new() -> Result { + let mut parser = TSParser::new(); + parser + .set_language(tree_sitter_python::language()) + .map_err(|e| Error::Parse(format!("Failed to set language: {e}")))?; + Ok(Self { + parser: Mutex::new(parser), + }) + } +} + +impl Parser for TreeSitterParser { + fn parse_file(&self, path: &Path, content: &str) -> Result> { + let mut parser = self.parser.lock().unwrap(); + + let tree = parser + .parse(content, None) + .ok_or_else(|| Error::Parse("Failed to parse file".to_string()))?; + + let mut symbols = Vec::new(); + let mut extractor = + SymbolExtractor::new(content.as_bytes(), path.to_path_buf(), &mut symbols); + + extractor.visit_node(tree.root_node())?; + Ok(symbols) + } + + fn backend_name(&self) -> &'static str { + "tree-sitter" + } +} diff --git a/pylight/static/devtools.html b/pylight/static/devtools.html index 728f575..c663d6a 100644 --- a/pylight/static/devtools.html +++ b/pylight/static/devtools.html @@ -183,6 +183,36 @@

Pylight DevTools

+ +
+

Parser Comparison

+
+ Compare how tree-sitter and ruff parsers extract symbols from Python code. +
+ + + + + +
+