From b8d9b260baae46f8cabb40bdb0fc586e8d11b050 Mon Sep 17 00:00:00 2001
From: Thomas Marshall
Date: Wed, 4 Feb 2026 12:51:57 +0000
Subject: [PATCH 1/2] Add diff functionality

---
 rust/rubydex/src/diff.rs | 145 +++++++++++++++++++++++++++++++++++++++
 rust/rubydex/src/lib.rs  |   1 +
 2 files changed, 146 insertions(+)
 create mode 100644 rust/rubydex/src/diff.rs

diff --git a/rust/rubydex/src/diff.rs b/rust/rubydex/src/diff.rs
new file mode 100644
index 00000000..dc09e648
--- /dev/null
+++ b/rust/rubydex/src/diff.rs
@@ -0,0 +1,145 @@
+//! Structural comparison of two [`Graph`]s.
+//!
+//! [`diff`] reports which IDs were added, removed or changed between two graphs.
+
+use std::collections::HashSet;
+
+use crate::model::{
+    graph::Graph,
+    ids::{DeclarationId, DefinitionId, NameId, ReferenceId},
+    name::NameRef,
+};
+
+/// IDs that differ between two graphs, as computed by [`diff`].
+///
+/// `added_*` IDs are present only in the second graph, `removed_*` IDs only in the first,
+/// and `changed_*` IDs are present in both graphs but did not compare equal.
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct GraphDiff {
+    pub added_declarations: HashSet<DeclarationId>,
+    pub removed_declarations: HashSet<DeclarationId>,
+    pub changed_declarations: HashSet<DeclarationId>,
+    pub added_definitions: HashSet<DefinitionId>,
+    pub removed_definitions: HashSet<DefinitionId>,
+    pub added_references: HashSet<ReferenceId>,
+    pub removed_references: HashSet<ReferenceId>,
+    pub added_names: HashSet<NameId>,
+    pub removed_names: HashSet<NameId>,
+    pub changed_names: HashSet<NameId>,
+}
+
+impl GraphDiff {
+    /// Returns `true` when no category contains any ID.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.added_declarations.is_empty()
+            && self.removed_declarations.is_empty()
+            && self.changed_declarations.is_empty()
+            && self.added_definitions.is_empty()
+            && self.removed_definitions.is_empty()
+            && self.added_references.is_empty()
+            && self.removed_references.is_empty()
+            && self.added_names.is_empty()
+            && self.removed_names.is_empty()
+            && self.changed_names.is_empty()
+    }
+}
+
+/// Compares the declaration stored under `id` in both graphs.
+///
+/// Returns `false` when either graph lacks the declaration or when only one side is a
+/// namespace. Two namespaces are equal when their members, ancestors, descendants and
+/// singleton class all match; two non-namespace declarations are always considered equal.
+fn declarations_equal(a: &Graph, b: &Graph, id: DeclarationId) -> bool {
+    let (Some(decl_a), Some(decl_b)) = (a.declarations().get(&id), b.declarations().get(&id)) else {
+        return false;
+    };
+
+    match (decl_a.as_namespace(), decl_b.as_namespace()) {
+        (Some(ns_a), Some(ns_b)) => {
+            ns_a.members() == ns_b.members()
+                && ns_a.ancestors().iter().collect::<Vec<_>>() == ns_b.ancestors().iter().collect::<Vec<_>>()
+                && ns_a.descendants() == ns_b.descendants()
+                && ns_a.singleton_class() == ns_b.singleton_class()
+        }
+        (None, None) => true,
+        // A namespace on one side only means the declaration changed kind.
+        _ => false,
+    }
+}
+
+/// Compares the name stored under `id` in both graphs.
+///
+/// Resolved names are equal when they point at the same declaration. Two unresolved names
+/// are always considered equal, and a resolved/unresolved pair never is. Returns `false`
+/// when either graph lacks the name.
+fn names_equal(a: &Graph, b: &Graph, id: NameId) -> bool {
+    let (Some(name_a), Some(name_b)) = (a.names().get(&id), b.names().get(&id)) else {
+        return false;
+    };
+
+    match (name_a, name_b) {
+        (NameRef::Resolved(a), NameRef::Resolved(b)) => a.declaration_id() == b.declaration_id(),
+        (NameRef::Unresolved(_), NameRef::Unresolved(_)) => true,
+        _ => false,
+    }
+}
+
+/// Computes the difference between graph `a` and graph `b`.
+///
+/// Declarations and names are compared by ID and by content; definitions and constant
+/// references are compared by ID only. Returns `None` when no difference was found.
+#[must_use]
+pub fn diff(a: &Graph, b: &Graph) -> Option<GraphDiff> {
+    let mut result = GraphDiff::default();
+
+    for id in a.declarations().keys() {
+        if !b.declarations().contains_key(id) {
+            result.removed_declarations.insert(*id);
+        } else if !declarations_equal(a, b, *id) {
+            result.changed_declarations.insert(*id);
+        }
+    }
+    for id in b.declarations().keys() {
+        if !a.declarations().contains_key(id) {
+            result.added_declarations.insert(*id);
+        }
+    }
+
+    for id in a.definitions().keys() {
+        if !b.definitions().contains_key(id) {
+            result.removed_definitions.insert(*id);
+        }
+    }
+    for id in b.definitions().keys() {
+        if !a.definitions().contains_key(id) {
+            result.added_definitions.insert(*id);
+        }
+    }
+
+    for id in a.constant_references().keys() {
+        if !b.constant_references().contains_key(id) {
+            result.removed_references.insert(*id);
+        }
+    }
+    for id in b.constant_references().keys() {
+        if !a.constant_references().contains_key(id) {
+            result.added_references.insert(*id);
+        }
+    }
+
+    for id in a.names().keys() {
+        if !b.names().contains_key(id) {
+            result.removed_names.insert(*id);
+        } else if !names_equal(a, b, *id) {
+            result.changed_names.insert(*id);
+        }
+    }
+    for id in b.names().keys() {
+        if !a.names().contains_key(id) {
+            result.added_names.insert(*id);
+        }
+    }
+
+    if result.is_empty() { None } else { Some(result) }
+}
diff --git a/rust/rubydex/src/lib.rs b/rust/rubydex/src/lib.rs
index ead014aa..867db696 100644
--- a/rust/rubydex/src/lib.rs
+++ b/rust/rubydex/src/lib.rs
@@ -1,5 +1,6 @@
 pub mod compile_assertions;
 pub mod diagnostic;
+pub mod diff;
 pub mod errors;
 pub mod indexing;
 pub mod job_queue;
From fd3871dd416742dbc2e035a6e588be310bc28f7f Mon Sep 17 00:00:00 2001
From: Thomas Marshall
Date: Wed, 4 Feb 2026 12:52:06 +0000
Subject: [PATCH 2/2] Add diff entry point

---
 rust/rubydex/examples/diff.rs | 260 ++++++++++++++++++++++++++++++++++
 1 file changed, 260 insertions(+)
 create mode 100644 rust/rubydex/examples/diff.rs

diff --git a/rust/rubydex/examples/diff.rs b/rust/rubydex/examples/diff.rs
new file mode 100644
index 00000000..9c1432a4
--- /dev/null
+++ b/rust/rubydex/examples/diff.rs
@@ -0,0 +1,260 @@
+use std::process::Command;
+
+use clap::Parser;
+use rubydex::{
+    diff::{self, GraphDiff},
+    indexing, listing,
+    model::graph::Graph,
+    resolution::Resolver,
+};
+
+// Command-line arguments: the repository path and the two git refs to compare.
+#[derive(Parser, Debug)]
+#[command(name = "diff", about = "Diff two git refs to test graph equality")]
+struct Args {
+    #[arg(help = "Path to git repository")]
+    path: String,
+
+    #[arg(help = "First git ref (e.g., main, HEAD~1, abc123)")]
+    ref_a: String,
+
+    #[arg(help = "Second git ref")]
+    ref_b: String,
+}
+
+/// Runs `git checkout <git_ref>` in the repository at `path`.
+///
+/// # Errors
+///
+/// Returns a message when git cannot be spawned or exits unsuccessfully; in the latter
+/// case the message includes git's stderr output.
+fn checkout(path: &str, git_ref: &str) -> Result<(), String> {
+    let output = Command::new("git")
+        .args(["checkout", git_ref])
+        .current_dir(path)
+        .output()
+        .map_err(|e| format!("Failed to run git checkout: {e}"))?;
+
+    if !output.status.success() {
+        return Err(format!(
+            "git checkout {} failed: {}",
+            git_ref,
+            String::from_utf8_lossy(&output.stderr)
+        ));
+    }
+    Ok(())
+}
+
+/// Returns a ref identifying the current `HEAD` of the repository at `path`.
+///
+/// Prefers the short branch name, so that checking it out again re-attaches `HEAD` to the
+/// branch, and falls back to the commit hash when `HEAD` is detached.
+///
+/// # Errors
+///
+/// Returns a message when git cannot be spawned or `git rev-parse HEAD` fails.
+fn get_current_ref(path: &str) -> Result<String, String> {
+    // With `-q`, `symbolic-ref` exits non-zero without an error message on a detached HEAD.
+    let branch = Command::new("git")
+        .args(["symbolic-ref", "--short", "-q", "HEAD"])
+        .current_dir(path)
+        .output()
+        .map_err(|e| format!("Failed to run git symbolic-ref: {e}"))?;
+
+    if branch.status.success() {
+        let name = String::from_utf8_lossy(&branch.stdout).trim().to_string();
+        if !name.is_empty() {
+            return Ok(name);
+        }
+    }
+
+    let output = Command::new("git")
+        .args(["rev-parse", "HEAD"])
+        .current_dir(path)
+        .output()
+        .map_err(|e| format!("Failed to run git rev-parse: {e}"))?;
+
+    if !output.status.success() {
+        return Err("git rev-parse HEAD failed".to_string());
+    }
+    Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
+}
+
+/// Indexes the files listed under `path` into a new graph and resolves it.
+fn build_graph(path: &str) -> Graph {
+    // NOTE(review): the second element returned by `collect_file_paths` is discarded;
+    // confirm whether it carries errors that should be reported.
+    let (file_paths, _) = listing::collect_file_paths(vec![path.to_string()]);
+
+    let mut graph = Graph::new();
+    indexing::index_files(&mut graph, file_paths);
+
+    let mut resolver = Resolver::new(&mut graph);
+    resolver.resolve_all();
+
+    graph
+}
+
+/// Checks out `git_ref` in the repository at `path`, builds its graph and prints its size.
+///
+/// # Errors
+///
+/// Returns the checkout error when `git_ref` cannot be checked out.
+fn build_graph_at(path: &str, git_ref: &str) -> Result<Graph, String> {
+    println!("Checking out {git_ref}...");
+    checkout(path, git_ref)?;
+    println!("Building graph for {git_ref}...");
+    let graph = build_graph(path);
+    println!(
+        " {} declarations, {} definitions",
+        graph.declarations().len(),
+        graph.definitions().len()
+    );
+    Ok(graph)
+}
+
+/// Prints a summary of `diff`: declaration names per category and counts for the rest.
+///
+/// Removed and changed declarations are looked up in `graph_a`, added ones in `graph_b`.
+fn print_diff(diff: &GraphDiff, graph_a: &Graph, graph_b: &Graph) {
+    if !diff.added_declarations.is_empty() {
+        println!("\nAdded declarations ({}):", diff.added_declarations.len());
+        for id in &diff.added_declarations {
+            if let Some(decl) = graph_b.declarations().get(id) {
+                println!(" + {}", decl.name());
+            }
+        }
+    }
+
+    if !diff.removed_declarations.is_empty() {
+        println!("\nRemoved declarations ({}):", diff.removed_declarations.len());
+        for id in &diff.removed_declarations {
+            if let Some(decl) = graph_a.declarations().get(id) {
+                println!(" - {}", decl.name());
+            }
+        }
+    }
+
+    if !diff.changed_declarations.is_empty() {
+        println!("\nChanged declarations ({}):", diff.changed_declarations.len());
+        for id in &diff.changed_declarations {
+            if let Some(decl) = graph_a.declarations().get(id) {
+                println!(" ~ {}", decl.name());
+                print_declaration_diff(graph_a, graph_b, *id);
+            }
+        }
+    }
+
+    if !diff.added_definitions.is_empty() {
+        println!("\nAdded definitions: {}", diff.added_definitions.len());
+    }
+    if !diff.removed_definitions.is_empty() {
+        println!("Removed definitions: {}", diff.removed_definitions.len());
+    }
+
+    if !diff.added_references.is_empty() {
+        println!("\nAdded references: {}", diff.added_references.len());
+    }
+    if !diff.removed_references.is_empty() {
+        println!("Removed references: {}", diff.removed_references.len());
+    }
+
+    if !diff.added_names.is_empty() {
+        println!("\nAdded names: {}", diff.added_names.len());
+    }
+    if !diff.removed_names.is_empty() {
+        println!("Removed names: {}", diff.removed_names.len());
+    }
+    if !diff.changed_names.is_empty() {
+        println!("Changed names: {}", diff.changed_names.len());
+    }
+}
+
+/// Prints how the declaration `id` differs between the two graphs.
+///
+/// Lists members whose key exists on one side only and notes differing ancestors,
+/// descendants or singleton class. Prints nothing when the declaration is missing from
+/// either graph or is a namespace in neither.
+fn print_declaration_diff(graph_a: &Graph, graph_b: &Graph, id: rubydex::model::ids::DeclarationId) {
+    let (Some(decl_a), Some(decl_b)) = (graph_a.declarations().get(&id), graph_b.declarations().get(&id)) else {
+        return;
+    };
+
+    let (ns_a, ns_b) = match (decl_a.as_namespace(), decl_b.as_namespace()) {
+        (Some(ns_a), Some(ns_b)) => (ns_a, ns_b),
+        (None, None) => return,
+        _ => {
+            println!(" kind differs");
+            return;
+        }
+    };
+
+    if ns_a.members() != ns_b.members() {
+        for (str_id, decl_id) in ns_a.members() {
+            if !ns_b.members().contains_key(str_id)
+                && let Some(decl) = graph_a.declarations().get(decl_id)
+            {
+                println!(" - {}", decl.name());
+            }
+        }
+        for (str_id, decl_id) in ns_b.members() {
+            if !ns_a.members().contains_key(str_id)
+                && let Some(decl) = graph_b.declarations().get(decl_id)
+            {
+                println!(" + {}", decl.name());
+            }
+        }
+    }
+
+    let anc_a = ns_a.ancestors();
+    let anc_b = ns_b.ancestors();
+    let ancestors_a: Vec<_> = anc_a.iter().collect();
+    let ancestors_b: Vec<_> = anc_b.iter().collect();
+    if ancestors_a != ancestors_b {
+        println!(" ancestors differ");
+    }
+
+    if ns_a.descendants() != ns_b.descendants() {
+        println!(" descendants differ");
+    }
+
+    if ns_a.singleton_class() != ns_b.singleton_class() {
+        println!(" singleton_class differs");
+    }
+}
+
+/// Builds the graph for each ref, restores the original checkout and prints the diff.
+fn main() -> Result<(), String> {
+    let args = Args::parse();
+
+    let original_ref = get_current_ref(&args.path)?;
+
+    let graphs = build_graph_at(&args.path, &args.ref_a).and_then(|graph_a| {
+        let graph_b = build_graph_at(&args.path, &args.ref_b)?;
+        Ok((graph_a, graph_b))
+    });
+
+    // Restore the original ref even when a checkout failed, so the repository is not left
+    // on one of the compared refs.
+    println!("Restoring {original_ref}...");
+    let restored = checkout(&args.path, &original_ref);
+
+    let (graph_a, graph_b) = match (graphs, restored) {
+        (Ok(graphs), Ok(())) => graphs,
+        (Err(e), Ok(())) | (Ok(_), Err(e)) => return Err(e),
+        (Err(e), Err(restore_err)) => return Err(format!("{e}\n{restore_err}")),
+    };
+
+    println!("\nComparing graphs...");
+    match diff::diff(&graph_a, &graph_b) {
+        Some(diff) => {
+            println!("Graphs differ!");
+            print_diff(&diff, &graph_a, &graph_b);
+        }
+        None => {
+            println!("Graphs are identical!");
+        }
+    }
+
+    Ok(())
+}