Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 136 additions & 15 deletions rust/rubydex/src/indexing/rbs_indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ use crate::indexing::local_graph::LocalGraph;
use crate::model::comment::Comment;
use crate::model::definitions::{
ClassDefinition, ConstantDefinition, Definition, DefinitionFlags, ExtendDefinition, GlobalVariableDefinition,
IncludeDefinition, Mixin, ModuleDefinition, PrependDefinition,
IncludeDefinition, MethodDefinition, Mixin, ModuleDefinition, PrependDefinition,
};
use crate::model::visibility::Visibility;
use crate::model::document::Document;
use crate::model::ids::{DefinitionId, NameId, ReferenceId, UriId};
use crate::model::name::{Name, ParentScope};
Expand Down Expand Up @@ -147,14 +148,49 @@ impl<'a> RBSIndexer<'a> {
}
}

fn collect_comments(comment_node: Option<CommentNode>) -> Vec<Comment> {
comment_node
.into_iter()
.map(|comment| {
let text = Self::bytes_to_string(comment.string().as_bytes());
Comment::new(Offset::from_rbs_location(&comment.location()), text)
})
.collect()
#[allow(clippy::cast_possible_truncation)]
fn collect_comments(&self, comment_node: Option<CommentNode>) -> Vec<Comment> {
let Some(comment_node) = comment_node else {
return Vec::new();
};

let location = comment_node.location();
let start = location.start().cast_unsigned() as usize;
let end = location.end().cast_unsigned() as usize;

let source_bytes = self.source.as_bytes();

// Find the indentation level by scanning backwards from start to the line beginning
let mut indent_start = start;
while indent_start > 0 && source_bytes[indent_start - 1] == b' ' {
indent_start -= 1;
}
let indent_len = start - indent_start;

let comment_block = &self.source[start..end];
let lines: Vec<&str> = comment_block.split('\n').collect();

let mut comments = Vec::with_capacity(lines.len());
let mut current_offset = start as u32;

for (i, line) in lines.iter().enumerate() {
let line_text = if i == 0 {
line.to_string()
} else {
line[indent_len..].to_string()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can panic if a subsequent comment line has fewer leading spaces than indent_len. For example, if the first line has 4 spaces of indentation but a later line accidentally starts at column 0, line[4..] would panic with an out-of-bounds string slice.

Might be safer to use line.get(indent_len..).unwrap_or(line) here:

Suggested change
line[indent_len..].to_string()
line.get(indent_len..).unwrap_or(line).to_string()

};

let line_bytes = line_text.len() as u32;
let offset = Offset::new(current_offset, current_offset + line_bytes);
comments.push(Comment::new(offset, line_text));

// Advance past current line + newline + indentation for the next line
if i < lines.len() - 1 {
current_offset += line_bytes + 1 + indent_len as u32;
}
}

comments
}

fn register_definition(
Expand Down Expand Up @@ -201,7 +237,7 @@ impl Visit for RBSIndexer<'_> {
let offset = Offset::from_rbs_location(&class_node.location());
let name_offset = Offset::from_rbs_location(&type_name.name().location());

let comments = Self::collect_comments(class_node.comment());
let comments = self.collect_comments(class_node.comment());

let superclass_ref = class_node.super_class().as_ref().map(|super_node| {
let type_name = super_node.name();
Expand Down Expand Up @@ -241,7 +277,7 @@ impl Visit for RBSIndexer<'_> {
let offset = Offset::from_rbs_location(&module_node.location());
let name_offset = Offset::from_rbs_location(&type_name.name().location());

let comments = Self::collect_comments(module_node.comment());
let comments = self.collect_comments(module_node.comment());

let definition = Definition::Module(Box::new(ModuleDefinition::new(
name_id,
Expand Down Expand Up @@ -271,7 +307,7 @@ impl Visit for RBSIndexer<'_> {
let name_id = self.index_type_name(&type_name, nesting_name_id);
let offset = Offset::from_rbs_location(&constant_node.location());

let comments = Self::collect_comments(constant_node.comment());
let comments = self.collect_comments(constant_node.comment());

let definition = Definition::Constant(Box::new(ConstantDefinition::new(
name_id,
Expand All @@ -293,7 +329,7 @@ impl Visit for RBSIndexer<'_> {
.intern_string(Self::bytes_to_string(global_node.name().name()));
let offset = Offset::from_rbs_location(&global_node.location());

let comments = Self::collect_comments(global_node.comment());
let comments = self.collect_comments(global_node.comment());

let definition = Definition::GlobalVariable(Box::new(GlobalVariableDefinition::new(
str_id,
Expand Down Expand Up @@ -324,6 +360,42 @@ impl Visit for RBSIndexer<'_> {
Mixin::Extend(ExtendDefinition::new(ref_id))
});
}

fn visit_method_definition_node(&mut self, def_node: &node::MethodDefinitionNode) {
// Singleton and singleton_instance methods are not indexed for now.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should have the mechanisms for indexing singleton methods. Not sure what singleton_instance method means.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’ve been working on instance method support because it's easier to finish.

I’ll work on singleton method support once I finish indexing instance methods.
singleton_instance is for module_function. (Yeah, that makes things even more complicated. 😄)

if matches!(
def_node.kind(),
node::MethodDefinitionKind::Singleton | node::MethodDefinitionKind::SingletonInstance
) {
return;
}

let str_id = self.local_graph.intern_string(format!("{}()", Self::bytes_to_string(def_node.name().name())));
let offset = Offset::from_rbs_location(&def_node.location());
let comments = self.collect_comments(def_node.comment());
let flags = Self::flags(&def_node.annotations());
let lexical_nesting_id = self.parent_lexical_scope_id();

// RBS carries visibility directly on the node; Unspecified defaults to Public.
let visibility = match def_node.visibility() {
node::MethodDefinitionVisibility::Private => Visibility::Private,
node::MethodDefinitionVisibility::Public | node::MethodDefinitionVisibility::Unspecified => Visibility::Public,
};

let definition = Definition::Method(Box::new(MethodDefinition::new(
str_id,
self.uri_id,
offset,
comments,
flags,
lexical_nesting_id,
Vec::new(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was reflecting on this for the parameters. I think we have a few options, but my suggestion is that we make parameters in MethodDefinition an enum.

enum MethodParameters {
  Simple(Signature),
  Overloaded(Vec<Signature>),
}

// where Signature just holds the vector of parameters for convenience

The reason I feel like this is the best option is because a lot of methods in RBS will not have overloads. If we were to create a separate RBSMethodDefinition that always accepted a vector of signatures, we would pay the price of allocating vectors for a bunch of RBS methods that don't have overloads.

I believe we'll reduce our memory usage and keep the implementation simple by simply allowing method defs to accept a single signature or overloads.

visibility,
None,
)));

self.register_definition(definition, lexical_nesting_id);
}
}

#[cfg(test)]
Expand All @@ -332,6 +404,8 @@ mod tests {

use crate::indexing::rbs_indexer::RBSIndexer;
use crate::model::definitions::DefinitionFlags;
use crate::model::visibility::Visibility;
use crate::offset::Offset;
use crate::test_utils::LocalGraphTest;
use crate::{
assert_def_comments_eq, assert_def_mixins_eq, assert_def_name_eq, assert_def_name_offset_eq, assert_def_str_eq,
Expand Down Expand Up @@ -526,7 +600,7 @@ mod tests {

assert_definition_at!(&context, "2:1-2:12", Constant, |def| {
assert_def_name_eq!(&context, def, "FOO");
assert_def_comments_eq!(&context, def, ["Some documentation\n"]);
assert_def_comments_eq!(&context, def, ["# Some documentation"]);
});
}

Expand All @@ -551,7 +625,7 @@ mod tests {

assert_definition_at!(&context, "4:1-4:14", GlobalVariable, |def| {
assert_def_str_eq!(&context, def, "$bar");
assert_def_comments_eq!(&context, def, ["A global variable\n"]);
assert_def_comments_eq!(&context, def, ["# A global variable"]);
});
}

Expand Down Expand Up @@ -714,4 +788,51 @@ mod tests {
assert!(def.flags().contains(DefinitionFlags::DEPRECATED));
});
}

#[test]
fn split_multiline_comments() {
let source = "# First line\n# Second line\n# Third line\nclass Foo\nend";
let signature = node::parse(source.as_bytes()).expect("Failed to parse RBS source");
let decl = signature.declarations().iter().next().expect("Expected a declaration");
let Node::Class(class_node) = decl else {
panic!("Expected a class declaration");
};

let indexer = RBSIndexer::new("file:///foo.rbs".to_string(), source);
let comments = indexer.collect_comments(class_node.comment());

assert_eq!(comments.len(), 3);
assert_eq!(comments[0].string(), "# First line");
assert_eq!(comments[0].offset(), &Offset::new(0, 12));
assert_eq!(comments[1].string(), "# Second line");
assert_eq!(comments[1].offset(), &Offset::new(13, 26));
assert_eq!(comments[2].string(), "# Third line");
assert_eq!(comments[2].offset(), &Offset::new(27, 39));
}

#[test]
fn index_method_definition() {
let context = index_source({
"
class Foo
def bar: () -> void
end
"
});

assert_no_local_diagnostics!(&context);

assert_definition_at!(&context, "1:1-3:4", Class, |class_def| {
assert_def_name_eq!(&context, class_def, "Foo");
assert_eq!(1, class_def.members().len());

assert_definition_at!(&context, "2:3-2:22", Method, |def| {
assert_def_str_eq!(&context, def, "bar()");
assert!(def.receiver().is_none());
assert_eq!(def.visibility(), &Visibility::Public);
assert_eq!(class_def.id(), def.lexical_nesting_id().unwrap());
assert_eq!(class_def.members()[0], def.id());
});
});
}
}
Loading