From fbb5e617202f3970fae01ed3b2df88e6816b25f2 Mon Sep 17 00:00:00 2001 From: Calum Smith Date: Thu, 9 Oct 2025 13:11:43 -0400 Subject: [PATCH] =?UTF-8?q?Improve=20names=20and=20types=20in=20Rust=20std?= =?UTF-8?q?lib=20scraper=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two main fixes: 1. Names: all pages except for modules' index pages ignored what module they were from, and were just prefixed with `std::`. This meant there were 13 pages named `std::Iter`, about structs named `Iter` from different modules. It also meant that things outside modules, e.g. primitive types, were prefixed with `std::`, naming the page on `bool` as `std::bool`, although it can't be referenced that way in code. It also meant that there were two pages named `std::char` - one for the module, and one for the primitive type `char`. This change prefixes everything in a module with that module's path, and does not prefix primitives. It also includes submodules in the path. For example: std::fn → fn std::Iter → std::option::Iter std::MetadataExt → std::os::linux::fs::MetadataExt 2. Types: almost everything was filed in `std`, with the exception of modules' index pages and primitive types. This meant there were over 30,000 pages in the `std` type, and many types for modules with only one page in them. This change creates one type per top-level module (e.g. `std::option`), which also covers all of that module's submodules, and files anything not in a module, e.g. primitive types, in `std`. 
For example: std::bool / std::bool → std / bool std / std::Iter → std::option / std::option::Iter std / std::MetadataExt → std::os / std::os::linux::fs::MetadataExt --- lib/docs/filters/rust/entries.rb | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/docs/filters/rust/entries.rb b/lib/docs/filters/rust/entries.rb index 5a60aae785..4effefd668 100644 --- a/lib/docs/filters/rust/entries.rb +++ b/lib/docs/filters/rust/entries.rb @@ -22,7 +22,15 @@ def get_name else at_css('main h1').at_css('button')&.remove name = at_css('main h1').content.remove(/\A.+\s/).remove('⎘') - mod = slug.split('/').first + path = slug.split('/') + if path.length == 2 + # Anything in the standard library but not in a `std::*` module is + # globally available, not `use`d from the `std` crate, so we don't + # prepend `std::` to their name. + return name + end + path.pop if path.last == 'index' + mod = path[0..-2].join('::') name.prepend("#{mod}::") unless name.start_with?(mod) name end @@ -38,13 +46,12 @@ def get_type elsif slug.start_with?('error_codes') 'Compiler Errors' else - path = name.split('::') - heading = at_css('main h1').content.strip - if path.length > 2 || (path.length == 2 && (heading.start_with?('Module') || heading.start_with?('Primitive'))) - path[0..1].join('::') - else - path[0] - end + path = slug.split('/') + # Discard the filename, and use the first two path components as the + # type, or one if there is only one. This means anything in a module + # `std::foo` or submodule `std::foo::bar` gets type `std::foo`, and + # things not in modules, e.g. primitive types, get type `std`. + path[0..-2][0..1].join('::') end end