From 72564a8f9e817a81af3a5a6422e75de08e5ccae6 Mon Sep 17 00:00:00 2001
From: spamguy
Date: Mon, 21 Apr 2025 19:49:53 -0700
Subject: [PATCH 1/2] Add POC scraper/cleaner for Angular v17+ docs

Add version numbers; add redirect handler.
Add Angular v20.
Move HTML filters out of base Angular config.
Add CLI section.
---
 lib/docs/filters/angular/clean_html_v18.rb |  54 ++++++++++
 lib/docs/filters/angular/entries.rb        |   2 +
 lib/docs/scrapers/angular.rb               | 111 +++++++++++++++++----
 3 files changed, 148 insertions(+), 19 deletions(-)
 create mode 100644 lib/docs/filters/angular/clean_html_v18.rb

diff --git a/lib/docs/filters/angular/clean_html_v18.rb b/lib/docs/filters/angular/clean_html_v18.rb
new file mode 100644
index 0000000000..5d8f5c6105
--- /dev/null
+++ b/lib/docs/filters/angular/clean_html_v18.rb
@@ -0,0 +1,54 @@
+module Docs
+  class Angular
+    # HTML cleanup for the pages handled by the 'angular/clean_html_v18' filter.
+    class CleanHtmlV18Filter < Filter
+      def call
+        @doc = at_css('.docs-viewer') || @doc # narrow to the viewer when the page has one
+
+        # Extract <h1> from decorative header (pages without an <h1> are left as-is).
+        title = at_css('h1')
+        @doc.prepend_child(title) if title
+        css('h1[class]').remove_attr('class')
+
+        css(
+          '.docs-breadcrumb',
+          '.docs-github-links',
+          'docs-table-of-contents',
+          '.docs-reference-category',
+          '.docs-reference-title',
+          '#jump-msg'
+        ).remove
+
+        # Strip anchor links from headers.
+        css('h2', 'h3', 'h4').each do |node|
+          node.content = node.inner_text
+        end
+
+        # Make every code block a <pre> holding the text of its `.line` nodes.
+        css('.docs-code > pre > code').each do |code|
+          code.name = 'pre'
+          code['data-language'] = 'ts'
+          code.content = code.css('.line').map(&:content).join("\n")
+          code.parent.parent.replace(code) # swap out the whole .docs-code wrapper
+        end
+
+        # Better format content in CLI reference.
+        css('.docs-ref-content').each do |ref|
+          option = ref.at_css('.docs-reference-option code')
+          next unless option
+
+          option.name = 'h3'
+          option.parent.replace(option)
+        end
+
+        css('.docs-reference-type-and-default', '.docs-reference-option-aliases').each do |node|
+          node.css('span').each { |label| label.name = 'h4' }
+        end
+
+        css('footer').remove
+
+        doc
+      end
+    end
+  end
+end
diff --git a/lib/docs/filters/angular/entries.rb b/lib/docs/filters/angular/entries.rb
index ae03c5829a..103a933b87 100644
--- a/lib/docs/filters/angular/entries.rb
+++ b/lib/docs/filters/angular/entries.rb
@@ -10,6 +10,8 @@ def get_name
       def get_type
         if slug.start_with?('guide')
           'Guide'
+        elsif slug.start_with?('cli')
+          'CLI'
         elsif slug.start_with?('tutorial')
           'Tutorial'
         elsif slug.start_with?('api/platform-browser-dynamic')
diff --git a/lib/docs/scrapers/angular.rb b/lib/docs/scrapers/angular.rb
index dcbdd0ea70..7b3ff81165 100644
--- a/lib/docs/scrapers/angular.rb
+++ b/lib/docs/scrapers/angular.rb
@@ -4,32 +4,30 @@ module Docs
   class Angular < UrlScraper
     self.type = 'angular'
     self.links = {
-      home: 'https://angular.io/',
+      home: 'https://angular.dev/',
       code: 'https://github.com/angular/angular'
     }
     self.base_url = 'https://angular.io/'
     self.root_path = 'docs'
 
-    html_filters.push 'angular/clean_html', 'angular/entries'
-
     options[:max_image_size] = 256_000
 
     options[:attribution] = <<-HTML
-      © 2010–2023 Google, Inc.
-      Licensed under the Creative Commons Attribution License 4.0.
+      Super-powered by Google ©2010–2025.
+      Code licensed under an MIT-style License. Documentation licensed under CC BY 4.0.
     HTML
 
     options[:follow_links] = false
-    options[:only_patterns] = [/\Aguide/, /\Atutorial/, /\Aapi/]
+    options[:only_patterns] = [/\Aguide/, /\Aapi/, /\Acli/]
     options[:fix_urls_before_parse] = ->(url) do
       url.sub! %r{\Aguide/}, '/guide/'
-      url.sub! %r{\Atutorial/}, '/tutorial/'
       url.sub! %r{\Aapi/}, '/api/'
+      url.sub! %r{\Acli/}, '/cli/'
       url.sub! %r{\Agenerated/}, '/generated/'
       url
     end
 
-    module Common
+    module JsonNavigation
       private
 
       def initial_urls
@@ -79,91 +77,166 @@ def url_for(path)
         path = path.gsub(/[A-Z_]/) {|s| s.downcase + '_'}
         super
       end
-      include Docs::Angular::Common
+      include Docs::Angular::JsonNavigation
+    end
+
+    module Since18
+      def self.handle_redirects(version)
+        lambda do |url|
+          url.sub! '/guide/templates/reference-variables', '/guide/templates/variables#template-reference-variables'
+          url.sub! '/guide/signals/inputs', '/guide/components/inputs'
+          url.sub! '/guide/defer', '/guide/templates/defer'
+          url.sub! '/guide/templates/class-binding', '/guide/templates/binding#css-class-and-style-property-bindings'
+          url.sub! %r{/guide/components$}, '/guide/components/anatomy-of-components'
+          url.sub! '/guide/templates/property-binding', '/guide/templates/binding#binding-dynamic-properties-and-attributes'
+          url.sub! %r{/guide/ngmodules$}, '/guide/ngmodules/overview'
+          url.sub! '/guide/components/importing', '/guide/components/anatomy-of-components#using-components'
+
+          url.sub! '/guide/components/anatomy-of-components', '/guide/components' if version == '20'
+
+          url
+        end
+      end
+    end
+
+    version '20' do
+      self.release = '20.3.4'
+      self.base_url = 'https://angular.dev/'
+      self.root_path = 'overview'
+
+      options[:follow_links] = true
+      options[:container] = '.docs-app-main-content'
+      options[:fix_urls] = Since18.handle_redirects(self.version)
+
+      html_filters.push 'angular/entries', 'angular/clean_html_v18'
+
+      include Docs::Angular::Since18
+    end
+
+    version '19' do
+      self.release = '19.2.15'
+      self.base_url = 'https://v19.angular.dev/'
+      self.root_path = 'overview'
+
+      options[:follow_links] = true
+      options[:container] = '.docs-app-main-content'
+      options[:fix_urls] = Since18.handle_redirects(self.version)
+
+      html_filters.push 'angular/entries', 'angular/clean_html_v18'
+
+      include Docs::Angular::Since18
+    end
+
+    version '18' do
+      self.release = '18.2.14'
+      self.base_url = 'https://v18.angular.dev/'
+      self.root_path = 'overview'
+
+      options[:follow_links] = true
+      options[:container] = '.docs-app-main-content'
+      options[:fix_urls] = Since18.handle_redirects(self.version)
+
+      html_filters.push 'angular/entries', 'angular/clean_html_v18'
+
+      include Docs::Angular::Since18
     end
 
-    version do
+    version '17' do
       self.release = '17.0.8'
-      self.base_url = 'https://angular.io/'
+      self.base_url = 'https://v17.angular.io/'
+      html_filters.push 'angular/clean_html', 'angular/entries'
       include Docs::Angular::Since12
     end
 
     version '16' do
       self.release = '16.2.12'
       self.base_url = 'https://v16.angular.io/'
+      html_filters.push 'angular/clean_html', 'angular/entries'
       include Docs::Angular::Since12
     end
 
     version '15' do
       self.release = '15.2.9'
       self.base_url = 'https://v15.angular.io/'
+      html_filters.push 'angular/clean_html', 'angular/entries'
       include Docs::Angular::Since12
     end
 
     version '14' do
       self.release = '14.2.12'
       self.base_url = 'https://v14.angular.io/'
+      html_filters.push 'angular/clean_html', 'angular/entries'
       include Docs::Angular::Since12
     end
 
     version '13' do
       self.release = '13.3.8'
       self.base_url = 'https://v13.angular.io/'
+      html_filters.push 'angular/clean_html', 'angular/entries'
       include Docs::Angular::Since12
     end
 
     version '12' do
       self.release = '12.2.13'
       self.base_url = 'https://v12.angular.io/'
+      html_filters.push 'angular/clean_html', 'angular/entries'
       include Docs::Angular::Since12
     end
 
     version '11' do
       self.release = '11.2.14'
       self.base_url = 'https://v11.angular.io/'
-      include Docs::Angular::Common
+      html_filters.push 'angular/clean_html', 'angular/entries'
+      include Docs::Angular::JsonNavigation
     end
 
     version '10' do
       self.release = '10.2.3'
       self.base_url = 'https://v10.angular.io/'
-      include Docs::Angular::Common
+      html_filters.push 'angular/clean_html', 'angular/entries'
+      include Docs::Angular::JsonNavigation
     end
 
     version '9' do
       self.release = '9.1.12'
       self.base_url = 'https://v9.angular.io/'
-      include Docs::Angular::Common
+      html_filters.push 'angular/clean_html', 'angular/entries'
+      include Docs::Angular::JsonNavigation
     end
 
     version '8' do
       self.release = '8.2.14'
       self.base_url = 'https://v8.angular.io/'
-      include Docs::Angular::Common
+      html_filters.push 'angular/clean_html', 'angular/entries'
+      include Docs::Angular::JsonNavigation
     end
 
     version '7' do
       self.release = '7.2.15'
       self.base_url = 'https://v7.angular.io/'
-      include Docs::Angular::Common
+      html_filters.push 'angular/clean_html', 'angular/entries'
+      include Docs::Angular::JsonNavigation
     end
 
     version '6' do
       self.release = '6.1.10'
       self.base_url = 'https://v6.angular.io/'
-      include Docs::Angular::Common
+      html_filters.push 'angular/clean_html', 'angular/entries'
+      include Docs::Angular::JsonNavigation
     end
 
     version '5' do
       self.release = '5.2.11'
       self.base_url = 'https://v5.angular.io/'
-      include Docs::Angular::Common
+      html_filters.push 'angular/clean_html', 'angular/entries'
+      include Docs::Angular::JsonNavigation
     end
 
     version '4' do
       self.release = '4.4.6'
       self.base_url = 'https://v4.angular.io/'
-      include Docs::Angular::Common
+      html_filters.push 'angular/clean_html', 'angular/entries'
+      include Docs::Angular::JsonNavigation
     end
 
     version '2' do

From ba0aa600b543e81a07dd9f41752a96ad42dbb610 Mon Sep 17 00:00:00 2001
From: Simon Legner
Date: Fri, 10 Oct 2025 09:03:30 +0200
Subject: [PATCH 2/2] Update Angular documentation (20.3.4)

---
 assets/stylesheets/pages/_angular.scss | 36 ++++++++++++++++++++++++++
 lib/docs/scrapers/angular.rb           |  2 +-
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/assets/stylesheets/pages/_angular.scss b/assets/stylesheets/pages/_angular.scss
index 84fb16ea33..18b0d93c03 100644
--- a/assets/stylesheets/pages/_angular.scss
+++ b/assets/stylesheets/pages/_angular.scss
@@ -21,4 +21,40 @@
   }
 
   td h3 { margin: 0 !important; }
+
+  .docs-reference-member-card .docs-reference-card-item {
+    &:has(~ .docs-reference-card-item),
+    &:last-child:not(:first-of-type) {
+      margin: .25rem 0 1.5rem 1.5rem;
+      padding: .625rem 1rem;
+      @extend %box;
+    }
+    span {
+      display: inline-block;
+    }
+    .docs-param-group {
+      margin-block-start: 1rem;
+      &:not(:has(~ .docs-param-group)) {
+        margin-block: 1rem;
+      }
+      .docs-param-name {
+        @extend %code;
+        margin-inline-end: 0.25rem;
+      }
+      .docs-param-name:after {
+        content: ":";
+      }
+      .docs-parameter-description p:first-child {
+        margin-block-start: 0;
+      }
+    }
+    .docs-param-keyword {
+      color: var(--focusText);
+      @extend %code;
+      margin-inline-end: 0.5rem;
+    }
+    .docs-return-type {
+      padding-block: 1rem;
+    }
+  }
 }
diff --git a/lib/docs/scrapers/angular.rb b/lib/docs/scrapers/angular.rb
index 7b3ff81165..4725c3fd00 100644
--- a/lib/docs/scrapers/angular.rb
+++ b/lib/docs/scrapers/angular.rb
@@ -99,7 +99,7 @@ def self.handle_redirects(version)
       end
     end
 
-    version '20' do
+    version do
       self.release = '20.3.4'
       self.base_url = 'https://angular.dev/'
       self.root_path = 'overview'