From 983a415bd55c614874935dc2baa4646096da78c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20R=C3=B6nnqvist?= Date: Wed, 3 Dec 2025 13:39:19 +0100 Subject: [PATCH 1/7] Add a new target for rendering content into static HTML rdar://163326857 --- Package.swift | 24 +- Sources/DocCHTML/CMakeLists.txt | 23 + Sources/DocCHTML/LinkProvider.swift | 115 +++ Sources/DocCHTML/MarkdownRenderer.swift | 628 ++++++++++++++++ Sources/DocCHTML/WordBreak.swift | 82 +++ Sources/DocCHTML/XMLNode+element.swift | 57 ++ .../DocCHTMLTests/MarkdownRendererTests.swift | 688 ++++++++++++++++++ Tests/DocCHTMLTests/WordBreakTests.swift | 86 +++ 8 files changed, 1702 insertions(+), 1 deletion(-) create mode 100644 Sources/DocCHTML/CMakeLists.txt create mode 100644 Sources/DocCHTML/LinkProvider.swift create mode 100644 Sources/DocCHTML/MarkdownRenderer.swift create mode 100644 Sources/DocCHTML/WordBreak.swift create mode 100644 Sources/DocCHTML/XMLNode+element.swift create mode 100644 Tests/DocCHTMLTests/MarkdownRendererTests.swift create mode 100644 Tests/DocCHTMLTests/WordBreakTests.swift diff --git a/Package.swift b/Package.swift index 3ceb324be8..f5162a432b 100644 --- a/Package.swift +++ b/Package.swift @@ -2,7 +2,7 @@ /* This source file is part of the Swift.org open source project - Copyright (c) 2021-2024 Apple Inc. and the Swift project authors + Copyright (c) 2021-2025 Apple Inc. and the Swift project authors Licensed under Apache License v2.0 with Runtime Library Exception See https://swift.org/LICENSE.txt for license information @@ -122,6 +122,7 @@ let package = Package( // This target shouldn't have any local dependencies so that all other targets can depend on it. // We can add dependencies on SymbolKit and Markdown here but they're not needed yet. 
], + exclude: ["CMakeLists.txt"], swiftSettings: [.swiftLanguageMode(.v6)] ), @@ -134,6 +135,27 @@ let package = Package( swiftSettings: [.swiftLanguageMode(.v6)] ), + .target( + name: "DocCHTML", + dependencies: [ + .target(name: "DocCCommon"), + .product(name: "Markdown", package: "swift-markdown"), + .product(name: "SymbolKit", package: "swift-docc-symbolkit"), + ], + exclude: ["CMakeLists.txt"], + swiftSettings: [.swiftLanguageMode(.v6)] + ), + .testTarget( + name: "DocCHTMLTests", + dependencies: [ + .target(name: "DocCHTML"), + .target(name: "SwiftDocC"), + .product(name: "Markdown", package: "swift-markdown"), + .target(name: "SwiftDocCTestUtilities"), + ], + swiftSettings: [.swiftLanguageMode(.v6)] + ), + // Test app for SwiftDocCUtilities .executableTarget( name: "signal-test-app", diff --git a/Sources/DocCHTML/CMakeLists.txt b/Sources/DocCHTML/CMakeLists.txt new file mode 100644 index 0000000000..3af4f6d1ff --- /dev/null +++ b/Sources/DocCHTML/CMakeLists.txt @@ -0,0 +1,23 @@ +#[[ +This source file is part of the Swift open source project + +Copyright © 2014 - 2025 Apple Inc. and the Swift project authors +Licensed under Apache License v2.0 with Runtime Library Exception + +See https://swift.org/LICENSE.txt for license information +#]] + +add_library(DocCHTML STATIC + LinkProvider.swift + MarkdownRenderer.swift + WordBreak.swift + XMLNode+element.swift) +target_link_libraries(DocCHTML PRIVATE + DocCCommon) +target_link_libraries(DocCHTML PUBLIC + SwiftMarkdown::Markdown + DocC::SymbolKit) +# FIXME(compnerd) workaround leaking dependencies +target_link_libraries(DocCHTML PUBLIC + libcmark-gfm + libcmark-gfm-extensions) diff --git a/Sources/DocCHTML/LinkProvider.swift b/Sources/DocCHTML/LinkProvider.swift new file mode 100644 index 0000000000..b44dda554f --- /dev/null +++ b/Sources/DocCHTML/LinkProvider.swift @@ -0,0 +1,115 @@ +/* + This source file is part of the Swift.org open source project + + Copyright (c) 2025 Apple Inc. 
and the Swift project authors + Licensed under Apache License v2.0 with Runtime Library Exception + + See https://swift.org/LICENSE.txt for license information + See https://swift.org/CONTRIBUTORS.txt for Swift project authors +*/ + +package import Foundation +package import Markdown +package import DocCCommon + +/// A type that provides information about other pages, and on-page elements, that the rendered page references. +package protocol LinkProvider { + /// Provide information about another page or on-page element, or `nil` if the other page can't be found. + func element(for path: URL) -> LinkedElement? + + /// Provide the path for a symbol based on its unique identifier, or `nil` if the other symbol with that identifier can't be found. + func pathForSymbolID(_ usr: String) -> URL? + + /// Provide information about an asset, or `nil` if the asset can't be found. + func assetNamed(_ assetName: String) -> LinkedAsset? + + /// Fallback link text for a link string that the provider couldn't provide any information for. + func fallbackLinkText(linkString: String) -> String +} + +package struct LinkedElement { + /// The path within the output archive to the linked element. + package var path: URL + /// The names of the linked element, for display when the element is referenced in inline content. + /// + /// Articles, headings, tutorials, and similar pages have a ``Names/single/conceptual(_:)`` name. + /// Symbols can either have a ``Names/single/symbol(_:)`` name or have different names for each language representation (``Names/languageSpecificSymbol``). + package var names: Names + /// The subheadings of the linked element, for display when the element is referenced in either a Topics section, See Also section, or in a `@Links` directive. + /// + /// Articles, headings, tutorials, and similar pages have a ``Subheadings/single/conceptual(_:)`` subheading. 
+ /// Symbols can either have a ``Subheadings/single/symbol(_:)`` subheading or have different subheadings for each language representation (``Subheadings/languageSpecificSymbol``). + package var subheadings: Subheadings + /// The abstract of the page—to be displayed in either a Topics section, See Also section, or in a `@Links` directive—or `nil` if the linked element doesn't have an abstract. + package var abstract: Paragraph? + + package init(path: URL, names: Names, subheadings: Subheadings, abstract: Paragraph?) { + self.path = path + self.names = names + self.subheadings = subheadings + self.abstract = abstract + } + + /// The single name or language-specific names to use when referring to a linked element in inline content. + package enum Names { + /// This element has the same name in all language representations + case single(Name) + /// This element is a symbol with different names in different languages. + /// + /// Because `@DisplayName` applies to all language representations, these language specific names are always the symbol's subheading declaration and should display in a monospaced font. + case languageSpecificSymbol([SourceLanguage: String]) + } + package enum Name { + /// The name refers to an article, heading, or custom `@DisplayName` and should display as regular text. + case conceptual(String) + /// The name refers to a symbol's subheading declaration and should display in a monospaced font. + case symbol(String) + } + + /// The single subheading or language-specific subheadings to use when referring to a linked element in either a Topics section, See Also section, or in a `@Links` directive. + package enum Subheadings { + /// This element has the same subheading in all language representations + case single(Subheading) + /// This element is a symbol with different subheadings in different languages. 
+ /// + /// Because `@DisplayName` applies to all language representations, these language specific names are always the symbol's subheading declaration and should display in a monospaced font. + case languageSpecificSymbol([SourceLanguage: [SymbolNameFragment]]) + } + package enum Subheading { + /// The name refers to an article, heading, or custom `@DisplayName` and should display as regular text. + case conceptual(String) + /// The name refers to a symbol's subheading declaration and should display in a monospaced font. + case symbol([SymbolNameFragment]) + } + + /// A fragment in a symbol's name + package struct SymbolNameFragment { + /// The textual spelling of this fragment + package var text: String + /// The kind of fragment + package var kind: Kind + + /// The display kind of a single symbol name fragment + package enum Kind: String { + case identifier, decorator + } + + package init(text: String, kind: Kind) { + self.text = text + self.kind = kind + } + } +} + +package struct LinkedAsset { + /// The path within the output archive to each image variant, by their light/dark style. + package var images: [ColorStyle: [Int /* display scale*/: URL]] + + package init(images: [ColorStyle : [Int : URL]]) { + self.images = images + } + + package enum ColorStyle: String { + case light, dark + } +} diff --git a/Sources/DocCHTML/MarkdownRenderer.swift b/Sources/DocCHTML/MarkdownRenderer.swift new file mode 100644 index 0000000000..3c89aa7529 --- /dev/null +++ b/Sources/DocCHTML/MarkdownRenderer.swift @@ -0,0 +1,628 @@ +/* + This source file is part of the Swift.org open source project + + Copyright (c) 2025 Apple Inc. 
and the Swift project authors + Licensed under Apache License v2.0 with Runtime Library Exception + + See https://swift.org/LICENSE.txt for license information + See https://swift.org/CONTRIBUTORS.txt for Swift project authors +*/ + +#if canImport(FoundationXML) +// TODO: Consider other HTML rendering options as a future improvement (rdar://165755530) +package import FoundationXML +package import FoundationEssentials +internal import struct Foundation.CharacterSet +#else +package import Foundation +#endif +package import Markdown + +/// The primary goal for the rendered HTML output. +package enum RenderGoal { + /// The rendered output should prioritize richness, optimizing for human consumption. + /// + /// The rendered output might include explicit word-breaks, syntax highlighted code, etc. + case richness + /// The minimalistic rendered output should prioritize conciseness, optimizing for consumption by machines such as SEO indexers or LLMs. + case conciseness +} + +/// An HTML renderer for DocC markdown content. +/// +/// Markdown elements that have different meaning depending on where they occur in the page structure (for example links in prose vs. links in topic sections) should be handled at a layer above this plain markdown renderer. +package struct MarkdownRenderer { + /// The path within the output archive to the page that this renderer renders. + let path: URL + /// The goal of the rendered HTML output. + let goal: RenderGoal + /// A type that provides information about other pages that the rendered page references. 
+ let linkProvider: Provider + + package init(path: URL, goal: RenderGoal, linkProvider: Provider) { + self.path = path + self.goal = goal + self.linkProvider = linkProvider + } + + func visit(_ paragraph: Paragraph) -> XMLNode { + .element(named: "p", children: visit(paragraph.children)) + } + + func visit(_ blockQuote: BlockQuote) -> XMLNode { + let aside = Aside(blockQuote) + + var children: [XMLNode] = [ + .element(named: "p", children: [.text(aside.kind.displayName)], attributes: ["class": "label"]) + ] + for child in aside.content { + children.append(visit(child)) + } + + return .element( + named: "blockquote", + children: children, + attributes: ["class": "aside \(aside.kind.rawValue.lowercased())"] + ) + } + + package func visit(_ heading: Heading) -> XMLNode { + selfReferencingHeading(level: heading.level, content: visit(heading.children), plainTextTitle: heading.plainText) + } + + func selfReferencingHeading(level: Int, content: [XMLNode], plainTextTitle: @autoclosure () -> String) -> XMLElement { + switch goal { + case .conciseness: + return .element(named: "h\(level)", children: content) + + case .richness: + let id = urlReadableFragment(plainTextTitle().lowercased()) + return .element( + named: "h\(level)", + children: [ + // Wrap the heading content in an anchor ... + .element(named: "a", children: content, attributes: ["href": "#\(id)"]) + ], + // ... 
that refers to the heading itself + attributes: ["id": id] + ) + } + } + + func visit(_ emphasis: Emphasis) -> XMLNode { + .element(named: "i", children: visit(emphasis.children)) + } + + func visit(_ strong: Strong) -> XMLNode { + .element(named: "b", children: visit(strong.children)) + } + + func visit(_ strikethrough: Strikethrough) -> XMLNode { + .element(named: "s", children: visit(strikethrough.children)) + } + + func visit(_ inlineCode: InlineCode) -> XMLNode { + .element(named: "code", children: [.text(inlineCode.code)]) + } + + func visit(_ text: Text) -> XMLNode { + .text(text.string) + } + + func visit(_: LineBreak) -> XMLNode { + .element(named: "br") + } + + func visit(_: SoftBreak) -> XMLNode { + .text(" ") // A soft line break doesn't actually break the content + } + + func visit(_: ThematicBreak) -> XMLNode { + .element(named: "hr") + } + + private func _removeComments(from node: XMLNode) { + guard let element = node as? XMLElement, + let children = element.children + else { + return + } + + let withoutComments = children.filter { $0.kind != .comment } + element.setChildren(withoutComments) + + for child in withoutComments { + _removeComments(from: child) + } + } + + func visit(_ html: HTMLBlock) -> XMLNode { + do { + let parsed = try XMLElement(xmlString: html.rawHTML) + _removeComments(from: parsed) + return parsed + } catch { + return .text("") + } + } + + func visit(_ html: InlineHTML) -> XMLNode { + // Inline HTML is one tag at a time, meaning that the closing and opening tags are parsed separately + // Because of this, we can't parse it with `XMLElement` or `XMLParser`. + + // We assume that we want all tags except for comments + guard !html.rawHTML.hasPrefix("` that we'd want to exclude from the output. + // - An empty element like `
` or `
` that's complete on its own. + // - An element with children like `Something` that needs to be created out of multiple markup elements. + // + // FIXME: See if this can be extracted into 2 private functions to make the code easier to read. + // Because it may take multiple markdown elements to create an HTML element, we pop elements rather than iterating var elements = Array(container) outer: while !elements.isEmpty { let element = elements.removeFirst() guard let start = element as? InlineHTML else { + // If the markup _isn't_ inline HTML we can simply visit it to transform it. children.append(visit(element)) continue } - // Try to parse the smallest valid inline HTML + // Otherwise, we need to determine how long this markdown element is. var rawHTML = start.rawHTML guard !rawHTML.hasPrefix("