diff --git a/Package.swift b/Package.swift index 3ceb324be8..f5162a432b 100644 --- a/Package.swift +++ b/Package.swift @@ -2,7 +2,7 @@ /* This source file is part of the Swift.org open source project - Copyright (c) 2021-2024 Apple Inc. and the Swift project authors + Copyright (c) 2021-2025 Apple Inc. and the Swift project authors Licensed under Apache License v2.0 with Runtime Library Exception See https://swift.org/LICENSE.txt for license information @@ -122,6 +122,7 @@ let package = Package( // This target shouldn't have any local dependencies so that all other targets can depend on it. // We can add dependencies on SymbolKit and Markdown here but they're not needed yet. ], + exclude: ["CMakeLists.txt"], swiftSettings: [.swiftLanguageMode(.v6)] ), @@ -134,6 +135,27 @@ let package = Package( swiftSettings: [.swiftLanguageMode(.v6)] ), + .target( + name: "DocCHTML", + dependencies: [ + .target(name: "DocCCommon"), + .product(name: "Markdown", package: "swift-markdown"), + .product(name: "SymbolKit", package: "swift-docc-symbolkit"), + ], + exclude: ["CMakeLists.txt"], + swiftSettings: [.swiftLanguageMode(.v6)] + ), + .testTarget( + name: "DocCHTMLTests", + dependencies: [ + .target(name: "DocCHTML"), + .target(name: "SwiftDocC"), + .product(name: "Markdown", package: "swift-markdown"), + .target(name: "SwiftDocCTestUtilities"), + ], + swiftSettings: [.swiftLanguageMode(.v6)] + ), + // Test app for SwiftDocCUtilities .executableTarget( name: "signal-test-app", diff --git a/Sources/DocCHTML/CMakeLists.txt b/Sources/DocCHTML/CMakeLists.txt new file mode 100644 index 0000000000..3af4f6d1ff --- /dev/null +++ b/Sources/DocCHTML/CMakeLists.txt @@ -0,0 +1,23 @@ +#[[ +This source file is part of the Swift open source project + +Copyright © 2014 - 2025 Apple Inc. 
and the Swift project authors +Licensed under Apache License v2.0 with Runtime Library Exception + +See https://swift.org/LICENSE.txt for license information +#]] + +add_library(DocCHTML STATIC + LinkProvider.swift + MarkdownRenderer.swift + WordBreak.swift + XMLNode+element.swift) +target_link_libraries(DocCHTML PRIVATE + DocCCommon) +target_link_libraries(DocCHTML PUBLIC + SwiftMarkdown::Markdown + DocC::SymbolKit) +# FIXME(compnerd) workaround leaking dependencies +target_link_libraries(DocCHTML PUBLIC + libcmark-gfm + libcmark-gfm-extensions) diff --git a/Sources/DocCHTML/LinkProvider.swift b/Sources/DocCHTML/LinkProvider.swift new file mode 100644 index 0000000000..8125970a53 --- /dev/null +++ b/Sources/DocCHTML/LinkProvider.swift @@ -0,0 +1,116 @@ +/* + This source file is part of the Swift.org open source project + + Copyright (c) 2025 Apple Inc. and the Swift project authors + Licensed under Apache License v2.0 with Runtime Library Exception + + See https://swift.org/LICENSE.txt for license information + See https://swift.org/CONTRIBUTORS.txt for Swift project authors +*/ + +package import Foundation +package import Markdown +package import DocCCommon + +/// A type that provides information about other pages, and on-page elements, that the rendered page references. +package protocol LinkProvider { + /// Provide information about another page or on-page element, or `nil` if the other page can't be found. + func element(for path: URL) -> LinkedElement? + + /// Provide the path for a symbol based on its unique identifier, or `nil` if the other symbol with that identifier can't be found. + func pathForSymbolID(_ usr: String) -> URL? + + /// Provide information about an asset (for example an image or video), or `nil` if the asset can't be found. + func assetNamed(_ assetName: String) -> LinkedAsset? + + /// Fallback link text for a link string that the provider couldn't provide any information for. 
+ func fallbackLinkText(linkString: String) -> String +} + +package struct LinkedElement { + /// The path within the output archive to the linked element. + package var path: URL + /// The names of the linked element, for display when the element is referenced in inline content. + /// + /// Articles, headings, tutorials, and similar pages have a ``Names/single/conceptual(_:)`` name. + /// Symbols can either have a ``Names/single/symbol(_:)`` name or have different names for each language representation (``Names/languageSpecificSymbol``). + package var names: Names + /// The subheadings of the linked element, for display when the element is referenced in either a Topics section, See Also section, or in a `@Links` directive. + /// + /// Articles, headings, tutorials, and similar pages have a ``Subheadings/single/conceptual(_:)`` subheading. + /// Symbols can either have a ``Subheadings/single/symbol(_:)`` subheading or have different subheadings for each language representation (``Subheadings/languageSpecificSymbol``). + package var subheadings: Subheadings + /// The abstract of the page—to be displayed in either a Topics section, See Also section, or in a `@Links` directive—or `nil` if the linked element doesn't have an abstract. + package var abstract: Paragraph? + + package init(path: URL, names: Names, subheadings: Subheadings, abstract: Paragraph?) { + self.path = path + self.names = names + self.subheadings = subheadings + self.abstract = abstract + } + + /// The single name or language-specific names to use when referring to a linked element in inline content. + package enum Names { + /// This element has the same name in all language representations + case single(Name) + /// This element is a symbol with different names in different languages. + /// + /// Because `@DisplayName` applies to all language representations, these language specific names are always the symbol's subheading declaration and should display in a monospaced font. 
+ case languageSpecificSymbol([SourceLanguage: String]) + } + package enum Name { + /// The name refers to an article, heading, or custom `@DisplayName` and should display as regular text. + case conceptual(String) + /// The name refers to a symbol's subheading declaration and should display in a monospaced font. + case symbol(String) + } + + /// The single subheading or language-specific subheadings to use when referring to a linked element in either a Topics section, See Also section, or in a `@Links` directive. + package enum Subheadings { + /// This element has the same subheading in all language representations + case single(Subheading) + /// This element is a symbol with different subheadings in different languages. + /// + /// Because `@DisplayName` applies to all language representations, these language specific subheadings are always the symbol's subheading declaration and should display in a monospaced font. + case languageSpecificSymbol([SourceLanguage: [SymbolNameFragment]]) + } + package enum Subheading { + /// The subheading refers to an article, heading, or custom `@DisplayName` and should display as regular text. + case conceptual(String) + /// The subheading refers to a symbol's subheading declaration and should display in a monospaced font. + case symbol([SymbolNameFragment]) + } + + /// A fragment in a symbol's name + package struct SymbolNameFragment { + /// The textual spelling of this fragment + package var text: String + /// The kind of fragment + package var kind: Kind + + /// The display kind of a single symbol name fragment + package enum Kind: String { + case identifier, decorator + } + + package init(text: String, kind: Kind) { + self.text = text + self.kind = kind + } + } +} + +/// Information about a referenced image, video, or download asset that may be represented by more than one file for different color styles and display scales. 
+package struct LinkedAsset { + /// The path within the output archive to each file for this asset, grouped by their light/dark style and display scale. + package var files: [ColorStyle: [Int /* display scale*/: URL]] + + package init(files: [ColorStyle : [Int /* display scale*/: URL]]) { + self.files = files + } + + package enum ColorStyle: String { + case light, dark + } +} diff --git a/Sources/DocCHTML/MarkdownRenderer.swift b/Sources/DocCHTML/MarkdownRenderer.swift new file mode 100644 index 0000000000..294d3b6602 --- /dev/null +++ b/Sources/DocCHTML/MarkdownRenderer.swift @@ -0,0 +1,837 @@ +/* + This source file is part of the Swift.org open source project + + Copyright (c) 2025 Apple Inc. and the Swift project authors + Licensed under Apache License v2.0 with Runtime Library Exception + + See https://swift.org/LICENSE.txt for license information + See https://swift.org/CONTRIBUTORS.txt for Swift project authors +*/ + +#if canImport(FoundationXML) +// TODO: Consider other HTML rendering options as a future improvement (rdar://165755530) +package import FoundationXML +package import FoundationEssentials +internal import struct Foundation.CharacterSet +#else +package import Foundation +#endif +package import Markdown + +/// The primary goal for the rendered HTML output. +package enum RenderGoal { + /// The rendered output should prioritize richness, optimizing for human consumption. + /// + /// The rendered output might include explicit word-breaks, syntax highlighted code, etc. + case richness + /// The minimalistic rendered output should prioritize conciseness, optimizing for consumption by machines such as SEO indexers or LLMs. + case conciseness +} + +/// An HTML renderer for DocC markdown content. +/// +/// Markdown elements that have different meaning depending on where they occur in the page structure (for example links in prose vs. links in topic sections) should be handled at a layer above this plain markdown renderer. 
+package struct MarkdownRenderer<Provider: LinkProvider> { + /// The path within the output archive to the page that this renderer renders. + let path: URL + /// The goal of the rendered HTML output. + let goal: RenderGoal + /// A type that provides information about other pages that the rendered page references. + let linkProvider: Provider + + package init(path: URL, goal: RenderGoal, linkProvider: Provider) { + self.path = path + self.goal = goal + self.linkProvider = linkProvider + } + + /// Transforms a markdown paragraph into a `<p>

` HTML element. + /// + /// As part of transforming the paragraph, the renderer also transforms all of its content recursively. + /// For example, the renderer transforms this markdown + /// ```md + /// Some _formatted_ text + /// ``` + /// into XML nodes representing this HTML structure + /// ```html + /// <p>Some <i>formatted</i> text</p>
+ /// ``` + func visit(_ paragraph: Paragraph) -> XMLNode { + .element(named: "p", children: visit(paragraph.children)) + } + + /// Transforms a markdown block quote into a `<blockquote>
` HTML element that represents an "aside". + /// + /// As part of transforming the block quote, the renderer also transforms all of its content recursively. + /// For example, the renderer transforms this markdown + /// ```md + /// > Note: Something noteworthy + /// ``` + /// into XML nodes representing this HTML structure + /// ``` + /// <blockquote class="aside note"> + /// <p class="label">Note</p> + /// <p>Something noteworthy</p> + /// </blockquote>
+ /// ``` + func visit(_ blockQuote: BlockQuote) -> XMLNode { + let aside = Aside(blockQuote) + + var children: [XMLNode] = [ + .element(named: "p", children: [.text(aside.kind.displayName)], attributes: ["class": "label"]) + ] + for child in aside.content { + children.append(visit(child)) + } + + return .element( + named: "blockquote", + children: children, + attributes: ["class": "aside \(aside.kind.rawValue.lowercased())"] + ) + } + + /// Transforms a markdown heading into an `<hN>` HTML element (where N is the heading's level) whose content is wrapped in an `<a>` element that references the heading itself. + /// + /// As part of transforming the heading, the renderer also transforms all of its content recursively. + /// For example, the renderer transforms this markdown + /// ```md + /// # Some _Formatted_ text + /// ``` + /// into XML nodes representing this HTML structure + /// ``` + ///

+ /// + /// Some formattedtext + /// + ///

+ /// ``` + /// + /// - Note: When the renderer has a ``RenderGoal/conciseness`` goal, it doesn't wrap the heading's content in an anchor. + package func visit(_ heading: Heading) -> XMLNode { + selfReferencingHeading(level: heading.level, content: visit(heading.children), plainTextTitle: heading.plainText) + } + + func selfReferencingHeading(level: Int, content: [XMLNode], plainTextTitle: @autoclosure () -> String) -> XMLElement { + switch goal { + case .conciseness: + return .element(named: "h\(level)", children: content) + + case .richness: + let id = urlReadableFragment(plainTextTitle()) + return .element( + named: "h\(level)", + children: [ + // Wrap the heading content in an anchor ... + .element(named: "a", children: content, attributes: ["href": "#\(id)"]) + ], + // ... that refers to the heading itself + attributes: ["id": id] + ) + } + } + + /// Transforms a markdown emphasis into an `<i>` HTML element. + func visit(_ emphasis: Emphasis) -> XMLNode { + .element(named: "i", children: visit(emphasis.children)) + } + + /// Transforms a markdown strong into a `<b>` HTML element. + func visit(_ strong: Strong) -> XMLNode { + .element(named: "b", children: visit(strong.children)) + } + + /// Transforms a markdown strikethrough into an `<s>` HTML element. + func visit(_ strikethrough: Strikethrough) -> XMLNode { + .element(named: "s", children: visit(strikethrough.children)) + } + + /// Transforms a markdown inline code into a `<code>` HTML element. + func visit(_ inlineCode: InlineCode) -> XMLNode { + .element(named: "code", children: [.text(inlineCode.code)]) + } + + /// Transforms a markdown text into an HTML escaped text node. + func visit(_ text: Text) -> XMLNode { + .text(text.string) + } + + /// Transforms a markdown line break into an empty `<br>
` HTML element. + func visit(_: LineBreak) -> XMLNode { + .element(named: "br") + } + + /// Transforms a markdown soft break into a single space. + func visit(_: SoftBreak) -> XMLNode { + .text(" ") // A soft line break doesn't actually break the content + } + + /// Transforms a markdown thematic break into an empty `<hr>
` HTML element. + func visit(_: ThematicBreak) -> XMLNode { + .element(named: "hr") + } + + private func _removeComments(from node: XMLNode) { + guard let element = node as? XMLElement, + let children = element.children + else { + return + } + + let withoutComments = children.filter { $0.kind != .comment } + element.setChildren(withoutComments) + + for child in withoutComments { + _removeComments(from: child) + } + } + + /// Transforms a block of HTML in the source markdown into XML nodes representing the same structure with all the comments removed. + func visit(_ html: HTMLBlock) -> XMLNode { + do { + let parsed = try XMLElement(xmlString: html.rawHTML) + _removeComments(from: parsed) + return parsed + } catch { + return .text("") + } + } + + /// Transforms an inline HTML tag in the source markdown into XML nodes representing the same structure with all the comments removed. + func visit(_ html: InlineHTML) -> XMLNode { + // Inline HTML is one tag at a time, meaning that the closing and opening tags are parsed separately + // Because of this, we can't parse it with `XMLElement` or `XMLParser`. + + // We assume that we want all tags except for comments + guard !html.rawHTML.hasPrefix("` that we'd want to exclude from the output. + // - An empty element like `
` or `
` that's complete on its own. + // - An element with children like `Something` that needs to be created out of multiple markup elements. + // + // FIXME: See if this can be extracted into 2 private functions to make the code easier to read. + // Because it may take multiple markdown elements to create an HTML element, we pop elements rather than iterating + var elements = Array(container) + outer: while !elements.isEmpty { + let element = elements.removeFirst() + + guard let start = element as? InlineHTML else { + // If the markup _isn't_ inline HTML we can simply visit it to transform it. + children.append(visit(element)) + continue + } + + // Otherwise, we need to determine how long this markdown element it. + var rawHTML = start.rawHTML + guard !rawHTML.hasPrefix("