From 84dbf483e6619150dbeda50f3c652a317c084fcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20R=C3=B6nnqvist?= <ronnqvist@apple.com>
Date: Fri, 5 Dec 2025 11:15:34 +0100
Subject: [PATCH 1/3] Add test about parsing inline HTML except for comments

---
 Sources/DocCHTML/MarkdownRenderer.swift       | 12 ++++-
 .../DocCHTMLTests/MarkdownRendererTests.swift | 48 +++++++++++++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/Sources/DocCHTML/MarkdownRenderer.swift b/Sources/DocCHTML/MarkdownRenderer.swift
index 8ee60d8db..9ea7f7c38 100644
--- a/Sources/DocCHTML/MarkdownRenderer.swift
+++ b/Sources/DocCHTML/MarkdownRenderer.swift
@@ -695,7 +695,10 @@ package struct MarkdownRenderer<Provider: LinkProvider> {
             }
             
             // Next, check if its empty element (for example `<br />` or `<hr />`) that's complete on its own.
-            if let parsed = try? XMLElement(xmlString: rawHTML) {
+            
+            // On non-Darwin platforms, `XMLElement(xmlString:)` sometimes crashes for certain invalid / incomplete XML string.
+            // To minimize the risk of this happening, don't try to parse the XML string as an empty HTML element unless it ends with "/>"
+            if rawHTML.hasSuffix("/>"), let parsed = try? XMLElement(xmlString: rawHTML) {
                 children.append(parsed)
                 continue
             }
@@ -703,6 +706,8 @@ package struct MarkdownRenderer<Provider: LinkProvider> {
             // This could be an HTML element with content or it could be invalid HTML.
             // Don't modify `elements` until we know that we've parsed a valid HTML element.
             var copy = elements
+            let tagName = rawHTML.dropFirst().prefix(while: \.isLetter)
+            let expectedClosingTag = "</\(tagName)>"
             
             // Gradually check a longer and longer series of markup elements to see if they form a valid HTML element.
             inner: while !copy.isEmpty, let next = copy.first as? any InlineMarkup {
@@ -714,7 +719,10 @@ package struct MarkdownRenderer<Provider: LinkProvider> {
                 }
                 
                 rawHTML += next.format()
-                if let parsed = try? XMLElement(xmlString: rawHTML) {
+                if let maybeClosingHTML = next as? InlineHTML,
+                   maybeClosingHTML.rawHTML == expectedClosingTag,
+                   let parsed = try? XMLElement(xmlString: rawHTML)
+                {
                     children.append(parsed) // Include the valid HTML element in the output.
                     elements = copy // Skip over all the elements that were used to create that HTML element.
                     continue outer
diff --git a/Tests/DocCHTMLTests/MarkdownRendererTests.swift b/Tests/DocCHTMLTests/MarkdownRendererTests.swift
index 0b0071617..2f8eeec8d 100644
--- a/Tests/DocCHTMLTests/MarkdownRendererTests.swift
+++ b/Tests/DocCHTMLTests/MarkdownRendererTests.swift
@@ -548,6 +548,54 @@ struct MarkdownRendererTests {
         )
     }
     
+    @Test
+    func testParsesAndPreservesHTMLExceptComments() {
+        assert(
+            rendering: "This is a <!-- inline comment --><strong>formatted</strong> paragraph.",
+            matches: "<p>This is a <strong>formatted</strong> paragraph.</p>"
+        )
+        
+        assert(
+            rendering: "This<br/> is a <em><!-- multi\n line\n comment-->formatted</em>paragraph.",
+            matches: "<p>This<br/> is a <em>formatted</em> paragraph.</p>"
+        )
+        
+        assert(
+            rendering: "This is a <span style=\"color: red\"><!-- before -->custom formatted<!-- after --></span> paragraph.",
+            matches: "<p>This is a <span style=\"color: red\">custom formatted</span> paragraph.</p>"
+        )
+        
+        // This markup doesn't properly close the `<strong>` tag (it uses an `</em>` tag instead).
+        // In this case we drop both tags but not the content in between. This matches what DocC does for inline HTML with regard to the Render JSON output.
+        assert(
+            rendering: "This is a <strong>custom formatted</em> paragraph.",
+            matches: "<p>This is a custom formatted paragraph.</p>"
+        )
+        
+        // Any content _within_ HTML tags in the markdown isn't parsed as markdown content.
+        assert(
+            rendering: "This is a <span>custom **not** formatted</span> paragraph.",
+            matches: "<p>This is a <span>custom **not** formatted</span> paragraph.</p>"
+        )
+        
+        assert(
+            rendering: """
+            <details>
+                <summary>Some summary<!-- comment in summary--></summary>
+                <!-- comment between elements -->
+                <p><!-- comment before -->Some longer<!-- comment between words --> description<!-- comment after --></p>
+            </details>
+            <!-- comment after block element -->
+            """,
+            matches: """
+            <details>
+                <summary>Some summary</summary>
+                <p>Some longer description</p>
+            </details>
+            """
+        )
+    }
+    
     private func assert(
         rendering markdownContent: String,
         elementToReturn: LinkedElement? = nil,

From e540ba39f94c4e7cca081b604c000fa26fd28ab4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20R=C3=B6nnqvist?= <ronnqvist@apple.com>
Date: Fri, 5 Dec 2025 11:50:39 +0100
Subject: [PATCH 2/3] Extract inner HTML parsing code into a private function

---
 Sources/DocCHTML/MarkdownRenderer.swift | 77 +++++++++++++------------
 1 file changed, 40 insertions(+), 37 deletions(-)

diff --git a/Sources/DocCHTML/MarkdownRenderer.swift b/Sources/DocCHTML/MarkdownRenderer.swift
index 9ea7f7c38..00d55f1ee 100644
--- a/Sources/DocCHTML/MarkdownRenderer.swift
+++ b/Sources/DocCHTML/MarkdownRenderer.swift
@@ -675,66 +675,69 @@ package struct MarkdownRenderer<Provider: LinkProvider> {
         // - An empty element like `<br />` or `<hr />` that's complete on its own.
         // - An element with children like `<span style="color: red;">Something</span>` that needs to be created out of multiple markup elements.
         //
-        // FIXME: See if this can be extracted into 2 private functions to make the code easier to read.
         // Because it may take multiple markdown elements to create an HTML element, we pop elements rather than iterating
-        var elements = Array(container)
-        outer: while !elements.isEmpty {
-            let element = elements.removeFirst()
-            
-            guard let start = element as? InlineHTML else {
+        var remainder = Array(container)[...]
+        while let element = remainder.popFirst() {
+            guard let openingHTML = element as? InlineHTML else {
                 // If the markup _isn't_ inline HTML we can simply visit it to transform it.
                 children.append(visit(element))
                 continue
             }
             
             // Otherwise, we need to determine how long this markdown element it.
-            var rawHTML = start.rawHTML
+            let rawHTML = openingHTML.rawHTML
+            // Simply skip any HTML/XML comments.
             guard !rawHTML.hasPrefix("<!--") else {
-                // If it's a basic link, simply skip it.
                 continue
             }
             
             // Next, check if its empty element (for example `<br />` or `<hr />`) that's complete on its own.
             
-            // On non-Darwin platforms, `XMLElement(xmlString:)` sometimes crashes for certain invalid / incomplete XML string.
+            // On non-Darwin platforms, `XMLElement(xmlString:)` sometimes crashes for certain invalid / incomplete XML strings.
             // To minimize the risk of this happening, don't try to parse the XML string as an empty HTML element unless it ends with "/>"
             if rawHTML.hasSuffix("/>"), let parsed = try? XMLElement(xmlString: rawHTML) {
                 children.append(parsed)
-                continue
             }
-            
-            // This could be an HTML element with content or it could be invalid HTML.
-            // Don't modify `elements` until we know that we've parsed a valid HTML element.
-            var copy = elements
-            let tagName = rawHTML.dropFirst().prefix(while: \.isLetter)
-            let expectedClosingTag = "</\(tagName)>"
-            
-            // Gradually check a longer and longer series of markup elements to see if they form a valid HTML element.
-            inner: while !copy.isEmpty, let next = copy.first as? any InlineMarkup {
-                _ = copy.removeFirst()
-                
-                // Skip any HTML/XML comments _inside_ this HTML tag
-                if let html = next as? InlineHTML, html.rawHTML.hasPrefix("<!--") {
-                    continue inner
-                }
-                
-                rawHTML += next.format()
-                if let maybeClosingHTML = next as? InlineHTML,
-                   maybeClosingHTML.rawHTML == expectedClosingTag,
-                   let parsed = try? XMLElement(xmlString: rawHTML)
-                {
-                    children.append(parsed) // Include the valid HTML element in the output.
-                    elements = copy // Skip over all the elements that were used to create that HTML element.
-                    continue outer
-                }
+            // Lastly, check if this is the start of an HTML element that needs to be constructed out of more than one markup element
+            else if let parsed = _findMultiMarkupHTMLElement(in: &remainder, openingRawHTML: rawHTML) {
+                children.append(parsed)
             }
-            // If we reached the end of the inline elements without parsing a valid HTML element, skip that first InlineHTML markup and continue from there.
-            continue
         }
         
         return children
     }
     
+    private func _findMultiMarkupHTMLElement(in remainder: inout ArraySlice<any Markup>, openingRawHTML: String) -> XMLNode? {
+        // Don't modify `remainder` until we know that we've parsed a valid HTML element.
+        var copy = remainder
+        
+        var rawHTML = openingRawHTML
+        let tagName = rawHTML.dropFirst(/* the opening "<" */).prefix(while: \.isLetter)
+        let expectedClosingTag = "</\(tagName)>"
+        
+        // Only iterate as long as the markup is _inline_ markup.
+        while let next = copy.first as? any InlineMarkup {
+            _ = copy.removeFirst()
+            let html = next as? InlineHTML
+            
+            // Skip any HTML/XML comments _inside_ this HTML tag
+            if let html, html.rawHTML.hasPrefix("<!--") {
+                continue
+            }
+            
+            // If this wasn't a comment, accumulate more raw HTML to try and parse
+            rawHTML += next.format()
+            // On non-Darwin platforms, `XMLElement(xmlString:)` sometimes crashes for certain invalid / incomplete XML strings.
+            // To minimize the risk of this happening, don't try to parse the accumulated XML string unless the markup that was just added is the expected closing tag.
+            if html?.rawHTML == expectedClosingTag, let parsed = try? XMLElement(xmlString: rawHTML) {
+                remainder = copy // Skip over all the elements that were used to create that HTML element.
+                return parsed // Include the valid HTML element in the output.
+            }
+        }
+        // If we reached the end of the _inline_ markup without parsing a valid HTML element, skip just that opening markup without updating `remainder`
+        return nil
+    }
+    
     // MARK: Directives
     
     func visit(_: BlockDirective) -> XMLNode {

From 57dac285b25a0afe2f7e46b5996dba20c9597921 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20R=C3=B6nnqvist?= <ronnqvist@apple.com>
Date: Mon, 8 Dec 2025 10:53:17 +0100
Subject: [PATCH 3/3] Fix minor spelling in code comment

Co-authored-by: Pat Shaughnessy <pat_shaughnessy@apple.com>
---
 Sources/DocCHTML/MarkdownRenderer.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Sources/DocCHTML/MarkdownRenderer.swift b/Sources/DocCHTML/MarkdownRenderer.swift
index 00d55f1ee..c43a4914e 100644
--- a/Sources/DocCHTML/MarkdownRenderer.swift
+++ b/Sources/DocCHTML/MarkdownRenderer.swift
@@ -684,7 +684,7 @@ package struct MarkdownRenderer<Provider: LinkProvider> {
                 continue
             }
             
-            // Otherwise, we need to determine how long this markdown element it.
+            // Otherwise, we need to determine how long this markdown element is.
             let rawHTML = openingHTML.rawHTML
             // Simply skip any HTML/XML comments.
             guard !rawHTML.hasPrefix("<!--") else {