From 9095dedb8fb44df393171c464a188d9c1fecd6f8 Mon Sep 17 00:00:00 2001
From: Johannes Opper <xijo@pm.me>
Date: Tue, 20 Jan 2026 09:06:09 +0100
Subject: [PATCH] Fix whitespace collapsing between inline elements

Preserve trailing newlines as spaces when there's following inline
content. This properly handles HTML whitespace collapsing for cases
like `<span>A\n</span><span>B</span>` which should render as "A B".

The fix recursively traverses parent nodes to detect following content
even when text is deeply nested inside inline elements.

Fixes #34

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 lib/reverse_markdown/converters/text.rb       | 43 +++++++++++++++++--
 spec/components/basic_spec.rb                 | 22 ++++++++++
 spec/components/from_the_wild_spec.rb         |  2 +-
 .../reverse_markdown/converters/text_spec.rb  |  2 +-
 4 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/lib/reverse_markdown/converters/text.rb b/lib/reverse_markdown/converters/text.rb
index 27e40b1..fa03ee3 100644
--- a/lib/reverse_markdown/converters/text.rb
+++ b/lib/reverse_markdown/converters/text.rb
@@ -11,12 +11,20 @@ def convert(node, options = {})
 
       private
 
+      INLINE_ELEMENTS = [:a, :abbr, :b, :bdi, :bdo, :cite, :code, :data, :del,
+                          :dfn, :em, :i, :ins, :kbd, :mark, :q, :rp, :rt, :ruby,
+                          :s, :samp, :small, :span, :strong, :sub, :sup, :time,
+                          :u, :var, :wbr, :font, :tt].freeze # tag names treated as inline when collapsing whitespace
+
       def treat_empty(node)
         parent = node.parent.name.to_sym
         if [:ol, :ul].include?(parent)  # Otherwise the identation is broken
           ''
         elsif node.text == ' '          # Regular whitespace text node
           ' '
+        elsif INLINE_ELEMENTS.include?(parent) && node.text =~ /\n/
+          # Preserve newlines between inline elements as space (HTML whitespace collapsing)
+          ' '
         else
           ''
         end
@@ -25,7 +33,7 @@ def treat_empty(node)
       def treat_text(node)
         text = node.text
         text = preserve_nbsp(text)
-        text = remove_border_newlines(text)
+        text = remove_border_newlines(text, node)
         text = remove_inner_newlines(text)
         text = escape_keychars(text)
 
@@ -43,8 +51,37 @@ def preserve_tags(text)
         text.gsub(/[<>]/, '>' => '\>', '<' => '\<')
       end
 
-      def remove_border_newlines(text)
-        text.gsub(/\A\n+/, '').gsub(/\n+\z/, '')
+      def remove_border_newlines(text, node)
+        result = text.gsub(/\A\n+/, '')
+        # Leading newlines are always dropped. A trailing run of newlines becomes one space only
+        # when inline content follows (HTML whitespace collapsing); otherwise it is dropped too
+        if has_following_inline_content?(node)
+          result.gsub(/\n+\z/, ' ')
+        else
+          result.gsub(/\n+\z/, '')
+        end
+      end
+
+      def has_following_inline_content?(node)
+        # Scan the following siblings; whitespace-only text nodes are skipped
+        sibling = node.next_sibling
+        while sibling
+          if sibling.text?
+            return true unless sibling.text.strip.empty?
+          elsif INLINE_ELEMENTS.include?(sibling.name.to_sym)
+            return true
+          else
+            # Any other node (e.g. a block element) ends the search - no space needed before it
+            return false
+          end
+          sibling = sibling.next_sibling
+        end
+
+        # Nothing decisive among the siblings: if the parent is itself inline, repeat the check one level up
+        parent = node.parent
+        return false unless INLINE_ELEMENTS.include?(parent.name.to_sym)
+
+        has_following_inline_content?(parent)
+      end
 
       def remove_inner_newlines(text)
diff --git a/spec/components/basic_spec.rb b/spec/components/basic_spec.rb
index 53cc5cf..6295f16 100644
--- a/spec/components/basic_spec.rb
+++ b/spec/components/basic_spec.rb
@@ -40,4 +40,26 @@
   it { is_expected.to match /before hr \n\* \* \*\n after hr/ }
 
   it { is_expected.to match /section 1\n ?\nsection 2/ }
+
+  describe 'whitespace handling between inline elements' do
+    it 'preserves whitespace (including newlines) between spans' do
+      input = "<span>Hello\n</span><span>World</span>"
+      result = ReverseMarkdown.convert(input)
+      expect(result).to eq "Hello World"
+    end
+
+    it 'preserves whitespace between inline elements in paragraphs' do
+      input = "<p><span>Hello\n</span><span>World</span></p>"
+      result = ReverseMarkdown.convert(input)
+      expect(result).to eq "Hello World\n\n"
+    end
+
+    it 'preserves whitespace between nested inline elements' do
+      # "A\n" sits in a <span> inside an <em>; only the <em> has a following sibling (<span>B</span>),
+      # so finding the following content requires walking up through the inline parent nodes
+      input = "<p><em><span>A\n</span></em><span>B</span></p>"
+      result = ReverseMarkdown.convert(input)
+      expect(result).to eq "_A_ B\n\n"
+    end
+  end
 end
diff --git a/spec/components/from_the_wild_spec.rb b/spec/components/from_the_wild_spec.rb
index 821b21a..762a674 100644
--- a/spec/components/from_the_wild_spec.rb
+++ b/spec/components/from_the_wild_spec.rb
@@ -6,7 +6,7 @@
   subject { ReverseMarkdown.convert(input) }
 
   it "should make sense of strong-crazy markup (as seen in the wild)" do
-    expect(subject).to include "**.  \n \\*\\*\\* intentcast** : logo design   \n **.**\n\n"
+    expect(subject).to include "**.  \n  \\*\\*\\* intentcast** : logo design     \n    **.**\n\n"
   end
 
   it "should not over escape * or _" do
diff --git a/spec/lib/reverse_markdown/converters/text_spec.rb b/spec/lib/reverse_markdown/converters/text_spec.rb
index 8dd7125..1bc6159 100644
--- a/spec/lib/reverse_markdown/converters/text_spec.rb
+++ b/spec/lib/reverse_markdown/converters/text_spec.rb
@@ -22,7 +22,7 @@
     expect(result).to eq 'foo bar'
   end
 
-  it 'removes trailing newlines' do
+  it 'removes trailing newlines when no following content' do
     input = node_for("<p>foo bar\n\n</p>")
     result = converter.convert(input)
     expect(result).to eq 'foo bar'