Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 40 additions & 3 deletions lib/reverse_markdown/converters/text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,20 @@ def convert(node, options = {})

private

# Tag names that the whitespace-handling helpers treat as inline content.
# Any element whose name is not listed here is handled as a block element
# when deciding whether a newline should collapse to a single space.
INLINE_ELEMENTS = %i[
  a abbr b bdi bdo cite code data del
  dfn em i ins kbd mark q rp rt ruby
  s samp small span strong sub sup time
  u var wbr font tt
].freeze

def treat_empty(node)
parent = node.parent.name.to_sym
if [:ol, :ul].include?(parent) # Otherwise the identation is broken
''
elsif node.text == ' ' # Regular whitespace text node
' '
elsif INLINE_ELEMENTS.include?(parent) && node.text =~ /\n/
# Preserve newlines between inline elements as space (HTML whitespace collapsing)
' '
else
''
end
Expand All @@ -25,7 +33,7 @@ def treat_empty(node)
def treat_text(node)
text = node.text
text = preserve_nbsp(text)
text = remove_border_newlines(text)
text = remove_border_newlines(text, node)
text = remove_inner_newlines(text)
text = escape_keychars(text)

Expand All @@ -43,8 +51,37 @@ def preserve_tags(text)
text.gsub(/[<>]/, '>' => '\>', '<' => '\<')
end

def remove_border_newlines(text)
text.gsub(/\A\n+/, '').gsub(/\n+\z/, '')
# Strips newlines from both ends of +text+.
#
# Leading newlines are always dropped. A trailing run of newlines becomes a
# single space when inline content follows +node+ (mirroring HTML whitespace
# collapsing between inline elements); otherwise it is dropped as well.
def remove_border_newlines(text, node)
  without_leading = text.sub(/\A\n+/, '')
  trailing_replacement = has_following_inline_content?(node) ? ' ' : ''
  without_leading.sub(/\n+\z/, trailing_replacement)
end

# Reports whether visible inline content follows +node+ in document order.
#
# Following siblings are scanned left to right:
# * a text node with non-whitespace content       -> true
# * a whitespace-only text node or a comment node -> skipped
# * an element listed in INLINE_ELEMENTS          -> true
# * any other node (treated as a block element)   -> false
#
# When the siblings are exhausted and the parent is itself an inline element,
# the search continues with the parent's following siblings. A node without a
# parent, or with a non-inline parent, yields false.
def has_following_inline_content?(node)
  sibling = node.next_sibling
  while sibling
    if sibling.text?
      return true unless sibling.text.strip.empty?
    elsif sibling.comment?
      # Comments render nothing, so they must not end the scan the way a
      # block element does; keep looking at what comes after them.
    elsif INLINE_ELEMENTS.include?(sibling.name.to_sym)
      return true
    else
      # Block element - no space needed before it
      return false
    end
    sibling = sibling.next_sibling
  end

  # Nothing decisive at this level: climb only through inline ancestors.
  # The nil guard covers nodes that are not attached to any parent.
  parent = node.parent
  return false unless parent && INLINE_ELEMENTS.include?(parent.name.to_sym)

  has_following_inline_content?(parent)
end

def remove_inner_newlines(text)
Expand Down
22 changes: 22 additions & 0 deletions spec/components/basic_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,26 @@
it { is_expected.to match /before hr \n\* \* \*\n after hr/ }

it { is_expected.to match /section 1\n ?\nsection 2/ }

# Examples for text nodes that end in a newline and are followed by further
# inline content: each expects the newline to appear as a single space in the
# converted output.
describe 'whitespace handling between inline elements' do
it 'preserves whitespace (including newlines) between spans' do
# Two adjacent top-level spans; the first span's text ends with "\n".
input = "<span>Hello\n</span><span>World</span>"
result = ReverseMarkdown.convert(input)
expect(result).to eq "Hello World"
end

it 'preserves whitespace between inline elements in paragraphs' do
# Same markup wrapped in a paragraph; the expected output also carries the
# paragraph's trailing blank line.
input = "<p><span>Hello\n</span><span>World</span></p>"
result = ReverseMarkdown.convert(input)
expect(result).to eq "Hello World\n\n"
end

it 'preserves whitespace between nested inline elements' do
# The text "A" is nested inside <span> inside <em>, but <em> has a following sibling
# This requires traversing up through parent nodes to find following content
input = "<p><em><span>A\n</span></em><span>B</span></p>"
result = ReverseMarkdown.convert(input)
expect(result).to eq "_A_ B\n\n"
end
end
end
2 changes: 1 addition & 1 deletion spec/components/from_the_wild_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
subject { ReverseMarkdown.convert(input) }

it "should make sense of strong-crazy markup (as seen in the wild)" do
expect(subject).to include "**. \n \\*\\*\\* intentcast** : logo design \n **.**\n\n"
expect(subject).to include "**. \n \\*\\*\\* intentcast** : logo design \n **.**\n\n"
end

it "should not over escape * or _" do
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/reverse_markdown/converters/text_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
expect(result).to eq 'foo bar'
end

it 'removes trailing newlines' do
it 'removes trailing newlines when no following content' do
input = node_for("<p>foo bar\n\n</p>")
result = converter.convert(input)
expect(result).to eq 'foo bar'
Expand Down