Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions lib/reverse_markdown/cleaner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,32 @@ def tidy(string)
result = remove_inner_whitespaces(string)
result = remove_newlines(result)
result = remove_leading_newlines(result)
result = merge_adjacent_emphasis(result)
result = clean_tag_borders(result)
clean_punctuation_characters(result)
end

# Collapses back-to-back emphasis runs that touch each other into one run.
#
#   _X__Y_       becomes _XY_
#   **X****Y**   becomes **XY**
#
# Runs separated by anything (for example a space or a newline) are left
# alone. Each rule is re-applied until the text stops changing, because a
# single substitution pass only joins one pair of a longer chain
# (e.g. _a__b__c_ needs two passes).
#
# @param string [String] the text to scan
# @return [String] the text with adjacent emphasis merged; the very same
#   object is returned when nothing matched
def merge_adjacent_emphasis(string)
  merge_rules = [
    # Underscore emphasis: closing "_" immediately followed by opening "_".
    [/_([^_\n]+)__([^_\n]+)_/, '_\1\2_'],
    # Strong emphasis: closing "**" immediately followed by opening "**".
    [/\*\*([^*\n]+)\*\*\*\*([^*\n]+)\*\*/, '**\1\2**']
  ]

  merge_rules.reduce(string) do |text, (pattern, replacement)|
    # Keep substituting until a pass leaves the text untouched.
    loop do
      merged = text.gsub(pattern, replacement)
      break text if merged == text

      text = merged
    end
  end
end

# Squeezes every run of three or more consecutive newlines down to exactly
# two, so at most one blank line separates pieces of text.
#
# @param string [String] the text to normalize
# @return [String] a new string with the long newline runs shortened
def remove_newlines(string)
  excess_newlines = /\n{3,}/
  string.gsub(excess_newlines, "\n\n")
end
Expand Down
37 changes: 37 additions & 0 deletions spec/lib/reverse_markdown/cleaner_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,43 @@
end
end

describe '#merge_adjacent_emphasis' do
  # --- underscore emphasis -------------------------------------------------

  it 'merges two adjacent underscore emphasis tags' do
    expect(cleaner.merge_adjacent_emphasis('_a__b_')).to eq '_ab_'
  end

  it 'merges three adjacent underscore emphasis tags' do
    # A chain of three only collapses fully if merging is applied repeatedly.
    expect(cleaner.merge_adjacent_emphasis('_a__b__c_')).to eq '_abc_'
  end

  # --- strong emphasis -----------------------------------------------------

  it 'merges two adjacent strong emphasis tags' do
    expect(cleaner.merge_adjacent_emphasis('**a****b**')).to eq '**ab**'
  end

  it 'merges three adjacent strong emphasis tags' do
    expect(cleaner.merge_adjacent_emphasis('**a****b****c**')).to eq '**abc**'
  end

  # --- inputs that must stay untouched -------------------------------------

  it 'does not merge emphasis tags separated by whitespace' do
    untouched = '_a_ _b_'
    expect(cleaner.merge_adjacent_emphasis(untouched)).to eq '_a_ _b_'
  end

  it 'does not merge strong tags separated by whitespace' do
    untouched = '**a** **b**'
    expect(cleaner.merge_adjacent_emphasis(untouched)).to eq '**a** **b**'
  end

  # --- both kinds inside surrounding text ----------------------------------

  it 'handles mixed content correctly' do
    input = 'text _a__b_ more **c****d** end'
    expect(cleaner.merge_adjacent_emphasis(input)).to eq 'text _ab_ more **cd** end'
  end
end

describe '#clean_tag_borders' do
context 'with default_border is set to space' do
before { ReverseMarkdown.config.tag_border = ' ' }
Expand Down
14 changes: 14 additions & 0 deletions spec/lib/reverse_markdown/converters/em_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,18 @@
expect(result).to include('_hello_')
expect(result).to include('_world_')
end

it 'merges adjacent em tags into single emphasis' do
  # Regression for issue #99: <em>wo</em><em>rd</em> must come out as
  # _word_ rather than _wo__rd_.
  result = ReverseMarkdown.convert('<em>wo</em><em>rd</em>')
  expect(result).to eq '_word_'
end

it 'merges multiple adjacent em tags' do
  # Three touching <em> elements should end up as one emphasis run.
  result = ReverseMarkdown.convert('<em>a</em><em>b</em><em>c</em>')
  expect(result).to eq '_abc_'
end

it 'keeps separate emphasis when tags have whitespace between them' do
  # The space between the two <em> elements must keep them as two runs.
  result = ReverseMarkdown.convert('<em>a</em> <em>b</em>')
  expect(result).to eq '_a_ _b_'
end
end