Skip to content

Commit d62b032

Browse files
committed
Enable break_on_newline extension by default for Markdown
Enable the existing break_on_newline extension in DEFAULT_EXTENSIONS so the Markdown parser converts soft line breaks to HardBreak objects. This produces visible <br> line breaks in HTML output, matching how GitHub renders newlines in comments (the GFM spec itself renders a soft line break as a space). The conversion happens in the Markdown parser's paragraph() method, which is the proper place for Markdown-specific behavior; the generic accept_paragraph in ToHtml gains no Markdown-specific logic. Also remove the CJK-aware newline-to-space gsub from accept_paragraph, which is no longer needed because the RDoc markup parser already handles newline joining at parse time in build_paragraph.
1 parent 393c0e8 commit d62b032

File tree

8 files changed

+356
-368
lines changed

8 files changed

+356
-368
lines changed

doc/markup_reference/markdown.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ This section compares RDoc's Markdown implementation with the
579579
| Fenced Code (tildes) | ✅ `~~~` | ❌ | Conflicts with strikethrough syntax |
580580
| Info strings (language) | ✅ any | ⚠️ limited | `ruby`/`rb`, `c`, and `bash`/`sh`/`shell`/`console` highlighted; others accepted as CSS class |
581581
| Blockquotes | ✅ | ✅ | Full match, nested supported |
582-
| Lazy Continuation | ✅ | ⚠️ | Continuation text is included in blockquote but line break is lost (becomes a space) |
582+
| Lazy Continuation | ✅ | ⚠️ | Continuation text is included in blockquote; line break is preserved as `<br>` |
583583
| Bullet Lists | ✅ | ✅ | `*`, `+`, `-` supported |
584584
| Ordered Lists | ✅ `.` `)` | ⚠️ `.` only | RDoc doesn't support `)` delimiter; numbers are always renumbered from 1 |
585585
| Nested Lists | ✅ | ✅ | 4-space indentation |
@@ -620,6 +620,7 @@ RDoc uses a whitelist of block-level tags defined in
620620
| Link titles | ✅ | ⚠️ | Parsed but not rendered |
621621
| Images | ✅ | ✅ | Full match |
622622
| Autolinks `<url>` | ✅ | ✅ | Full match |
623+
| Soft line breaks | ✅ | ⚠️ | Newlines within paragraphs produce `<br>`; the GFM spec renders them as a space |
623624
| Hard line breaks | ✅ | ⚠️ | 2+ trailing spaces only; backslash `\` at EOL not supported |
624625
| Backslash escapes | ✅ | ⚠️ | Subset of GFM's escapable characters (e.g., `~` not escapable) |
625626
| HTML entities | ✅ | ✅ | Named, decimal, hex |

lib/rdoc/markdown.kpeg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@
202202
# Extensions enabled by default
203203

204204
DEFAULT_EXTENSIONS = [
205+
:break_on_newline,
205206
:definition_lists,
206207
:github,
207208
:html,

lib/rdoc/markdown.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,7 @@ def self.rule_info(name, rendered)
587587
# Extensions enabled by default
588588

589589
DEFAULT_EXTENSIONS = [
590+
:break_on_newline,
590591
:definition_lists,
591592
:github,
592593
:html,

lib/rdoc/markup/to_html.rb

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -313,9 +313,6 @@ def accept_block_quote(block_quote)
313313
def accept_paragraph(paragraph)
314314
@res << "\n<p>"
315315
text = paragraph.text @hard_break
316-
text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K\r?\n(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
317-
defined?($2) && ' '
318-
}
319316
@res << to_html(text)
320317
@res << "</p>\n"
321318
end

test/rdoc/markup/to_html_test.rb

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def accept_paragraph_br
236236
end
237237

238238
def accept_paragraph_break
239-
assert_equal "\n<p>hello<br> world</p>\n", @to.res.join
239+
assert_equal "\n<p>hello<br>\nworld</p>\n", @to.res.join
240240
end
241241

242242
def accept_paragraph_i
@@ -411,43 +411,13 @@ def test_accept_heading_dedup_resets_on_start_accepting
411411
end
412412

413413
def test_accept_paragraph_newline
414-
hellos = ["hello", "\u{393 3b5 3b9 3ac} \u{3c3 3bf 3c5}"]
415-
worlds = ["world", "\u{3ba 3cc 3c3 3bc 3bf 3c2}"]
416-
ohayo, sekai = %W"\u{304a 306f 3088 3046} \u{4e16 754c}"
417-
418-
hellos.product(worlds) do |hello, world|
419-
@to.start_accepting
420-
@to.accept_paragraph para("#{hello}\n", "#{world}\n")
421-
assert_equal "\n<p>#{hello} #{world}</p>\n", @to.res.join
422-
end
423-
424-
hellos.each do |hello|
425-
@to.start_accepting
426-
@to.accept_paragraph para("#{hello}\n", "#{sekai}\n")
427-
assert_equal "\n<p>#{hello}#{sekai}</p>\n", @to.res.join
428-
end
429-
430-
worlds.each do |world|
431-
@to.start_accepting
432-
@to.accept_paragraph para("#{ohayo}\n", "#{world}\n")
433-
assert_equal "\n<p>#{ohayo}#{world}</p>\n", @to.res.join
434-
end
435-
436414
@to.start_accepting
437-
@to.accept_paragraph para("#{ohayo}\n", "#{sekai}\n")
438-
assert_equal "\n<p>#{ohayo}#{sekai}</p>\n", @to.res.join
415+
@to.accept_paragraph para("hello\n", "world\n")
416+
assert_equal "\n<p>hello\nworld\n</p>\n", @to.res.join
439417

440418
@to.start_accepting
441419
@to.accept_paragraph para("+hello+\n", "world\n")
442-
assert_equal "\n<p><code>hello</code> world</p>\n", @to.res.join
443-
444-
@to.start_accepting
445-
@to.accept_paragraph para("hello\n", "+world+\n")
446-
assert_equal "\n<p>hello <code>world</code></p>\n", @to.res.join
447-
448-
@to.start_accepting
449-
@to.accept_paragraph para("+hello+\n", "+world+\n")
450-
assert_equal "\n<p><code>hello</code> <code>world</code></p>\n", @to.res.join
420+
assert_equal "\n<p><code>hello</code>\nworld\n</p>\n", @to.res.join
451421
end
452422

453423
def test_accept_heading_output_decoration

test/rdoc/parser/changelog_test.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,8 @@ def test_scan_git
421421
'Masataka Pocke Kuwabara', 'kuwabara@pocke.me', '2021-01-01 14:25:08 +0900',
422422
[head(4, 'Make args info for RubyVM::AST to available on endless method without parens'),
423423
head(5, 'Problem'),
424-
para("Arguments information is missing for endless method without parens.\n" +
425-
"For example:"),
424+
para("Arguments information is missing for endless method without parens.",
425+
hard_break, "For example:"),
426426
verb("# ok\n").tap {|v| v.format = :ruby},
427427
para('It causes an error if a program expects <code>args</code> node exists.'),
428428
head(5, 'Solution'),

test/rdoc/rdoc_markdown_test.rb

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def test_parse_block_quote
5656
expected =
5757
doc(
5858
block(
59-
para("this is\na block quote")))
59+
para("this is", hard_break, "a block quote")))
6060

6161
assert_equal expected, doc
6262
end
@@ -70,11 +70,22 @@ def test_parse_block_quote_continue
7070
expected =
7171
doc(
7272
block(
73-
para("this is\na block quote")))
73+
para("this is", hard_break, "a block quote")))
7474

7575
assert_equal expected, doc
7676
end
7777

78+
def test_parse_block_quote_continue_html
79+
doc = parse <<-BLOCK_QUOTE
80+
> this is
81+
a block quote
82+
BLOCK_QUOTE
83+
84+
html = doc.accept(RDoc::Markup::ToHtml.new)
85+
86+
assert_include html, "<p>this is<br>\na block quote</p>"
87+
end
88+
7889
def test_parse_block_quote_list
7990
doc = parse <<-BLOCK_QUOTE
8091
> text
@@ -104,7 +115,7 @@ def test_parse_block_quote_newline
104115
expected =
105116
doc(
106117
block(
107-
para("this is\na block quote")))
118+
para("this is", hard_break, "a block quote")))
108119

109120
assert_equal expected, doc
110121
end
@@ -120,7 +131,7 @@ def test_parse_block_quote_separate
120131
expected =
121132
doc(
122133
block(
123-
para("this is\na block quote"),
134+
para("this is", hard_break, "a block quote"),
124135
para("that continues")))
125136

126137
assert_equal expected, doc
@@ -262,7 +273,7 @@ def test_parse_code_github
262273
assert_equal expected, parse(doc)
263274

264275
expected =
265-
doc(para("Example:\n<code>\n""code goes here\n</code>"))
276+
doc(para("Example:", hard_break, "<code>\n""code goes here\n</code>"))
266277

267278
assert_equal expected, parse(doc.sub(/^\n/, ''))
268279
end
@@ -296,7 +307,7 @@ def test_parse_code_github_format
296307
assert_equal expected, parse(doc)
297308

298309
expected =
299-
doc(para("Example:\n<code>ruby\n""code goes here\n</code>"))
310+
doc(para("Example:", hard_break, "<code>ruby\n""code goes here\n</code>"))
300311

301312
assert_equal expected, parse(doc.sub(/^\n/, ''))
302313
end
@@ -343,7 +354,7 @@ def test_parse_definition_list_indents
343354
item(%w[one], para("Indented one characters")),
344355
item(%w[two], para("Indented two characters")),
345356
item(%w[three], para("Indented three characters"))),
346-
para("four\n : Indented four characters"))
357+
para("four", hard_break, " : Indented four characters"))
347358

348359
assert_equal expected, doc
349360
end
@@ -392,9 +403,9 @@ def test_parse_definition_list_multi_line
392403
expected = doc(
393404
list(:NOTE,
394405
item(%w[one],
395-
para("This is a definition\nthat extends to two lines")),
406+
para("This is a definition", hard_break, "that extends to two lines")),
396407
item(%w[two],
397-
para("This is another definition\nthat also extends to two lines"))))
408+
para("This is another definition", hard_break, "that also extends to two lines"))))
398409

399410
assert_equal expected, doc
400411
end
@@ -430,8 +441,8 @@ def test_parse_definition_list_no
430441
MD
431442

432443
expected = doc(
433-
para("one\n: This is a definition"),
434-
para("two\n: This is another definition"))
444+
para("one", hard_break, ": This is a definition"),
445+
para("two", hard_break, ": This is another definition"))
435446

436447
assert_equal expected, doc
437448
end
@@ -779,7 +790,7 @@ def test_parse_list_bullet_multiline
779790

780791
expected = doc(
781792
list(:BULLET,
782-
item(nil, para("one\n two"))))
793+
item(nil, para("one", hard_break, " two"))))
783794

784795
assert_equal expected, doc
785796
end
@@ -832,7 +843,7 @@ def test_parse_list_bullet_nest_continue
832843
para("outer"),
833844
list(:BULLET,
834845
item(nil,
835-
para("inner\n continue inner")))),
846+
para("inner", hard_break, " continue inner")))),
836847
item(nil,
837848
para("outer 2"))))
838849

@@ -899,7 +910,7 @@ def test_parse_note_indent
899910
expected = doc(
900911
para("Some text.{*1}[rdoc-label:foottext-1:footmark-1]"),
901912
rule(1),
902-
para("{^1}[rdoc-label:footmark-1:foottext-1] With a footnote\n\nmore"))
913+
para("{^1}[rdoc-label:footmark-1:foottext-1] With a footnote", hard_break, "more"))
903914

904915
assert_equal expected, doc
905916
end
@@ -940,8 +951,10 @@ def test_parse_note_multiple
940951
MD
941952

942953
expected = doc(
943-
para("Some text{*1}[rdoc-label:foottext-1:footmark-1]\n" +
944-
"with inline notes{*2}[rdoc-label:foottext-2:footmark-2]\n" +
954+
para("Some text{*1}[rdoc-label:foottext-1:footmark-1]",
955+
hard_break,
956+
"with inline notes{*2}[rdoc-label:foottext-2:footmark-2]",
957+
hard_break,
945958
"and an extra note.{*3}[rdoc-label:foottext-3:footmark-3]"),
946959

947960
rule(1),
@@ -1040,7 +1053,7 @@ def test_parse_paragraph_indent_three
10401053
def test_parse_paragraph_multiline
10411054
doc = parse "one\ntwo"
10421055

1043-
expected = doc(para("one\ntwo"))
1056+
expected = doc(para("one", hard_break, "two"))
10441057

10451058
assert_equal expected, doc
10461059
end
@@ -1444,7 +1457,7 @@ def test_info_string_css_classes
14441457
def test_lazy_continuation_in_blockquote
14451458
html = render("> Foo\nBar\n")
14461459
assert_match(%r{<blockquote>.*Foo.*Bar.*</blockquote>}m, html)
1447-
assert_match(%r{Foo Bar}, html)
1460+
assert_match(%r{Foo<br>\nBar}, html)
14481461
end
14491462

14501463
def test_ordered_list_paren_delimiter_not_supported
@@ -1483,8 +1496,12 @@ def test_autolinks
14831496
end
14841497

14851498
def test_backslash_line_break_not_supported
1499+
# Backslash line break syntax (\<newline>) is not specifically supported,
1500+
# but break_on_newline converts all newlines to <br>, so <br> is present.
1501+
# The backslash itself appears literally in the output.
14861502
html = render("Line one\\\nLine two\n")
1487-
assert_not_match(%r{<br>}, html)
1503+
assert_match(%r{<br>}, html)
1504+
assert_match(%r{Line one\\}, html)
14881505
end
14891506

14901507
def test_escape_tilde_not_supported

0 commit comments

Comments
 (0)