From bfeb41f513e840e2d5fb4c5336aee31705ef8646 Mon Sep 17 00:00:00 2001 From: BMagnu Date: Wed, 17 Apr 2024 15:01:40 +0200 Subject: [PATCH 1/4] Very basic ruby implementation --- crates/core/src/document/html/engine.rs | 65 +++++++++++++++++++++++++ crates/core/src/document/html/layout.rs | 3 ++ 2 files changed, 68 insertions(+) diff --git a/crates/core/src/document/html/engine.rs b/crates/core/src/document/html/engine.rs index 89895d3f..590bb8bc 100644 --- a/crates/core/src/document/html/engine.rs +++ b/crates/core/src/document/html/engine.rs @@ -701,6 +701,48 @@ impl Engine { inlines.push(InlineMaterial::LineBreak); return; }, + "ruby" => { + // Ruby needs to be applied to the first text element only, with the content from the succeeding rt tag. + // So grab all text until rt, and attach its content to the first text child. + let mut text_datas = Vec::new(); + for child in node.children() { + match child.data() { + NodeData::Element(ElementData { name, .. }) => { + if name == "rt" { + let mut ruby_content = "".to_owned(); + for subchild in child.children() { + match subchild.data() { + NodeData::Text(TextData { text, .. }) => { + ruby_content.push_str(&decode_entities(text)) + }, + _ => {}, + } + } + style.ruby = Some(ruby_content); + + for TextData {text, offset} in &text_datas { + inlines.push(InlineMaterial::Text(TextMaterial { + offset: *offset, + text: decode_entities(text).into_owned(), + style: style.clone(), + })); + style.ruby = None; + } + + text_datas.clear(); + } + }, + NodeData::Text(text_data) => { + text_datas.push(text_data.clone()); + }, + _ => {}, + } + } + return; + }, + "rt" => { + return; + }, _ => {}, } @@ -834,6 +876,7 @@ impl Engine { offset: local_offset, language: style.language.clone(), text: buf.to_string(), + ruby: if start_index == 0 { style.ruby.clone() } else { None }, plan, font_features: style.font_features.clone(), font_kind: style.font_kind, @@ -1267,6 +1310,27 @@ impl Engine { font_size: element.font_size, color: element.color, })); + if let Some(ruby) = &element.ruby { + let ruby_plan = { + let font = self.fonts.as_mut().unwrap() + .get_mut(element.font_kind, element.font_style, element.font_weight); + font.set_size(element.font_size / 2, self.dpi); + font.plan(ruby.to_string(), None, style.font_features.as_deref()) + }; + page.push(DrawCommand::ExtraText(TextCommand { + offset: element.offset + root_data.start_offset, + position: pt + pt!(0, -ascender), + rect, + text: ruby.to_string(), + plan: ruby_plan, + uri: element.uri.clone(), + font_kind: element.font_kind, + font_style: element.font_style, + font_weight: element.font_weight, + font_size: element.font_size / 2, + color: element.color, + })); + } }, ParagraphElement::Image(element) => { while let Some(offset) = markers.get(markers_index) { @@ -1490,6 +1554,7 @@ impl Engine { letter_spacing: element.letter_spacing, color: element.color, uri: element.uri.clone(), + ruby: element.ruby.clone(), }), } } diff --git a/crates/core/src/document/html/layout.rs b/crates/core/src/document/html/layout.rs index 0941cad3..e247beb3 100644 --- a/crates/core/src/document/html/layout.rs +++ b/crates/core/src/document/html/layout.rs @@ -69,6 +69,7 @@ pub struct StyleData { pub vertical_align: i32, pub list_style_type: Option, pub uri: Option, + pub ruby: Option, } #[derive(Debug, Copy, Clone)] @@ -193,6 +194,7 @@ impl Default for StyleData { vertical_align: 0, list_style_type: None, uri: None, + ruby: None, } } } @@ -326,6 +328,7 @@ pub struct TextElement { pub offset: usize, pub language: Option, pub text: String, + pub ruby: Option, pub plan: RenderPlan, pub font_features: Option>, pub font_kind: FontKind, From 1685aa58ae2de32bfe0c4a6f73ed6c863957efe6 Mon Sep 17 00:00:00 2001 From: BMagnu Date: Wed, 17 Apr 2024 16:04:29 +0200 Subject: [PATCH 2/4] Improve distribution and width of furigana --- crates/core/src/document/html/engine.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/crates/core/src/document/html/engine.rs b/crates/core/src/document/html/engine.rs index 590bb8bc..55b8e7f0 100644 --- a/crates/core/src/document/html/engine.rs +++ b/crates/core/src/document/html/engine.rs @@ -853,7 +853,9 @@ impl Engine { font.plan(" 0.", None, None) }; let mut start_index = 0; + let mut ruby_start_index = 0; for (end_index, _is_hardbreak) in LineBreakIterator::new(text) { + let text_len = text[start_index..end_index].chars().count(); for chunk in text[start_index..end_index].split_inclusive(char::is_whitespace) { if let Some((i, c)) = chunk.char_indices().next_back() { let j = i + if c.is_whitespace() { 0 } else { c.len_utf8() }; @@ -870,13 +872,16 @@ impl Engine { }; plan.space_out(style.letter_spacing); + let ruby_chars = style.ruby.clone().map(|r| r.chars().count()); + let ruby_chars_use = ruby_chars.map(|c| std::cmp::min(c.div_ceil(text_len), (c - ruby_start_index).div_ceil(text_len))); + items.push(ParagraphItem::Box { width: plan.width, data: ParagraphElement::Text(TextElement { offset: local_offset, language: style.language.clone(), text: buf.to_string(), - ruby: if start_index == 0 { style.ruby.clone() } else { None }, + ruby: style.ruby.clone().map(|r| r.chars().skip(ruby_start_index).take(ruby_chars_use.unwrap()).collect()), plan, font_features: style.font_features.clone(), font_kind: style.font_kind, @@ -889,6 +894,8 @@ impl Engine { uri: style.uri.clone(), }), }); + + ruby_start_index += ruby_chars_use.unwrap_or(0); } if c.is_whitespace() { if c == '\n' && parent_style.retain_whitespace { @@ -1279,7 +1286,7 @@ impl Engine { for i in last_index..index { match items[i] { - ParagraphItem::Box { ref data, width } => { + ParagraphItem::Box { ref data, mut width } => { match data { ParagraphElement::Text(element) => { let pt = pt!(position.x, position.y - element.vertical_align); @@ -1317,6 +1324,9 @@ impl Engine { font.set_size(element.font_size / 2, self.dpi); font.plan(ruby.to_string(), None, style.font_features.as_deref()) }; + if width < ruby_plan.width { + width = ruby_plan.width; + } page.push(DrawCommand::ExtraText(TextCommand { offset: element.offset + root_data.start_offset, position: pt + pt!(0, -ascender), From 7efd9398e88a6ab0bb38a27f66981d688326ec69 Mon Sep 17 00:00:00 2001 From: BMagnu Date: Mon, 22 Apr 2024 09:55:49 +0200 Subject: [PATCH 3/4] Simplify ruby charcount --- crates/core/src/document/html/engine.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/core/src/document/html/engine.rs b/crates/core/src/document/html/engine.rs index 55b8e7f0..b4d0b623 100644 --- a/crates/core/src/document/html/engine.rs +++ b/crates/core/src/document/html/engine.rs @@ -873,7 +873,7 @@ impl Engine { plan.space_out(style.letter_spacing); let ruby_chars = style.ruby.clone().map(|r| r.chars().count()); - let ruby_chars_use = ruby_chars.map(|c| std::cmp::min(c.div_ceil(text_len), (c - ruby_start_index).div_ceil(text_len))); + let ruby_chars_use = ruby_chars.map(|c| (c - ruby_start_index).div_ceil(text_len)); items.push(ParagraphItem::Box { width: plan.width, From 54b9d5467083affc85c96f8c2defc5988fff1098 Mon Sep 17 00:00:00 2001 From: BMagnu Date: Mon, 22 Apr 2024 10:49:07 +0200 Subject: [PATCH 4/4] Moved ruby rendering away from a per-character base Instead, always push inline-blocks with ruby as one paragraph box. --- crates/core/src/document/html/engine.rs | 305 +++++++++++++----------- 1 file changed, 168 insertions(+), 137 deletions(-) diff --git a/crates/core/src/document/html/engine.rs b/crates/core/src/document/html/engine.rs index b4d0b623..51451555 100644 --- a/crates/core/src/document/html/engine.rs +++ b/crates/core/src/document/html/engine.rs @@ -852,159 +852,190 @@ impl Engine { font.set_size(font_size, self.dpi); font.plan(" 0.", None, None) }; - let mut start_index = 0; - let mut ruby_start_index = 0; - for (end_index, _is_hardbreak) in LineBreakIterator::new(text) { - let text_len = text[start_index..end_index].chars().count(); - for chunk in text[start_index..end_index].split_inclusive(char::is_whitespace) { - if let Some((i, c)) = chunk.char_indices().next_back() { - let j = i + if c.is_whitespace() { 0 } else { c.len_utf8() }; - if j > 0 { - let buf = &text[start_index..start_index+j]; - let local_offset = offset + start_index; - let mut plan = { - let font = self.fonts.as_mut().unwrap() - .get_mut(style.font_kind, - style.font_style, - style.font_weight); - font.set_size(font_size, self.dpi); - font.plan(buf, None, style.font_features.as_deref()) - }; - plan.space_out(style.letter_spacing); - - let ruby_chars = style.ruby.clone().map(|r| r.chars().count()); - let ruby_chars_use = ruby_chars.map(|c| (c - ruby_start_index).div_ceil(text_len)); - - items.push(ParagraphItem::Box { - width: plan.width, - data: ParagraphElement::Text(TextElement { - offset: local_offset, - language: style.language.clone(), - text: buf.to_string(), - ruby: style.ruby.clone().map(|r| r.chars().skip(ruby_start_index).take(ruby_chars_use.unwrap()).collect()), - plan, - font_features: style.font_features.clone(), - font_kind: style.font_kind, - font_style: style.font_style, - font_weight: style.font_weight, - vertical_align: style.vertical_align, - letter_spacing: style.letter_spacing, - font_size, - color: style.color, - uri: style.uri.clone(), - }), - }); - - ruby_start_index += ruby_chars_use.unwrap_or(0); - } - if c.is_whitespace() { - if c == '\n' && parent_style.retain_whitespace { - let stretch = if parent_style.text_align == TextAlign::Center { big_stretch } else { line_width }; - items.push(ParagraphItem::Penalty { penalty: INFINITE_PENALTY, width: 0, flagged: false }); - items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); - - items.push(ParagraphItem::Penalty { width: 0, penalty: -INFINITE_PENALTY, flagged: false }); - - if parent_style.text_align == TextAlign::Center { - items.push(ParagraphItem::Box { width: 0, data: ParagraphElement::Nothing }); - items.push(ParagraphItem::Penalty { width: 0, penalty: INFINITE_PENALTY, flagged: false }); - items.push(ParagraphItem::Glue { width: 0, stretch: big_stretch, shrink: 0 }); - } - start_index += chunk.len(); - continue; + if let Some(ruby_text) = style.ruby.clone() { + // When an inline-block has ruby annotated, it's no longer allowed to linebreak. + // In this case, the entire block has to be pushed as a ParagraphItem::Box + // with the ruby characters rendered on top of the entire box. + let mut plan = { + let font = self.fonts.as_mut().unwrap() + .get_mut(style.font_kind, + style.font_style, + style.font_weight); + font.set_size(font_size, self.dpi); + font.plan(text, None, style.font_features.as_deref()) + }; + plan.space_out(style.letter_spacing); + + items.push(ParagraphItem::Box { + width: plan.width, + data: ParagraphElement::Text(TextElement { + offset: *offset, + language: style.language.clone(), + text: text.to_string(), + ruby: Some(ruby_text), + plan, + font_features: style.font_features.clone(), + font_kind: style.font_kind, + font_style: style.font_style, + font_weight: style.font_weight, + vertical_align: style.vertical_align, + letter_spacing: style.letter_spacing, + font_size, + color: style.color, + uri: style.uri.clone(), + }), + }); + } + else { + let mut start_index = 0; + + for (end_index, _is_hardbreak) in LineBreakIterator::new(text) { + for chunk in text[start_index..end_index].split_inclusive(char::is_whitespace) { + if let Some((i, c)) = chunk.char_indices().next_back() { + let j = i + if c.is_whitespace() { 0 } else { c.len_utf8() }; + if j > 0 { + let buf = &text[start_index..start_index+j]; + let local_offset = offset + start_index; + let mut plan = { + let font = self.fonts.as_mut().unwrap() + .get_mut(style.font_kind, + style.font_style, + style.font_weight); + font.set_size(font_size, self.dpi); + font.plan(buf, None, style.font_features.as_deref()) + }; + plan.space_out(style.letter_spacing); + + items.push(ParagraphItem::Box { + width: plan.width, + data: ParagraphElement::Text(TextElement { + offset: local_offset, + language: style.language.clone(), + text: buf.to_string(), + ruby: None, + plan, + font_features: style.font_features.clone(), + font_kind: style.font_kind, + font_style: style.font_style, + font_weight: style.font_weight, + vertical_align: style.vertical_align, + letter_spacing: style.letter_spacing, + font_size, + color: style.color, + uri: style.uri.clone(), + }), + }); } + if c.is_whitespace() { + if c == '\n' && parent_style.retain_whitespace { + let stretch = if parent_style.text_align == TextAlign::Center { big_stretch } else { line_width }; - let last_c = text[..start_index+i].chars().next_back().or_else(|| { - if index > 0 { - inlines[index-1].text().and_then(|text| text.chars().next_back()) - } else { - None - } - }); + items.push(ParagraphItem::Penalty { penalty: INFINITE_PENALTY, width: 0, flagged: false }); + items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); - let has_more = text[start_index+i..].chars().any(|c| !c.is_xml_whitespace()) || - inlines[index+1..].iter().any(|m| m.text().map_or(false, - |text| text.chars().any(|c| !c.is_xml_whitespace()))); + items.push(ParagraphItem::Penalty { width: 0, penalty: -INFINITE_PENALTY, flagged: false }); - if !parent_style.retain_whitespace && c.is_xml_whitespace() && - (last_c.map(|c| c.is_xml_whitespace()) != Some(false) || !has_more) { + if parent_style.text_align == TextAlign::Center { + items.push(ParagraphItem::Box { width: 0, data: ParagraphElement::Nothing }); + items.push(ParagraphItem::Penalty { width: 0, penalty: INFINITE_PENALTY, flagged: false }); + items.push(ParagraphItem::Glue { width: 0, stretch: big_stretch, shrink: 0 }); + } start_index += chunk.len(); continue; - } + } - let mut width = if !parent_style.retain_whitespace { - space_plan.glyph_advance(0) - } else if let Some(index) = FONT_SPACES.chars().position(|x| x == c) { - space_plan.glyph_advance(index) - } else if let Some(ratio) = WORD_SPACE_RATIOS.get(&c) { - (space_plan.glyph_advance(0) as f32 * ratio) as i32 - } else if let Some(ratio) = EM_SPACE_RATIOS.get(&c) { - pt_to_px(style.font_size * ratio, self.dpi).round() as i32 - } else { - space_plan.glyph_advance(0) - }; + let last_c = text[..start_index+i].chars().next_back().or_else(|| { + if index > 0 { + inlines[index-1].text().and_then(|text| text.chars().next_back()) + } else { + None + } + }); + + let has_more = text[start_index+i..].chars().any(|c| !c.is_xml_whitespace()) || + inlines[index+1..].iter().any(|m| m.text().map_or(false, + |text| text.chars().any(|c| !c.is_xml_whitespace()))); - width += match style.word_spacing { - WordSpacing::Normal => 0, - WordSpacing::Length(l) => l, - WordSpacing::Ratio(r) => (r * width as f32) as i32, - } + style.letter_spacing; + if !parent_style.retain_whitespace && c.is_xml_whitespace() && + (last_c.map(|c| c.is_xml_whitespace()) != Some(false) || !has_more) { + start_index += chunk.len(); + continue; + } - let is_unbreakable = c == '\u{00A0}' || c == '\u{202F}' || c == '\u{2007}'; + let mut width = if !parent_style.retain_whitespace { + space_plan.glyph_advance(0) + } else if let Some(index) = FONT_SPACES.chars().position(|x| x == c) { + space_plan.glyph_advance(index) + } else if let Some(ratio) = WORD_SPACE_RATIOS.get(&c) { + (space_plan.glyph_advance(0) as f32 * ratio) as i32 + } else if let Some(ratio) = EM_SPACE_RATIOS.get(&c) { + pt_to_px(style.font_size * ratio, self.dpi).round() as i32 + } else { + space_plan.glyph_advance(0) + }; - if (is_unbreakable || (parent_style.retain_whitespace && c.is_xml_whitespace())) && - (last_c == Some('\n') || last_c.is_none()) { - items.push(ParagraphItem::Box { width: 0, data: ParagraphElement::Nothing }); - } + width += match style.word_spacing { + WordSpacing::Normal => 0, + WordSpacing::Length(l) => l, + WordSpacing::Ratio(r) => (r * width as f32) as i32, + } + style.letter_spacing; - if is_unbreakable { - items.push(ParagraphItem::Penalty { width: 0, penalty: INFINITE_PENALTY, flagged: false }); - } + let is_unbreakable = c == '\u{00A0}' || c == '\u{202F}' || c == '\u{2007}'; - match parent_style.text_align { - TextAlign::Justify => { - items.push(ParagraphItem::Glue { width, stretch: width/2, shrink: width/3 }); - }, - TextAlign::Center => { - if style.font_kind == FontKind::Monospace || is_unbreakable { - items.push(ParagraphItem::Glue { width, stretch: 0, shrink: 0 }); - } else { - let stretch = 3 * width; - items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); - items.push(ParagraphItem::Penalty { width: 0, penalty: 0, flagged: false }); - items.push(ParagraphItem::Glue { width, stretch: -2 * stretch, shrink: 0 }); - items.push(ParagraphItem::Box { width: 0, data: ParagraphElement::Nothing }); - items.push(ParagraphItem::Penalty { width: 0, penalty: INFINITE_PENALTY, flagged: false }); - items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); - } - }, - TextAlign::Left | TextAlign::Right => { - if style.font_kind == FontKind::Monospace || is_unbreakable { - items.push(ParagraphItem::Glue { width, stretch: 0, shrink: 0 }); - } else { - let stretch = 3 * width; - items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); - items.push(ParagraphItem::Penalty { width: 0, penalty: 0, flagged: false }); - items.push(ParagraphItem::Glue { width, stretch: -stretch, shrink: 0 }); - } - }, - } - } else if end_index < text.len() { - let penalty = if c == '-' { self.hyphen_penalty } else { 0 }; - let flagged = penalty > 0; - if matches!(parent_style.text_align, TextAlign::Justify | TextAlign::Center) { - items.push(ParagraphItem::Penalty { width: 0, penalty, flagged }); - } else { - let stretch = 3 * space_plan.glyph_advance(0); - items.push(ParagraphItem::Penalty { width: 0, penalty: INFINITE_PENALTY, flagged: false }); - items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); - items.push(ParagraphItem::Penalty { width: 0, penalty: 10*penalty, flagged: true }); - items.push(ParagraphItem::Glue { width: 0, stretch: -stretch, shrink: 0 }); + if (is_unbreakable || (parent_style.retain_whitespace && c.is_xml_whitespace())) && + (last_c == Some('\n') || last_c.is_none()) { + items.push(ParagraphItem::Box { width: 0, data: ParagraphElement::Nothing }); + } + + if is_unbreakable { + items.push(ParagraphItem::Penalty { width: 0, penalty: INFINITE_PENALTY, flagged: false }); + } + + match parent_style.text_align { + TextAlign::Justify => { + items.push(ParagraphItem::Glue { width, stretch: width/2, shrink: width/3 }); + }, + TextAlign::Center => { + if style.font_kind == FontKind::Monospace || is_unbreakable { + items.push(ParagraphItem::Glue { width, stretch: 0, shrink: 0 }); + } else { + let stretch = 3 * width; + items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); + items.push(ParagraphItem::Penalty { width: 0, penalty: 0, flagged: false }); + items.push(ParagraphItem::Glue { width, stretch: -2 * stretch, shrink: 0 }); + items.push(ParagraphItem::Box { width: 0, data: ParagraphElement::Nothing }); + items.push(ParagraphItem::Penalty { width: 0, penalty: INFINITE_PENALTY, flagged: false }); + items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); + } + }, + TextAlign::Left | TextAlign::Right => { + if style.font_kind == FontKind::Monospace || is_unbreakable { + items.push(ParagraphItem::Glue { width, stretch: 0, shrink: 0 }); + } else { + let stretch = 3 * width; + items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); + items.push(ParagraphItem::Penalty { width: 0, penalty: 0, flagged: false }); + items.push(ParagraphItem::Glue { width, stretch: -stretch, shrink: 0 }); + } + }, + } + } else if end_index < text.len() { + let penalty = if c == '-' { self.hyphen_penalty } else { 0 }; + let flagged = penalty > 0; + if matches!(parent_style.text_align, TextAlign::Justify | TextAlign::Center) { + items.push(ParagraphItem::Penalty { width: 0, penalty, flagged }); + } else { + let stretch = 3 * space_plan.glyph_advance(0); + items.push(ParagraphItem::Penalty { width: 0, penalty: INFINITE_PENALTY, flagged: false }); + items.push(ParagraphItem::Glue { width: 0, stretch, shrink: 0 }); + items.push(ParagraphItem::Penalty { width: 0, penalty: 10*penalty, flagged: true }); + items.push(ParagraphItem::Glue { width: 0, stretch: -stretch, shrink: 0 }); + } } } + start_index += chunk.len(); } - start_index += chunk.len(); } } },