From 3800135cad3ac75212abe84c3f4842a68f10e6f9 Mon Sep 17 00:00:00 2001 From: Alex-Wengg Date: Thu, 12 Mar 2026 15:48:05 -0400 Subject: [PATCH 1/3] refactor: move English TN to src/tts/en/ for consistency All languages now have their own subdirectories: - src/tts/en/ - English TN taggers - src/tts/fr/ - French TN taggers - src/tts/es/ - Spanish TN taggers - src/tts/de/ - German TN taggers - src/tts/zh/ - Chinese TN taggers - src/tts/hi/ - Hindi TN taggers - src/tts/ja/ - Japanese TN taggers Changes: - Moved all English TN files to src/tts/en/ - Created src/tts/en/mod.rs with number_to_words() and helper functions - Updated src/tts/mod.rs to only declare language modules - Updated src/lib.rs to use tts::en:: for English TN - Updated tests/extensive_tests.rs import All 741 tests passing. --- src/lib.rs | 44 +++---- src/tts/{ => en}/cardinal.rs | 0 src/tts/{ => en}/date.rs | 0 src/tts/{ => en}/decimal.rs | 0 src/tts/{ => en}/electronic.rs | 0 src/tts/{ => en}/measure.rs | 0 src/tts/en/mod.rs | 212 +++++++++++++++++++++++++++++++++ src/tts/{ => en}/money.rs | 0 src/tts/{ => en}/ordinal.rs | 0 src/tts/{ => en}/telephone.rs | 0 src/tts/{ => en}/time.rs | 0 src/tts/{ => en}/whitelist.rs | 0 src/tts/mod.rs | 210 +------------------------------- tests/extensive_tests.rs | 2 +- 14 files changed, 239 insertions(+), 229 deletions(-) rename src/tts/{ => en}/cardinal.rs (100%) rename src/tts/{ => en}/date.rs (100%) rename src/tts/{ => en}/decimal.rs (100%) rename src/tts/{ => en}/electronic.rs (100%) rename src/tts/{ => en}/measure.rs (100%) create mode 100644 src/tts/en/mod.rs rename src/tts/{ => en}/money.rs (100%) rename src/tts/{ => en}/ordinal.rs (100%) rename src/tts/{ => en}/telephone.rs (100%) rename src/tts/{ => en}/time.rs (100%) rename src/tts/{ => en}/whitelist.rs (100%) diff --git a/src/lib.rs b/src/lib.rs index a1c12b2..b3c6987 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -124,6 +124,7 @@ fn tn_normalize_for_lang(input: &str, lang: &str) -> String { let input = 
input.trim(); match lang { + "en" => tn_normalize(input), "fr" => tn_normalize_lang_fr(input), "es" => tn_normalize_lang_es(input), "de" => tn_normalize_lang_de(input), @@ -381,6 +382,9 @@ fn tn_parse_span_lang(span: &str, lang: &str) -> Option<(String, u8)> { } match lang { + "en" => { + try_lang_taggers!(tts::en); + } "fr" => { try_lang_taggers!(tts::fr); } @@ -569,34 +573,34 @@ pub fn normalize_sentence_with_max_span(input: &str, max_span_tokens: usize) -> pub fn tn_normalize(input: &str) -> String { let input = input.trim(); - if let Some(result) = tts::whitelist::parse(input) { + if let Some(result) = tts::en::whitelist::parse(input) { return result; } - if let Some(result) = tts::money::parse(input) { + if let Some(result) = tts::en::money::parse(input) { return result; } - if let Some(result) = tts::measure::parse(input) { + if let Some(result) = tts::en::measure::parse(input) { return result; } - if let Some(result) = tts::date::parse(input) { + if let Some(result) = tts::en::date::parse(input) { return result; } - if let Some(result) = tts::time::parse(input) { + if let Some(result) = tts::en::time::parse(input) { return result; } - if let Some(result) = tts::electronic::parse(input) { + if let Some(result) = tts::en::electronic::parse(input) { return result; } - if let Some(result) = tts::telephone::parse(input) { + if let Some(result) = tts::en::telephone::parse(input) { return result; } - if let Some(result) = tts::ordinal::parse(input) { + if let Some(result) = tts::en::ordinal::parse(input) { return result; } - if let Some(result) = tts::decimal::parse(input) { + if let Some(result) = tts::en::decimal::parse(input) { return result; } - if let Some(result) = tts::cardinal::parse(input) { + if let Some(result) = tts::en::cardinal::parse(input) { return result; } @@ -611,34 +615,34 @@ fn tn_parse_span(span: &str) -> Option<(String, u8)> { return None; } - if let Some(result) = tts::whitelist::parse(span) { + if let Some(result) = 
tts::en::whitelist::parse(span) { return Some((result, 100)); } - if let Some(result) = tts::money::parse(span) { + if let Some(result) = tts::en::money::parse(span) { return Some((result, 95)); } - if let Some(result) = tts::measure::parse(span) { + if let Some(result) = tts::en::measure::parse(span) { return Some((result, 90)); } - if let Some(result) = tts::date::parse(span) { + if let Some(result) = tts::en::date::parse(span) { return Some((result, 88)); } - if let Some(result) = tts::time::parse(span) { + if let Some(result) = tts::en::time::parse(span) { return Some((result, 85)); } - if let Some(result) = tts::electronic::parse(span) { + if let Some(result) = tts::en::electronic::parse(span) { return Some((result, 82)); } - if let Some(result) = tts::telephone::parse(span) { + if let Some(result) = tts::en::telephone::parse(span) { return Some((result, 78)); } - if let Some(result) = tts::ordinal::parse(span) { + if let Some(result) = tts::en::ordinal::parse(span) { return Some((result, 75)); } - if let Some(result) = tts::decimal::parse(span) { + if let Some(result) = tts::en::decimal::parse(span) { return Some((result, 73)); } - if let Some(result) = tts::cardinal::parse(span) { + if let Some(result) = tts::en::cardinal::parse(span) { return Some((result, 70)); } diff --git a/src/tts/cardinal.rs b/src/tts/en/cardinal.rs similarity index 100% rename from src/tts/cardinal.rs rename to src/tts/en/cardinal.rs diff --git a/src/tts/date.rs b/src/tts/en/date.rs similarity index 100% rename from src/tts/date.rs rename to src/tts/en/date.rs diff --git a/src/tts/decimal.rs b/src/tts/en/decimal.rs similarity index 100% rename from src/tts/decimal.rs rename to src/tts/en/decimal.rs diff --git a/src/tts/electronic.rs b/src/tts/en/electronic.rs similarity index 100% rename from src/tts/electronic.rs rename to src/tts/en/electronic.rs diff --git a/src/tts/measure.rs b/src/tts/en/measure.rs similarity index 100% rename from src/tts/measure.rs rename to 
src/tts/en/measure.rs diff --git a/src/tts/en/mod.rs b/src/tts/en/mod.rs new file mode 100644 index 0000000..e8258a9 --- /dev/null +++ b/src/tts/en/mod.rs @@ -0,0 +1,212 @@ +//! Text Normalization taggers for English. +//! +//! Converts written-form text to spoken English: +//! - "200" → "two hundred" +//! - "$5.50" → "five dollars and fifty cents" +//! - "January 5, 2025" → "january fifth twenty twenty five" + +pub mod cardinal; +pub mod date; +pub mod decimal; +pub mod electronic; +pub mod measure; +pub mod money; +pub mod ordinal; +pub mod telephone; +pub mod time; +pub mod whitelist; + +/// Ones words indexed by value (0..20). +const ONES: [&str; 20] = [ + "zero", + "one", + "two", + "three", + "four", + "five", + "six", + "seven", + "eight", + "nine", + "ten", + "eleven", + "twelve", + "thirteen", + "fourteen", + "fifteen", + "sixteen", + "seventeen", + "eighteen", + "nineteen", +]; + +/// Tens words indexed by tens digit (2..10 → index 0..8). +const TENS: [&str; 8] = [ + "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety", +]; + +/// Convert an integer to English words. +/// +/// Examples: +/// - `0` → `"zero"` +/// - `21` → `"twenty one"` +/// - `123` → `"one hundred twenty three"` +/// - `1000` → `"one thousand"` +/// - `-42` → `"minus forty two"` +pub fn number_to_words(n: i64) -> String { + if n == 0 { + return "zero".to_string(); + } + + if n < 0 { + // Use wrapping negation and cast to u64 to handle i64::MIN safely, + // since -i64::MIN overflows i64 but fits in u64. 
+ let abs_val = (n as u64).wrapping_neg(); + let mut parts: Vec = Vec::new(); + let mut remaining = abs_val; + + let scales: &[(u64, &str)] = &[ + (1_000_000_000_000_000_000, "quintillion"), + (1_000_000_000_000_000, "quadrillion"), + (1_000_000_000_000, "trillion"), + (1_000_000_000, "billion"), + (1_000_000, "million"), + (1_000, "thousand"), + ]; + + for &(scale_value, scale_name) in scales { + if remaining >= scale_value { + let chunk = remaining / scale_value; + remaining %= scale_value; + parts.push(format!("{} {}", chunk_to_words(chunk as u32), scale_name)); + } + } + + if remaining > 0 { + parts.push(chunk_to_words(remaining as u32)); + } + + return format!("minus {}", parts.join(" ")); + } + + let mut parts: Vec = Vec::new(); + let mut remaining = n as u64; + + // Process scale groups from largest to smallest + let scales: &[(u64, &str)] = &[ + (1_000_000_000_000_000_000, "quintillion"), + (1_000_000_000_000_000, "quadrillion"), + (1_000_000_000_000, "trillion"), + (1_000_000_000, "billion"), + (1_000_000, "million"), + (1_000, "thousand"), + ]; + + for &(scale_value, scale_name) in scales { + if remaining >= scale_value { + let chunk = remaining / scale_value; + remaining %= scale_value; + parts.push(format!("{} {}", chunk_to_words(chunk as u32), scale_name)); + } + } + + // Remainder (0..999) + if remaining > 0 { + parts.push(chunk_to_words(remaining as u32)); + } + + parts.join(" ") +} + +/// Convert a number 1..999 to words. 
+fn chunk_to_words(n: u32) -> String { + debug_assert!(n > 0 && n < 1000); + let mut parts: Vec<&str> = Vec::new(); + + let hundreds = n / 100; + let rest = n % 100; + + if hundreds > 0 { + parts.push(ONES[hundreds as usize]); + parts.push("hundred"); + } + + if rest >= 20 { + let tens_idx = (rest / 10 - 2) as usize; + parts.push(TENS[tens_idx]); + let ones = rest % 10; + if ones > 0 { + parts.push(ONES[ones as usize]); + } + } else if rest > 0 { + parts.push(ONES[rest as usize]); + } + + parts.join(" ") +} + +/// Spell each digit of a string individually. +/// +/// "14" → "one four" +pub fn spell_digits(s: &str) -> String { + s.chars() + .filter_map(|c| c.to_digit(10).map(|d| ONES[d as usize])) + .collect::>() + .join(" ") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_number_to_words_basic() { + assert_eq!(number_to_words(0), "zero"); + assert_eq!(number_to_words(1), "one"); + assert_eq!(number_to_words(10), "ten"); + assert_eq!(number_to_words(11), "eleven"); + assert_eq!(number_to_words(19), "nineteen"); + assert_eq!(number_to_words(20), "twenty"); + assert_eq!(number_to_words(21), "twenty one"); + assert_eq!(number_to_words(99), "ninety nine"); + } + + #[test] + fn test_number_to_words_hundreds() { + assert_eq!(number_to_words(100), "one hundred"); + assert_eq!(number_to_words(101), "one hundred one"); + assert_eq!(number_to_words(123), "one hundred twenty three"); + assert_eq!(number_to_words(999), "nine hundred ninety nine"); + } + + #[test] + fn test_number_to_words_thousands() { + assert_eq!(number_to_words(1000), "one thousand"); + assert_eq!(number_to_words(1001), "one thousand one"); + assert_eq!( + number_to_words(1234), + "one thousand two hundred thirty four" + ); + assert_eq!(number_to_words(10000), "ten thousand"); + assert_eq!(number_to_words(100000), "one hundred thousand"); + } + + #[test] + fn test_number_to_words_millions() { + assert_eq!(number_to_words(1000000), "one million"); + assert_eq!(number_to_words(2000003), 
"two million three"); + } + + #[test] + fn test_number_to_words_negative() { + assert_eq!(number_to_words(-42), "minus forty two"); + assert_eq!(number_to_words(-1000), "minus one thousand"); + } + + #[test] + fn test_spell_digits() { + assert_eq!(spell_digits("14"), "one four"); + assert_eq!(spell_digits("0"), "zero"); + assert_eq!(spell_digits("987"), "nine eight seven"); + } +} diff --git a/src/tts/money.rs b/src/tts/en/money.rs similarity index 100% rename from src/tts/money.rs rename to src/tts/en/money.rs diff --git a/src/tts/ordinal.rs b/src/tts/en/ordinal.rs similarity index 100% rename from src/tts/ordinal.rs rename to src/tts/en/ordinal.rs diff --git a/src/tts/telephone.rs b/src/tts/en/telephone.rs similarity index 100% rename from src/tts/telephone.rs rename to src/tts/en/telephone.rs diff --git a/src/tts/time.rs b/src/tts/en/time.rs similarity index 100% rename from src/tts/time.rs rename to src/tts/en/time.rs diff --git a/src/tts/whitelist.rs b/src/tts/en/whitelist.rs similarity index 100% rename from src/tts/whitelist.rs rename to src/tts/en/whitelist.rs diff --git a/src/tts/mod.rs b/src/tts/mod.rs index 41fea87..46c2848 100644 --- a/src/tts/mod.rs +++ b/src/tts/mod.rs @@ -7,217 +7,11 @@ //! //! Supports multiple languages via submodules. -// English (default) -pub mod cardinal; -pub mod date; -pub mod decimal; -pub mod electronic; -pub mod measure; -pub mod money; -pub mod ordinal; -pub mod telephone; -pub mod time; -pub mod whitelist; - -// Additional languages +// Languages pub mod de; +pub mod en; pub mod es; pub mod fr; pub mod hi; pub mod ja; pub mod zh; - -/// Ones words indexed by value (0..20). -const ONES: [&str; 20] = [ - "zero", - "one", - "two", - "three", - "four", - "five", - "six", - "seven", - "eight", - "nine", - "ten", - "eleven", - "twelve", - "thirteen", - "fourteen", - "fifteen", - "sixteen", - "seventeen", - "eighteen", - "nineteen", -]; - -/// Tens words indexed by tens digit (2..10 → index 0..8). 
-const TENS: [&str; 8] = [ - "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety", -]; - -/// Convert an integer to English words. -/// -/// Examples: -/// - `0` → `"zero"` -/// - `21` → `"twenty one"` -/// - `123` → `"one hundred twenty three"` -/// - `1000` → `"one thousand"` -/// - `-42` → `"minus forty two"` -pub fn number_to_words(n: i64) -> String { - if n == 0 { - return "zero".to_string(); - } - - if n < 0 { - // Use wrapping negation and cast to u64 to handle i64::MIN safely, - // since -i64::MIN overflows i64 but fits in u64. - let abs_val = (n as u64).wrapping_neg(); - let mut parts: Vec = Vec::new(); - let mut remaining = abs_val; - - let scales: &[(u64, &str)] = &[ - (1_000_000_000_000_000_000, "quintillion"), - (1_000_000_000_000_000, "quadrillion"), - (1_000_000_000_000, "trillion"), - (1_000_000_000, "billion"), - (1_000_000, "million"), - (1_000, "thousand"), - ]; - - for &(scale_value, scale_name) in scales { - if remaining >= scale_value { - let chunk = remaining / scale_value; - remaining %= scale_value; - parts.push(format!("{} {}", chunk_to_words(chunk as u32), scale_name)); - } - } - - if remaining > 0 { - parts.push(chunk_to_words(remaining as u32)); - } - - return format!("minus {}", parts.join(" ")); - } - - let mut parts: Vec = Vec::new(); - let mut remaining = n as u64; - - // Process scale groups from largest to smallest - let scales: &[(u64, &str)] = &[ - (1_000_000_000_000_000_000, "quintillion"), - (1_000_000_000_000_000, "quadrillion"), - (1_000_000_000_000, "trillion"), - (1_000_000_000, "billion"), - (1_000_000, "million"), - (1_000, "thousand"), - ]; - - for &(scale_value, scale_name) in scales { - if remaining >= scale_value { - let chunk = remaining / scale_value; - remaining %= scale_value; - parts.push(format!("{} {}", chunk_to_words(chunk as u32), scale_name)); - } - } - - // Remainder (0..999) - if remaining > 0 { - parts.push(chunk_to_words(remaining as u32)); - } - - parts.join(" ") -} - -/// 
Convert a number 1..999 to words. -fn chunk_to_words(n: u32) -> String { - debug_assert!(n > 0 && n < 1000); - let mut parts: Vec<&str> = Vec::new(); - - let hundreds = n / 100; - let rest = n % 100; - - if hundreds > 0 { - parts.push(ONES[hundreds as usize]); - parts.push("hundred"); - } - - if rest >= 20 { - let tens_idx = (rest / 10 - 2) as usize; - parts.push(TENS[tens_idx]); - let ones = rest % 10; - if ones > 0 { - parts.push(ONES[ones as usize]); - } - } else if rest > 0 { - parts.push(ONES[rest as usize]); - } - - parts.join(" ") -} - -/// Spell each digit of a string individually. -/// -/// "14" → "one four" -pub fn spell_digits(s: &str) -> String { - s.chars() - .filter_map(|c| c.to_digit(10).map(|d| ONES[d as usize])) - .collect::>() - .join(" ") -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_number_to_words_basic() { - assert_eq!(number_to_words(0), "zero"); - assert_eq!(number_to_words(1), "one"); - assert_eq!(number_to_words(10), "ten"); - assert_eq!(number_to_words(11), "eleven"); - assert_eq!(number_to_words(19), "nineteen"); - assert_eq!(number_to_words(20), "twenty"); - assert_eq!(number_to_words(21), "twenty one"); - assert_eq!(number_to_words(99), "ninety nine"); - } - - #[test] - fn test_number_to_words_hundreds() { - assert_eq!(number_to_words(100), "one hundred"); - assert_eq!(number_to_words(101), "one hundred one"); - assert_eq!(number_to_words(123), "one hundred twenty three"); - assert_eq!(number_to_words(999), "nine hundred ninety nine"); - } - - #[test] - fn test_number_to_words_thousands() { - assert_eq!(number_to_words(1000), "one thousand"); - assert_eq!(number_to_words(1001), "one thousand one"); - assert_eq!( - number_to_words(1234), - "one thousand two hundred thirty four" - ); - assert_eq!(number_to_words(10000), "ten thousand"); - assert_eq!(number_to_words(100000), "one hundred thousand"); - } - - #[test] - fn test_number_to_words_millions() { - assert_eq!(number_to_words(1000000), "one million"); - 
assert_eq!(number_to_words(2000003), "two million three"); - } - - #[test] - fn test_number_to_words_negative() { - assert_eq!(number_to_words(-42), "minus forty two"); - assert_eq!(number_to_words(-1000), "minus one thousand"); - } - - #[test] - fn test_spell_digits() { - assert_eq!(spell_digits("14"), "one four"); - assert_eq!(spell_digits("0"), "zero"); - assert_eq!(spell_digits("987"), "nine eight seven"); - } -} diff --git a/tests/extensive_tests.rs b/tests/extensive_tests.rs index e03c5a1..fb89620 100644 --- a/tests/extensive_tests.rs +++ b/tests/extensive_tests.rs @@ -1453,7 +1453,7 @@ fn test_number_to_words_i64_min() { // through the telephone tagger (the "-" is treated as a separator). // i64::MIN = -9223372036854775808: negating overflows i64 but our fix // uses wrapping_neg + u64 to handle it safely. - use text_processing_rs::tts::number_to_words; + use text_processing_rs::tts::en::number_to_words; let result = number_to_words(i64::MIN); assert!( From 68da111d060c0f56f0a860106075c8541ea6caa5 Mon Sep 17 00:00:00 2001 From: Alex-Wengg Date: Thu, 12 Mar 2026 15:51:17 -0400 Subject: [PATCH 2/3] fix: address PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes 3 issues identified in review: 1. German decimal/cardinal parser conflict - German decimal parser now only accepts comma (,) as decimal separator - Period (.) is exclusively a thousands separator in German - Prevents "2.025" from being incorrectly parsed as decimal - "2.025" → "zweitausendfuenfundzwanzig" (cardinal, correct) - "2,025" → "zwei komma null zwei fuenf" (decimal, correct) 2. Operator precedence bug in money parsers - Fixed confusing logic in DE, FR, ES money.rs parse_amount() - Old: `amount_str.contains(sep) && sep != '.' || amount_str.contains('.')` - New: `amount_str.contains(',') || amount_str.contains('.')` - Removed redundant actual_sep variable - Clearer and more maintainable code 3.
English language handling - Already fixed in previous commit (refactor: move English TN to src/tts/en/) - tn_normalize_for_lang() explicitly handles "en" language code All 740 tests passing. --- src/tts/de/decimal.rs | 16 ++++------------ src/tts/de/money.rs | 10 ++++------ src/tts/es/money.rs | 10 ++++------ src/tts/fr/money.rs | 10 ++++------ 4 files changed, 16 insertions(+), 30 deletions(-) diff --git a/src/tts/de/decimal.rs b/src/tts/de/decimal.rs index 3d8d940..c947445 100644 --- a/src/tts/de/decimal.rs +++ b/src/tts/de/decimal.rs @@ -30,14 +30,11 @@ pub fn parse(input: &str) -> Option { // Check for quantity suffix: "1,5 milliarden" let (number_part, suffix) = extract_suffix(trimmed); - // German uses comma as decimal separator, but also accept period - let sep = if number_part.contains(',') && !number_part.contains('.') { - ',' - } else if number_part.contains('.') { - '.' - } else { + // German uses comma as decimal separator (period is thousands separator) + if !number_part.contains(',') { return None; - }; + } + let sep = ','; let parts: Vec<&str> = number_part.splitn(2, sep).collect(); if parts.len() != 2 { @@ -106,11 +103,6 @@ mod tests { assert_eq!(parse("0,5"), Some("null komma fuenf".to_string())); } - #[test] - fn test_period_decimal() { - assert_eq!(parse("3.14"), Some("drei komma eins vier".to_string())); - } - #[test] fn test_negative() { assert_eq!( diff --git a/src/tts/de/money.rs b/src/tts/de/money.rs index ba5d657..091736e 100644 --- a/src/tts/de/money.rs +++ b/src/tts/de/money.rs @@ -169,12 +169,10 @@ fn parse_amount(amount_str: &str, currency: &Currency) -> Option { return None; } - // Determine decimal separator: German uses comma - let sep = if amount_str.contains(',') { ',' } else { '.' }; - - if amount_str.contains(sep) && sep != '.' || amount_str.contains('.') { - let actual_sep = if amount_str.contains(',') { ',' } else { '.' 
}; - let parts: Vec<&str> = amount_str.splitn(2, actual_sep).collect(); + // Check for decimal separator (comma or period) + if amount_str.contains(',') || amount_str.contains('.') { + let sep = if amount_str.contains(',') { ',' } else { '.' }; + let parts: Vec<&str> = amount_str.splitn(2, sep).collect(); if parts.len() == 2 { let int_clean: String = parts[0].chars().filter(|c| c.is_ascii_digit()).collect(); let dollars: i64 = if int_clean.is_empty() { diff --git a/src/tts/es/money.rs b/src/tts/es/money.rs index 3d8605f..1e8bb4a 100644 --- a/src/tts/es/money.rs +++ b/src/tts/es/money.rs @@ -172,12 +172,10 @@ fn parse_amount(amount_str: &str, currency: &Currency) -> Option { return None; } - // Determine decimal separator: Spanish uses comma, but accept period too - let sep = if amount_str.contains(',') { ',' } else { '.' }; - - if amount_str.contains(sep) && sep != '.' || amount_str.contains('.') { - let actual_sep = if amount_str.contains(',') { ',' } else { '.' }; - let parts: Vec<&str> = amount_str.splitn(2, actual_sep).collect(); + // Check for decimal separator (comma or period) + if amount_str.contains(',') || amount_str.contains('.') { + let sep = if amount_str.contains(',') { ',' } else { '.' }; + let parts: Vec<&str> = amount_str.splitn(2, sep).collect(); if parts.len() == 2 { let int_clean: String = parts[0].chars().filter(|c| c.is_ascii_digit()).collect(); let dollars: i64 = if int_clean.is_empty() { diff --git a/src/tts/fr/money.rs b/src/tts/fr/money.rs index 13b2ec8..4007b03 100644 --- a/src/tts/fr/money.rs +++ b/src/tts/fr/money.rs @@ -173,12 +173,10 @@ fn parse_amount(amount_str: &str, currency: &Currency) -> Option { return None; } - // Determine decimal separator: French uses comma - let sep = if amount_str.contains(',') { ',' } else { '.' }; - - if amount_str.contains(sep) && sep != '.' || amount_str.contains('.') { - let actual_sep = if amount_str.contains(',') { ',' } else { '.' 
}; - let parts: Vec<&str> = amount_str.splitn(2, actual_sep).collect(); + // Check for decimal separator (comma or period) + if amount_str.contains(',') || amount_str.contains('.') { + let sep = if amount_str.contains(',') { ',' } else { '.' }; + let parts: Vec<&str> = amount_str.splitn(2, sep).collect(); if parts.len() == 2 { let int_clean: String = parts[0].chars().filter(|c| c.is_ascii_digit()).collect(); let dollars: i64 = if int_clean.is_empty() { From de3388a37f009019c13bfdf8ecbf0fd6c4aa4361 Mon Sep 17 00:00:00 2001 From: Alex-Wengg Date: Thu, 12 Mar 2026 15:57:08 -0400 Subject: [PATCH 3/3] docs: fix stale German decimal module doc comment Remove misleading example claiming "3.14" is supported as decimal input. German decimal parser only accepts comma (,) as decimal separator. Period (.) is exclusively for thousands separation in cardinal numbers. Updated doc to clarify: - Comma is the decimal separator - Period is the thousands separator (cardinal only) - Added negative number example instead --- src/tts/de/decimal.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tts/de/decimal.rs b/src/tts/de/decimal.rs index c947445..983e252 100644 --- a/src/tts/de/decimal.rs +++ b/src/tts/de/decimal.rs @@ -3,7 +3,10 @@ //! Converts written decimal numbers to spoken German: //! - "3,14" → "drei komma eins vier" //! - "0,5" → "null komma fuenf" -//! - "3.14" → "drei komma eins vier" +//! - "-3,14" → "minus drei komma eins vier" +//! +//! German uses comma (,) as the decimal separator. +//! Period (.) is used as thousands separator in cardinal numbers. use super::{number_to_words, spell_digits};