From a806ef95157f184a5e62a96dcf9ec8b5fc3d129b Mon Sep 17 00:00:00 2001 From: tisbuet Date: Mon, 24 Nov 2025 14:41:50 +0000 Subject: [PATCH 1/2] english date fixed --- .gitignore | 1 + pybangla/module/parser.py | 19 ++++++++----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 0d20b64..ef01234 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.pyc +*.~lock* \ No newline at end of file diff --git a/pybangla/module/parser.py b/pybangla/module/parser.py index 1e485ef..56eb364 100644 --- a/pybangla/module/parser.py +++ b/pybangla/module/parser.py @@ -367,6 +367,10 @@ def number_to_words_converting_process(self, number_string: str, lang="bn"): return def number_to_words(self, number: str, chunk_millions=7, language="bn"): + english_status = self.contains_only_english(number) + if english_status: + language = "en" + en_extraction = list(re.finditer(self.en_regex, number, re.UNICODE)) # print("en_extraction : ", en_extraction[1]) # print("number_to_words : ", number, language) @@ -391,7 +395,6 @@ def number_to_words(self, number: str, chunk_millions=7, language="bn"): [self.number_to_words_converting_process(chunk, lang="bn") for chunk in chunks] ) number = number.replace("শূন্য", "") - return (" ".join(number.split())).replace(" শো", "শো") def digit_number_to_digit_word(self, number, language="bn"): @@ -422,28 +425,22 @@ def year_in_number(self, year_in_number: str, language="bn"): english_status = self.contains_only_english(year_in_number) # print("english_status : ", english_status) if english_status: - year_in_number = "".join( - [cfg._english2bangla2_digits_mapping[i] for i in year_in_number] - ) + language = "en" # print("year_in_number : ", year_in_number) if language == "bn": mid_text = "শো " else: - mid_text = " century " + mid_text = " " if (len(year_in_number) == 4 and year_in_number[1] != "০") or len(year_in_number) == 3: - # print("year in ") - - if year_in_number[1] != "0": - year_str = self.number_to_words(year_in_number) return ( self.number_to_words(year_in_number[:-2]) + mid_text + self.number_to_words(year_in_number[-2:]) ) + else: - # print("+++++++++ else+++++++++++++++") return self.number_to_words(year_in_number) def _replace_starting_zero(self, month): @@ -1735,7 +1732,7 @@ def replace_date_processing(self, text): word_date_string = date_string.replace(str(year), word_year) word_date_string = word_date_string.replace(str(day), day_word) original_text = original_text.replace(date, word_date_string) - # print(n_status, r_date) + if n_status == False: continue From d4b5ebee010b8702198bfdf8cf1a23b0d1b319f3 Mon Sep 17 00:00:00 2001 From: tisbuet Date: Tue, 25 Nov 2025 05:12:31 +0000 Subject: [PATCH 2/2] eval script updated --- pybangla/eval_pybangla_3.0.x.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pybangla/eval_pybangla_3.0.x.py b/pybangla/eval_pybangla_3.0.x.py index 43c0027..97bdc30 100644 --- a/pybangla/eval_pybangla_3.0.x.py +++ b/pybangla/eval_pybangla_3.0.x.py @@ -69,8 +69,8 @@ def evaluate_normalization(data, output_path): normalized_text = nrml.text_normalizer(input_text, all_operation=True) # remove punctuation - # reviewed_text = re.sub(r"[।,!?]", "", str(reviewed_text)) - # normalized_text = re.sub(r"[।,!?]", "", normalized_text) + reviewed_text = re.sub(r"[।,!?]", "", str(reviewed_text)) + normalized_text = re.sub(r"[।,!?]", "", normalized_text) print("\nInput Text: ", input_text) print("Reviewed Text: ", reviewed_text)