Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
*.pyc
*.~lock*
4 changes: 2 additions & 2 deletions pybangla/eval_pybangla_3.0.x.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ def evaluate_normalization(data, output_path):
normalized_text = nrml.text_normalizer(input_text, all_operation=True)

# remove punctuation
# reviewed_text = re.sub(r"[।,!?]", "", str(reviewed_text))
# normalized_text = re.sub(r"[।,!?]", "", normalized_text)
reviewed_text = re.sub(r"[।,!?]", "", str(reviewed_text))
normalized_text = re.sub(r"[।,!?]", "", normalized_text)

print("\nInput Text: ", input_text)
print("Reviewed Text: ", reviewed_text)
Expand Down
19 changes: 8 additions & 11 deletions pybangla/module/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,10 @@ def number_to_words_converting_process(self, number_string: str, lang="bn"):
return

def number_to_words(self, number: str, chunk_millions=7, language="bn"):
english_status = self.contains_only_english(number)
if english_status:
language = "en"

en_extraction = list(re.finditer(self.en_regex, number, re.UNICODE))
# print("en_extraction : ", en_extraction[1])
# print("number_to_words : ", number, language)
Expand All @@ -391,7 +395,6 @@ def number_to_words(self, number: str, chunk_millions=7, language="bn"):
[self.number_to_words_converting_process(chunk, lang="bn") for chunk in chunks]
)
number = number.replace("শূন্য", "")

return (" ".join(number.split())).replace(" শো", "শো")

def digit_number_to_digit_word(self, number, language="bn"):
Expand Down Expand Up @@ -422,28 +425,22 @@ def year_in_number(self, year_in_number: str, language="bn"):
english_status = self.contains_only_english(year_in_number)
# print("english_status : ", english_status)
if english_status:
year_in_number = "".join(
[cfg._english2bangla2_digits_mapping[i] for i in year_in_number]
)
language = "en"

# print("year_in_number : ", year_in_number)
if language == "bn":
mid_text = "শো "
else:
mid_text = " century "
mid_text = " "

if (len(year_in_number) == 4 and year_in_number[1] != "০") or len(year_in_number) == 3:
# print("year in ")

if year_in_number[1] != "0":
year_str = self.number_to_words(year_in_number)
return (
self.number_to_words(year_in_number[:-2])
+ mid_text
+ self.number_to_words(year_in_number[-2:])
)

else:
# print("+++++++++ else+++++++++++++++")
return self.number_to_words(year_in_number)

def _replace_starting_zero(self, month):
Expand Down Expand Up @@ -1735,7 +1732,7 @@ def replace_date_processing(self, text):
word_date_string = date_string.replace(str(year), word_year)
word_date_string = word_date_string.replace(str(day), day_word)
original_text = original_text.replace(date, word_date_string)
# print(n_status, r_date)
if n_status == False:
continue

Expand Down