Skip to content

Commit 0bde533

Browse files
authored
Merge pull request #16 from voidful/codex/update-condition-in-eval_metric.py-and-add-test
Fix WER/CER empty target handling
2 parents 46ae088 + 37235cd commit 0bde533

2 files changed

Lines changed: 20 additions & 2 deletions

File tree

tfkit/test/utility/test_utility_eval_metric.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,24 @@ def test_tokenize_text(self):
151151
eval = tfkit.utility.eval_metric.EvalMetric(tokenizer, normalize_text=True)
152152
self.assertEqual(eval.tokenize_text("How's this work"), "how ' s this work")
153153

154+
def test_empty_er(self):
155+
class DummyTokenizer:
156+
special_tokens_map = {'sep_token': '[SEP]'}
157+
158+
def encode(self, text, add_special_tokens=False):
159+
return text.split()
160+
161+
def decode(self, tokens, **kwargs):
162+
return ' '.join(tokens)
163+
164+
tokenizer = DummyTokenizer()
165+
eval = tfkit.utility.eval_metric.EvalMetric(tokenizer)
166+
eval.add_record("", "", "", task='default')
167+
results = list(eval.cal_score('er'))
168+
self.assertEqual(len(results), 1)
169+
self.assertEqual(results[0][1]['WER'], 0)
170+
self.assertEqual(results[0][1]['CER'], 0)
171+
154172
@pytest.mark.skip()
155173
def testNLGWithPAD(self):
156174
tokenizer = BertTokenizer.from_pretrained('voidful/albert_chinese_tiny')

tfkit/utility/eval_metric.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -195,8 +195,8 @@ def cal_score(self, metric):
195195
targets.append(target)
196196
data_score.append([predict, target, {'wer': wer, 'cer': cer}])
197197

198-
wer = 100 * _wer(targets, predicts) if len(target) > 0 else 100
199-
cer = 100 * _cer(targets, predicts) if len(target) > 0 else 100
198+
wer = 100 * _wer(targets, predicts) if len(targets) > 0 else 100
199+
cer = 100 * _cer(targets, predicts) if len(targets) > 0 else 100
200200
result = {"WER": wer, "CER": cer}
201201
data_score = sorted(data_score, key=lambda i: i[2]['wer'], reverse=False)
202202
if "nlg" in metric:

0 commit comments

Comments
 (0)