diff --git a/source/lib/text_processing.py b/source/lib/text_processing.py index f262de5f..a079bb63 100644 --- a/source/lib/text_processing.py +++ b/source/lib/text_processing.py @@ -28,7 +28,6 @@ FASTBPE = LASER + '/tools-external/fastBPE/fast' MOSES_BDIR = LASER + '/tools-external/moses-tokenizer/tokenizer/' MOSES_TOKENIZER = MOSES_BDIR + 'tokenizer.perl -q -no-escape -threads 20 -l ' -MOSES_LC = MOSES_BDIR + 'lowercase.perl' NORM_PUNC = MOSES_BDIR + 'normalize-punctuation.perl -l ' DESCAPE = MOSES_BDIR + 'deescape-special-chars.perl' REM_NON_PRINT_CHAR = MOSES_BDIR + 'remove-non-printing-char.perl' @@ -110,7 +109,7 @@ def Token(inp_fname, out_fname, lang='en', # ############################################################################### -def BPEfastLoad(line, bpe_codes): +def BPEfastLoad(bpe_codes): bpe_vocab = bpe_codes.replace('fcodes', 'fvocab') return fastBPE.fastBPE(bpe_codes, bpe_vocab) diff --git a/source/mine_bitexts.py b/source/mine_bitexts.py index 18137bf5..3a58c37f 100644 --- a/source/mine_bitexts.py +++ b/source/mine_bitexts.py @@ -28,8 +28,7 @@ sys.path.append(LASER + '/source') sys.path.append(LASER + '/source/tools') -from embed import SentenceEncoder, EncodeLoad, EncodeFile, EmbedLoad -from text_processing import Token, BPEfastApply +from embed import EmbedLoad ###############################################################################