diff --git a/g2p/g2p/mandarin.py b/g2p/g2p/mandarin.py
index dbc0e33..c9e044c 100755
--- a/g2p/g2p/mandarin.py
+++ b/g2p/g2p/mandarin.py
@@ -19,7 +19,12 @@
 BLANK_LEVEL = 0
 
 # conv = G2PWConverter(style='pinyin', enable_non_tradional_chinese=True)
-resource_path = r"./g2p"
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+
+parent_dir = os.path.dirname(current_dir)
+resource_path = os.path.join(parent_dir)
+
 poly_all_class_path = os.path.join(
     resource_path, "sources", "g2p_chinese_model", "polychar.txt"
 )
@@ -182,8 +187,13 @@
 must_not_er_words = {"女儿", "老儿", "男儿", "少儿", "小儿"}
 
 word_pinyin_dict = {}
+
+chinese_lexicon = os.path.join(
+    resource_path, "sources", "chinese_lexicon.txt"
+)
+
 with open(
-    r"./g2p/sources/chinese_lexicon.txt", "r", encoding="utf-8"
+    chinese_lexicon, "r", encoding="utf-8"
 ) as fread:
     txt_list = fread.readlines()
     for txt in txt_list:
@@ -192,8 +202,12 @@
 fread.close()
 
 pinyin_2_bopomofo_dict = {}
+
+pinyin_2_bpmf = os.path.join(
+    resource_path, "sources", "pinyin_2_bpmf.txt"
+)
 with open(
-    r"./g2p/sources/pinyin_2_bpmf.txt", "r", encoding="utf-8"
+    pinyin_2_bpmf, "r", encoding="utf-8"
 ) as fread:
     txt_list = fread.readlines()
     for txt in txt_list:
@@ -211,8 +225,11 @@
 }
 
 bopomofos2pinyin_dict = {}
+bpmf_2_pinyin = os.path.join(
+    resource_path, "sources", "bpmf_2_pinyin.txt"
+)
 with open(
-    r"./g2p/sources/bpmf_2_pinyin.txt", "r", encoding="utf-8"
+    bpmf_2_pinyin, "r", encoding="utf-8"
 ) as fread:
     txt_list = fread.readlines()
     for txt in txt_list:
diff --git a/g2p/g2p_generation.py b/g2p/g2p_generation.py
index 625c093..30f837e 100755
--- a/g2p/g2p_generation.py
+++ b/g2p/g2p_generation.py
@@ -112,9 +112,16 @@ def chn_eng_g2p(text: str):
         all_tokens = all_tokens[:-1]
     return all_phoneme, all_tokens
 
+current_dir = os.path.dirname(os.path.abspath(__file__))
+vocab = os.path.join(
+    current_dir, "g2p", "vocab.json"
+)
 
-text_tokenizer = PhonemeBpeTokenizer()
-with open("./g2p/g2p/vocab.json", "r", encoding='utf-8') as f:
+text_tokenizer = PhonemeBpeTokenizer(vacab_path=vocab)
+
+
+
+with open(vocab, "r", encoding='utf-8') as f:
     json_data = f.read()
 data = json.loads(json_data)
 vocab = data["vocab"]
diff --git a/g2p/utils/g2p.py b/g2p/utils/g2p.py
index 9f23fe1..77e474a 100755
--- a/g2p/utils/g2p.py
+++ b/g2p/utils/g2p.py
@@ -53,8 +53,12 @@
     "ko": phonemizer_ko,
     "de": phonemizer_de,
 }
+current_dir = os.path.dirname(os.path.abspath(__file__))
+mls_en = os.path.join(
+    current_dir, "mls_en.json"
+)
 
-with open("./g2p/utils/mls_en.json", "r", encoding='utf-8') as f:
+with open(mls_en, "r", encoding='utf-8') as f:
     json_data = f.read()
 token = json.loads(json_data)
 
diff --git a/infer/infer_utils.py b/infer/infer_utils.py
index 1db185c..7dee00f 100755
--- a/infer/infer_utils.py
+++ b/infer/infer_utils.py
@@ -26,7 +26,7 @@
 from huggingface_hub import hf_hub_download
 from sys import path
 
-path.append(os.getcwd())
+path.append(os.path.dirname(os.path.dirname(__file__)))
 
 from model import DiT, CFM
 
@@ -215,7 +215,10 @@ def prepare_model(max_frames, device, repo_id="ASLP-lab/DiffRhythm-1_2"):
     dit_ckpt_path = hf_hub_download(
         repo_id=repo_id, filename="cfm_model.pt", cache_dir="./pretrained"
     )
-    dit_config_path = "./config/diffrhythm-1b.json"
+
+    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    dit_config_path = os.path.join(BASE_DIR, "config", "diffrhythm-1b.json")
+
     with open(dit_config_path) as f:
         model_config = json.load(f)
     dit_model_cls = DiT
@@ -341,7 +344,11 @@ def parse_lyrics(lyrics: str):
 
 class CNENTokenizer:
     def __init__(self):
-        with open("./g2p/g2p/vocab.json", "r", encoding='utf-8') as file:
+
+        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        vocab_path = os.path.join(BASE_DIR, "g2p", "g2p", "vocab.json")
+
+        with open(vocab_path, "r", encoding='utf-8') as file:
            self.phone2id: dict = json.load(file)["vocab"]
         self.id2phone = {v: k for (k, v) in self.phone2id.items()}
         from g2p.g2p_generation import chn_eng_g2p
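Every hunk above applies the same change: paths that were resolved against the current working directory (e.g. "./g2p/...", "./config/...") are now anchored to the module's own location via __file__, so inference works regardless of the directory the process is launched from. A minimal, self-contained sketch of that pattern follows; the helper name resolve_from_repo_root and the comments are illustrative and not part of this patch:

import os

# Absolute directory containing this source file, independent of the process CWD.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))

# Assumes this file sits one level below the repository root (as infer/infer_utils.py does).
_REPO_ROOT = os.path.dirname(_THIS_DIR)

def resolve_from_repo_root(*parts: str) -> str:
    """Build an absolute path to a resource shipped inside the repository."""
    return os.path.join(_REPO_ROOT, *parts)

# Example usage mirroring paths touched by this diff:
#   resolve_from_repo_root("g2p", "g2p", "vocab.json")
#   resolve_from_repo_root("config", "diffrhythm-1b.json")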