From e1c6bba16a166a773286f0a0123272ac5089726c Mon Sep 17 00:00:00 2001
From: scott
Date: Sat, 28 Feb 2026 13:54:16 -0500
Subject: [PATCH] fix: unify qwen tts cache dir for tokenizer loading

---
 backend/backends/pytorch_backend.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/backend/backends/pytorch_backend.py b/backend/backends/pytorch_backend.py
index d0cba11a..449fc846 100644
--- a/backend/backends/pytorch_backend.py
+++ b/backend/backends/pytorch_backend.py
@@ -178,6 +178,12 @@ def _load_model_sync(self, model_size: str):
                 status="downloading",
             )
 
+        # Use a single cache root for both HF Hub + Transformers to avoid split-cache
+        # cases where speech_tokenizer files are downloaded to one location while
+        # AutoFeatureExtractor resolves from another.
+        from huggingface_hub import constants as hf_constants
+        tts_cache_dir = hf_constants.HF_HUB_CACHE
+
         # Load the model (tqdm is patched, but filters out non-download progress)
         try:
             # Don't pass device_map on CPU: accelerate's meta-tensor mechanism
@@ -186,14 +192,16 @@ def _load_model_sync(self, model_size: str):
             if self.device == "cpu":
                 self.model = Qwen3TTSModel.from_pretrained(
                     model_path,
-                    torch_dtype=torch.float32,
+                    cache_dir=tts_cache_dir,
+                    dtype=torch.float32,
                     low_cpu_mem_usage=False,
                 )
             else:
                 self.model = Qwen3TTSModel.from_pretrained(
                     model_path,
+                    cache_dir=tts_cache_dir,
                     device_map=self.device,
-                    torch_dtype=torch.bfloat16,
+                    dtype=torch.bfloat16,
                 )
         finally:
             # Exit the patch context