pnkvalavala / digitaltwin

Using a single image and just 10 seconds of sample audio, our project enables you to create a video where it appears as if you're speaking the desired text.
26 stars 7 forks source link

RuntimeError TTS function call on jupyter Notebook #2

Open Yazdi9 opened 6 months ago

Yazdi9 commented 6 months ago

RuntimeError Traceback (most recent call last) Cell In[19], line 12 10 from time import time 11 import os ---> 12 tts = TextToSpeech()

File ~\tortoise-tts\tortoise\api.py:196, in TextToSpeech.init(self, autoregressive_batch_size, models_dir, enable_redaction) 191 else: 192 self.autoregressive = UnifiedVoice(max_mel_tokens=604, max_text_tokens=402, max_conditioning_inputs=2, layers=30, 193 model_dim=1024, 194 heads=16, number_text_tokens=255, start_text_token=255, checkpointing=False, 195 train_solo_embeddings=False).cpu().eval() --> 196 self.autoregressive.load_state_dict(torch.load(f'{models_dir}/autoregressive.pth')) 198 self.diffusion = DiffusionTts(model_channels=1024, num_layers=10, in_channels=100, out_channels=200, 199 in_latent_channels=1024, in_tokens=8193, dropout=0, use_fp16=False, num_heads=16, 200 layer_drop=0, unconditioned_percentage=0).cpu().eval() 201 self.diffusion.load_state_dict(torch.load(f'{models_dir}/diffusion_decoder.pth'))

File ~\anaconda3\Lib\site-packages\torch\nn\modules\module.py:2153, in Module.load_state_dict(self, state_dict, strict, assign) 2148 error_msgs.insert( 2149 0, 'Missing key(s) in state_dict: {}. '.format( 2150 ', '.join(f'"{k}"' for k in missing_keys))) 2152 if len(error_msgs) > 0: -> 2153 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format( 2154 self.class.name, "\n\t".join(error_msgs))) 2155 return _IncompatibleKeys(missing_keys, unexpected_keys)

RuntimeError: Error(s) in loading state_dict for UnifiedVoice: Unexpected key(s) in state_dict: "gpt.h.0.attn.bias", "gpt.h.0.attn.masked_bias", "gpt.h.1.attn.bias", "gpt.h.1.attn.masked_bias", "gpt.h.2.attn.bias", "gpt.h.2.attn.masked_bias", "gpt.h.3.attn.bias", "gpt.h.3.attn.masked_bias", "gpt.h.4.attn.bias", "gpt.h.4.attn.masked_bias", "gpt.h.5.attn.bias", "gpt.h.5.attn.masked_bias", "gpt.h.6.attn.bias", "gpt.h.6.attn.masked_bias", "gpt.h.7.attn.bias", "gpt.h.7.attn.masked_bias", "gpt.h.8.attn.bias", "gpt.h.8.attn.masked_bias", "gpt.h.9.attn.bias", "gpt.h.9.attn.masked_bias", "gpt.h.10.attn.bias", "gpt.h.10.attn.masked_bias", "gpt.h.11.attn.bias", "gpt.h.11.attn.masked_bias", "gpt.h.12.attn.bias", "gpt.h.12.attn.masked_bias", "gpt.h.13.attn.bias", "gpt.h.13.attn.masked_bias", "gpt.h.14.attn.bias", "gpt.h.14.attn.masked_bias", "gpt.h.15.attn.bias", "gpt.h.15.attn.masked_bias", "gpt.h.16.attn.bias", "gpt.h.16.attn.masked_bias", "gpt.h.17.attn.bias", "gpt.h.17.attn.masked_bias", "gpt.h.18.attn.bias", "gpt.h.18.attn.masked_bias", "gpt.h.19.attn.bias", "gpt.h.19.attn.masked_bias", "gpt.h.20.attn.bias", "gpt.h.20.attn.masked_bias", "gpt.h.21.attn.bias", "gpt.h.21.attn.masked_bias", "gpt.h.22.attn.bias", "gpt.h.22.attn.masked_bias", "gpt.h.23.attn.bias", "gpt.h.23.attn.masked_bias", "gpt.h.24.attn.bias", "gpt.h.24.attn.masked_bias", "gpt.h.25.attn.bias", "gpt.h.25.attn.masked_bias", "gpt.h.26.attn.bias", "gpt.h.26.attn.masked_bias", "gpt.h.27.attn.bias", "gpt.h.27.attn.masked_bias", "gpt.h.28.attn.bias", "gpt.h.28.attn.masked_bias", "gpt.h.29.attn.bias", "gpt.h.29.attn.masked_bias".