myshell-ai / MeloTTS

High-quality multi-lingual text-to-speech library by MyShell.ai. Supports English, Spanish, French, Chinese, Japanese, and Korean.
MIT License

Mac not working [Intel chip] #9

Open alants56 opened 7 months ago

alants56 commented 7 months ago

File ~/work/python/tts/MeloTTS/melo/api.py:81, in TTS.tts_to_file(self, text, speaker_id, output_path, sdp_ratio, noise_scale, noise_scale_w, speed)
     79 t = re.sub(r'([a-z])([A-Z])', r'\1 \2', t)
     80 device = self.device
---> 81 bert, ja_bert, phones, tones, lang_ids = utils.get_text_for_tts_infer(t, language, self.hps, device, self.symbol_to_id)
     82 with torch.no_grad():
     83     x_tst = phones.to(device).unsqueeze(0)

File ~/work/python/tts/MeloTTS/melo/utils.py:38, in get_text_for_tts_infer(text, language_str, hps, device, symbol_to_id)
     36     ja_bert = torch.zeros(768, len(phone))
     37 else:
---> 38     bert = get_bert(norm_text, word2ph, language_str, device)
     39     del word2ph
     40     assert bert.shape[-1] == len(phone), phone

File ~/work/python/tts/MeloTTS/melo/text/__init__.py:34, in get_bert(norm_text, word2ph, language, device)
     30 from .korean import get_bert_feature as kr_bert
     32 lang_bert_func_map = {"ZH": zh_bert, "EN": en_bert, "JP": jp_bert, 'ZH_MIX_EN': zh_mix_en_bert,
     33                       'FR': fr_bert, 'SP': sp_bert, 'ES': sp_bert, "KR": kr_bert}
---> 34 bert = lang_bert_func_map[language](norm_text, word2ph, device)
     35 return bert

File ~/work/python/tts/MeloTTS/melo/text/chinese_mix.py:199, in get_bert_feature(text, word2ph, device)
    197 def get_bert_feature(text, word2ph, device):
    198     from . import chinese_bert
--> 199     return chinese_bert.get_bert_feature(text, word2ph, model_id='bert-base-multilingual-uncased', device=device)

File ~/work/python/tts/MeloTTS/melo/text/chinese_bert.py:35, in get_bert_feature(text, word2ph, device, model_id)
     33 for i in inputs:
     34     inputs[i] = inputs[i].to(device)
---> 35 res = model(**inputs, output_hidden_states=True)
     36 res = torch.cat(res["hidden_states"][-3:-2], -1)[0].cpu()
     37 # import pdb; pdb.set_trace()
     38 # assert len(word2ph) == len(text) + 2

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py:1358, in BertForMaskedLM.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, labels, output_attentions, output_hidden_states, return_dict)
   1349 r"""
   1350 labels (torch.LongTensor of shape (batch_size, sequence_length), optional):
   1351     Labels for computing the masked language modeling loss. Indices should be in [-100, 0, ...,
   1352     config.vocab_size] (see input_ids docstring) Tokens with indices set to -100 are ignored (masked), the
   1353     loss is only computed for the tokens with labels in [0, ..., config.vocab_size]
   1354 """
   1356 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-> 1358 outputs = self.bert(
   1359     input_ids,
   1360     attention_mask=attention_mask,
   1361     token_type_ids=token_type_ids,
   1362     position_ids=position_ids,
   1363     head_mask=head_mask,
   1364     inputs_embeds=inputs_embeds,
   1365     encoder_hidden_states=encoder_hidden_states,
   1366     encoder_attention_mask=encoder_attention_mask,
   1367     output_attentions=output_attentions,
   1368     output_hidden_states=output_hidden_states,
   1369     return_dict=return_dict,
   1370 )
   1372 sequence_output = outputs[0]
   1373 prediction_scores = self.cls(sequence_output)

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py:1013, in BertModel.forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
   1006 # Prepare head mask if needed
   1007 # 1.0 in head_mask indicate we keep the head
   1008 # attention_probs has shape bsz x n_heads x N x N
   1009 # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
   1010 # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
   1011 head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)
-> 1013 embedding_output = self.embeddings(
   1014     input_ids=input_ids,
   1015     position_ids=position_ids,
   1016     token_type_ids=token_type_ids,
   1017     inputs_embeds=inputs_embeds,
   1018     past_key_values_length=past_key_values_length,
   1019 )
   1020 encoder_outputs = self.encoder(
   1021     embedding_output,
   1022     attention_mask=extended_attention_mask,
   (...)
   1030     return_dict=return_dict,
   1031 )
   1032 sequence_output = encoder_outputs[0]

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py:230, in BertEmbeddings.forward(self, input_ids, token_type_ids, position_ids, inputs_embeds, past_key_values_length)
    227     token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
    229 if inputs_embeds is None:
--> 230     inputs_embeds = self.word_embeddings(input_ids)
    231 token_type_embeddings = self.token_type_embeddings(token_type_ids)
    233 embeddings = inputs_embeds + token_type_embeddings

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
   1190 # If we don't have any hooks, we want to skip the rest of the logic in
   1191 # this function, and just call forward.
   1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
   1195 # Do not call functions when jit is used
   1196 full_backward_hooks, non_full_backward_hooks = [], []

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/modules/sparse.py:160, in Embedding.forward(self, input)
    159 def forward(self, input: Tensor) -> Tensor:
--> 160     return F.embedding(
    161         input, self.weight, self.padding_idx, self.max_norm,
    162         self.norm_type, self.scale_grad_by_freq, self.sparse)

File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/nn/functional.py:2210, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2204     # Note [embedding_renorm set_grad_enabled]
   2205     # XXX: equivalent to
   2206     # with torch.no_grad():
   2207     #   torch.embedding_renorm_
   2208     # remove once script supports set_grad_enabled
   2209     _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2210 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

RuntimeError: Placeholder storage has not been allocated on MPS device!
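For context, an ordinary README-style invocation is enough to reach this code path; the sketch below is illustrative only (the Chinese text, speaker key, and output path are placeholders, not the exact values from this report), and it assumes the TTS constructor and tts_to_file signature shown in the repository README.

# Minimal sketch of a README-style call that ends up in melo/text/chinese_bert.py
# via get_text_for_tts_infer, as in the traceback above. Values are placeholders.
from melo.api import TTS

model = TTS(language='ZH', device='cpu')   # explicitly requesting CPU
speaker_ids = model.hps.data.spk2id

# On a Mac where torch reports MPS as available, this is the kind of call that
# triggers "Placeholder storage has not been allocated on MPS device!"
model.tts_to_file("我最近在学习机器学习", speaker_ids['ZH'], 'zh.wav', speed=1.0)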

Zengyi-Qin commented 7 months ago

https://github.com/myshell-ai/MeloTTS/blob/main/docs/install.md#windows-and-macos-install

kulogix commented 5 months ago

For Apple Silicon, when manually setting the device to "cpu", there's a bug in MeloTTS/melo/text/chinese_bert.py: the device is overridden to 'mps', which raises the "Placeholder storage has not been allocated on MPS device" error.

get_bert_feature() already has a "device" parameter -- it should be honored as-is. Only if it is None should the device be auto-determined.

Make the following change:

def get_bert_feature(text, word2ph, device=None, model_id='hfl/chinese-roberta-wwm-ext-large'):
    ...
-   if (
-       sys.platform == "darwin"
-       and torch.backends.mps.is_available()
-       and device == "cpu"
-   ):
-       device = "mps"
-   if not device:
-       device = "cuda"

+   if device is None:
+       if sys.platform == "darwin" and torch.backends.mps.is_available() and torch.backends.mps.is_built():
+           device = "mps"
+       elif torch.cuda.is_available() and torch.backends.cuda.is_built():
+           device = "cuda"
+       else:
+           device = "cpu"