Closed: bukhalmae145 closed this issue 7 months ago.
PyTorch version should be >= 1.9.
I downloaded the latest version of PyTorch.
Sorry, I have no experience with MacOS.
Sorry to bother you, but I have another question. Can I use the Korean HuBERT model (https://huggingface.co/team-lucid/hubert-base-korean) with any of your SVC models? The pronunciation of the model I trained with the current HuBERT model sounds weird and awkward.
Yes, but you must train a pretrained model with hubert-base-korean, using data from many singers.
So you basically mean that I can't use the model from the link above directly?
In theory, it can't be used.
Can you please describe the specific procedure for adapting the Korean HuBERT model to Grad-SVC?
It will take some time.
Yeah, sure, please.
I will post a demo in 2 days.
```python
import sys,os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
import argparse
import torch
import librosa
from transformers import HubertModel


def load_audio(file: str, sr: int = 16000):
    x, sr = librosa.load(file, sr=sr)
    return x


def load_model(path, device):
    model = HubertModel.from_pretrained(path)
    model.eval()
    if not (device == "cpu"):
        model.half()
    model.to(device)
    return model


def pred_vec(model, wavPath, vecPath, device):
    audio = load_audio(wavPath)
    feats = audio
    feats = torch.from_numpy(feats).to(device)
    feats = feats[None, :]
    if not (device == "cpu"):
        feats = feats.half()
    with torch.no_grad():
        vec = model(feats).last_hidden_state
        vec = vec.squeeze().data.cpu().float().numpy()
        print(feats.shape)
        print(vec.shape)  # [length, dim=768] hop=320
    np.save(vecPath, vec, allow_pickle=False)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--wav", help="wav", dest="wav")
    parser.add_argument("-v", "--vec", help="vec", dest="vec")
    args = parser.parse_args()
    print(args.wav)
    print(args.vec)

    wavPath = args.wav
    vecPath = args.vec

    device = "cuda" if torch.cuda.is_available() else "cpu"
    hubert = load_model('./hubert-base-korean', device)
    pred_vec(hubert, wavPath, vecPath, device)
```
prepare/preprocess_hubert.py should be changed in the same way, and in the configs, n_vecs: 256 should be changed to 768.
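For reference, a minimal sketch of that config edit, assuming the key sits at the top level of configs/base.yaml (the exact layout of the stock file may differ):

```yaml
# configs/base.yaml (excerpt, hypothetical layout)
n_vecs: 768   # was 256; must match hubert-base-korean's hidden_size
```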
I got this error:

```
python prepare/preprocess_hubert.py
None
None
Some weights of the model checkpoint at ./hubert-base-korean were not used when initializing HubertModel: ['encoder.pos_conv_embed.conv.weight_g', 'encoder.pos_conv_embed.conv.weight_v']
```
Have you changed prepare/preprocess_hubert.py in the same way?

```python
def load_model(path, device):
    model = HubertModel.from_pretrained(path)
    model.eval()
    if not (device == "cpu"):
        model.half()
    model.to(device)
    return model


def pred_vec(model, wavPath, vecPath, device):
    audio = load_audio(wavPath)
    feats = audio
    feats = torch.from_numpy(feats).to(device)
    feats = feats[None, :]
    if not (device == "cpu"):
        feats = feats.half()
    with torch.no_grad():
        vec = model(feats).last_hidden_state
        vec = vec.squeeze().data.cpu().float().numpy()
        print(feats.shape)
        print(vec.shape)  # [length, dim=768] hop=320
    np.save(vecPath, vec, allow_pickle=False)
```
```
Traceback (most recent call last):
  File "prepare/preprocess_hubert.py", line 53, in <module>
    pred_vec(hubert, wavPath, vecPath, device)
  File "prepare/preprocess_hubert.py", line 26, in pred_vec
    audio = load_audio(wavPath)
  File "prepare/preprocess_hubert.py", line 12, in load_audio
    x, sr = librosa.load(file, sr=sr)
  File "/Users/workstation/Music/Grad-SVC/Grad-SVC/lib/python3.8/site-packages/librosa/core/audio.py", line 183, in load
    y, sr_native = __audioread_load(path, offset, duration, dtype)
  File "/Users/workstation/Music/Grad-SVC/Grad-SVC/lib/python3.8/site-packages/decorator.py", line 232, in fun
    return caller(func, *(extras + args), **kw)
  File "/Users/workstation/Music/Grad-SVC/Grad-SVC/lib/python3.8/site-packages/librosa/util/decorators.py", line 59, in __wrapper
    return func(*args, **kwargs)
  File "/Users/workstation/Music/Grad-SVC/Grad-SVC/lib/python3.8/site-packages/librosa/core/audio.py", line 239, in __audioread_load
    reader = audioread.audio_open(path)
  File "/Users/workstation/Music/Grad-SVC/Grad-SVC/lib/python3.8/site-packages/audioread/__init__.py", line 127, in audio_open
    return BackendClass(path)
  File "/Users/workstation/Music/Grad-SVC/Grad-SVC/lib/python3.8/site-packages/audioread/rawread.py", line 59, in __init__
    self._fh = open(filename, 'rb')
IsADirectoryError: [Errno 21] Is a directory: 'data_gvc/waves-16k/'
```
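The script at this stage expects a single wav file, not the dataset root, which is why passing data_gvc/waves-16k/ raises IsADirectoryError. A hedged example of a valid call (the speaker folder and file name are placeholders):

```
python prepare/preprocess_hubert.py -w data_gvc/waves-16k/speaker0/0001.wav -v 0001.vec
```

The batch version below walks the dataset directory instead.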
```python
import sys,os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
import argparse
import torch
import librosa
from tqdm import tqdm
from transformers import HubertModel


def load_audio(file: str, sr: int = 16000):
    x, sr = librosa.load(file, sr=sr)
    return x


def load_model(path, device):
    model = HubertModel.from_pretrained(path)
    model.eval()
    if not (device == "cpu"):
        model.half()
    model.to(device)
    return model


def pred_vec(model, wavPath, vecPath, device):
    audio = load_audio(wavPath)
    feats = audio
    feats = torch.from_numpy(feats).to(device)
    feats = feats[None, :]
    if not (device == "cpu"):
        feats = feats.half()
    with torch.no_grad():
        vec = model(feats).last_hidden_state
        vec = vec.squeeze().data.cpu().float().numpy()
        # print(feats.shape)
        # print(vec.shape)  # [length, dim=768] hop=320
    np.save(vecPath, vec, allow_pickle=False)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--wav", help="wav", dest="wav", required=True)
    parser.add_argument("-v", "--vec", help="vec", dest="vec", required=True)
    args = parser.parse_args()
    print(args.wav)
    print(args.vec)
    os.makedirs(args.vec, exist_ok=True)

    wavPath = args.wav
    vecPath = args.vec

    device = "cuda" if torch.cuda.is_available() else "cpu"
    hubert = load_model('./hubert-base-korean', device)

    for spks in os.listdir(wavPath):
        if os.path.isdir(f"./{wavPath}/{spks}"):
            os.makedirs(f"./{vecPath}/{spks}", exist_ok=True)
            files = [f for f in os.listdir(f"./{wavPath}/{spks}") if f.endswith(".wav")]
            for file in tqdm(files, desc=f'Processing vec {spks}'):
                file = file[:-4]
                pred_vec(hubert, f"{wavPath}/{spks}/{file}.wav", f"{vecPath}/{spks}/{file}.vec", device)
```
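A hedged invocation sketch for this batch version, matching the dataset layout seen earlier in the thread (the output folder name data_gvc/hubert is an assumption):

```
python prepare/preprocess_hubert.py -w data_gvc/waves-16k -v data_gvc/hubert
```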
The preprocess_hubert.py problem is solved, but I got this error message when I ran gvc_trainer.py:

```
Traceback (most recent call last):
  File "gvc_trainer.py", line 30, in <module>
```
Use this model.
I have already put those models in the project. (Actually, I feel really sorry that you have to work so hard to solve this problem for me. I appreciate it! :) )
Maybe check the md5:

```
md5sum hubert-base-korean/pytorch_model.bin
be042a8b2ed7126c03b1159f86893b8c  hubert-base-korean/pytorch_model.bin
md5sum hubert-base-korean/config.json
3bec78d9502a1446df4afae5320b450e  hubert-base-korean/config.json
```
md5sum is a command to compute the md5 hash of a file; the hash is unique to a file's contents, so identical files have identical md5 values.
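Note that macOS ships md5 rather than md5sum, as the next reply shows; the equivalent commands would be:

```
md5 hubert-base-korean/pytorch_model.bin
md5 hubert-base-korean/config.json
```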
I got this on my terminal:

```
md5 hubert-base-korean/pytorch_model.bin
MD5 (hubert-base-korean/pytorch_model.bin) = be042a8b2ed7126c03b1159f86893b8c
md5 hubert-base-korean/config.json
MD5 (hubert-base-korean/config.json) = 50e9057abdd7d9944bbfa920cd480596
```
Re-download hubert-base-korean/config.json.
I get the same md5 even though I re-downloaded config.json.
"hidden_size": 768 is right?
"hidden_size": 768 is right?
I still get this error message:

```
Traceback (most recent call last):
  File "gvc_trainer.py", line 30, in <module>
    train(hps, args.checkpoint_path)
  File "/Users/workstation/Music/Grad-SVC/grad_extend/train.py", line 46, in train
    load_model(model, checkpoint['model'])
  File "/Users/workstation/Music/Grad-SVC/grad_extend/utils.py", line 24, in load_model
    model.load_state_dict(new_state_dict)
  File "/Users/workstation/Music/Grad-SVC/Grad-SVC/lib/python3.8/site-packages/torch/nn/modules/module.py", line 2153, in load_state_dict
    raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for GradTTS:
	size mismatch for encoder.prenet.conv_layers.0.weight: copying a param with shape torch.Size([192, 256, 5]) from checkpoint, the shape in current model is torch.Size([192, 768, 5]).
```
Change config/base.yaml: n_vecs: 256 -> n_vecs: 768.
I have already done that. Can I have the whole Grad-SVC project folder you have?
pretrain: "grad_pretrain/gvc.pretrain.pth" is based on the 256-dim hubert, so it can no longer be used; set pretrain: "". You will also need more than 10,000 wavs to train your model.
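A minimal sketch of why the old 256-dim checkpoint cannot load, assuming (consistent with the shapes in the traceback above) that the encoder prenet's first layer is a Conv1d whose input channel count equals n_vecs:

```python
import torch.nn as nn

# Checkpoint trained with n_vecs=256: weight shape [192, 256, 5]
old_prenet = nn.Conv1d(in_channels=256, out_channels=192, kernel_size=5)
# Model built with n_vecs=768: weight shape [192, 768, 5]
new_prenet = nn.Conv1d(in_channels=768, out_channels=192, kernel_size=5)

# load_state_dict fails because the two weight tensors differ in shape.
print(old_prenet.weight.shape)  # torch.Size([192, 256, 5])
print(new_prenet.weight.shape)  # torch.Size([192, 768, 5])
```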
What are the minimum and maximum lengths of the wav files? And do I have to put breathing-sound wav files in the dataset?
2s < length < 20s. There should not be too many breathing-sound wav files, and you will need more epochs to train your own model: full_epochs: 500, fast_epochs: 400.
How many epochs would generate the best quality? And what is the difference between full and fast epochs?
I don't know either. Fast epochs train only the transformer; full epochs train the transformer and the diffusion model.
When I ran gvc_inference.py I got this message:

```
Traceback (most recent call last):
  File "hubert/inference.py", line 57, in <module>
```
Should ./hubert/inference.py look the same as ./prepare/preprocess_hubert.py?
> NotADirectoryError: [Errno 20] Not a directory: 'test.wav'

You should set the real path of your wav file.
Well, I have the file test.wav in my directory. And this is my ./hubert/inference.py:

```python
import sys,os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
import argparse
import torch
import librosa
from tqdm import tqdm
from transformers import HubertModel


def load_audio(file: str, sr: int = 16000):
    x, sr = librosa.load(file, sr=sr)
    return x


def load_model(path, device):
    model = HubertModel.from_pretrained(path)
    model.eval()
    if not (device == "cpu"):
        model.half()
    model.to(device)
    return model


def pred_vec(model, wavPath, vecPath, device):
    audio = load_audio(wavPath)
    feats = audio
    feats = torch.from_numpy(feats).to(device)
    feats = feats[None, :]
    if not (device == "cpu"):
        feats = feats.half()
    with torch.no_grad():
        vec = model(feats).last_hidden_state
        vec = vec.squeeze().data.cpu().float().numpy()
        # print(vec.shape)  # [length, dim=768] hop=320
    np.save(vecPath, vec, allow_pickle=False)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--wav", help="wav", dest="wav", required=True)
    parser.add_argument("-v", "--vec", help="vec", dest="vec", required=True)
    args = parser.parse_args()
    print(args.wav)
    print(args.vec)
    os.makedirs(args.vec, exist_ok=True)

    wavPath = args.wav
    vecPath = args.vec

    device = "mps" if torch.backends.mps.is_available() else "cpu"
    hubert = load_model('./hubert-base-korean', device)

    for spks in os.listdir(wavPath):
        if os.path.isdir(f"./{wavPath}/{spks}"):
            os.makedirs(f"./{vecPath}/{spks}", exist_ok=True)
            files = [f for f in os.listdir(f"./{wavPath}/{spks}") if f.endswith(".wav")]
            for file in tqdm(files, desc=f'Processing vec {spks}'):
                file = file[:-4]
                pred_vec(hubert, f"{wavPath}/{spks}/{file}.wav", f"{vecPath}/{spks}/{file}.vec", device)
```
> Should ./hubert/inference.py look the same as ./prepare/preprocess_hubert.py?

No:
```python
import sys,os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
import argparse
import torch
import librosa
from transformers import HubertModel


def load_audio(file: str, sr: int = 16000):
    x, sr = librosa.load(file, sr=sr)
    return x


def load_model(path, device):
    model = HubertModel.from_pretrained(path)
    model.eval()
    if not (device == "cpu"):
        model.half()
    model.to(device)
    return model


def pred_vec(model, wavPath, vecPath, device):
    audio = load_audio(wavPath)
    feats = audio
    feats = torch.from_numpy(feats).to(device)
    feats = feats[None, :]
    if not (device == "cpu"):
        feats = feats.half()
    with torch.no_grad():
        vec = model(feats).last_hidden_state
        vec = vec.squeeze().data.cpu().float().numpy()
        print(feats.shape)
        print(vec.shape)  # [length, dim=768] hop=320
    np.save(vecPath, vec, allow_pickle=False)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--wav", help="wav", dest="wav")
    parser.add_argument("-v", "--vec", help="vec", dest="vec")
    args = parser.parse_args()
    print(args.wav)
    print(args.vec)

    wavPath = args.wav
    vecPath = args.vec

    device = "cuda" if torch.cuda.is_available() else "cpu"
    hubert = load_model('./hubert-base-korean', device)
    pred_vec(hubert, wavPath, vecPath, device)
```
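Assuming the single-file version above, it would be invoked like this (test.wav is the file mentioned in this thread; the .npy suffix is added by np.save):

```
python hubert/inference.py -w test.wav -v test.vec
```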
Is it the same code as the original Grad-SVC project?
I still get this message :(

```
Traceback (most recent call last):
  File "hubert/inference.py", line 53, in <module>
```
rm test.vec.npy and re-test
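A small sketch of why a stale output can get in the way: np.save appends .npy when the target path lacks it, so the file written for -v test.vec is test.vec.npy, and a leftover copy from an earlier broken run can mask a fresh one:

```python
import numpy as np

vec = np.zeros((10, 768), dtype=np.float32)
np.save("test.vec", vec)            # NumPy appends ".npy": writes test.vec.npy
loaded = np.load("test.vec.npy")    # the file the rest of the pipeline reads
print(loaded.shape)                 # (10, 768)
```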
It works!! I appreciate you with all of my heart!!
New problem: the wav file I exported with the trained model sounds weird. Is it true that I have to leave "pretrain:" blank in base.yaml? And what is the gvc_pretrained.pth file that is exported along with gvc.pth?
https://drive.google.com/file/d/1djlKYFexiTaSa75QyGYqOfHZxeVnwcSU/view?usp=sharing (Audio file)
```
Some weights of the model checkpoint at ./hubert-base-korean were not used when initializing HubertModel: ['encoder.pos_conv_embed.conv.weight_g', 'encoder.pos_conv_embed.conv.weight_v']
```
I don't think you've finished your training.

> Some weights of the model checkpoint at ./hubert-base-korean were not used when initializing HubertModel: ['encoder.pos_conv_embed.conv.weight_g', 'encoder.pos_conv_embed.conv.weight_v']

I didn't have that problem.
I get this message whenever I use the HuBERT model (preprocessing and inference). Even though the message appears in the terminal, the process still continues, and I still get those weird wav files. Can I have the exact same code you used?
```
python svc_trainer.py -c configs/base.yaml -n sovits5.0
Batch size per GPU : 8
/Users/workstation/Music/whisper-vits-svc/whisper-vits-svc/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:30: UserWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
  warnings.warn("torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.")
----------10----------
2023-10-15 12:28:02,861 - INFO - Start from 32k pretrain model: ./vits_pretrain/sovits5.0.pretrain.pth
2023-10-15 12:28:03,123 - INFO - Starting new training run.
----------373----------
Validation loop:   0%|          | 0/2 [00:00<?, ?it/s]
/Users/workstation/Music/whisper-vits-svc/vits/attentions.py:319: UserWarning: MPS: The constant padding of more than 3 dimensions is not currently supported natively. It uses View Ops default implementation to run. This may have performance implications. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/mps/operations/Pad.mm:474.)
  x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]))
/Users/workstation/Music/whisper-vits-svc/whisper-vits-svc/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/SpectralOps.cpp:879.)
  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]
/Users/workstation/Music/whisper-vits-svc/whisper-vits-svc/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: The operator 'aten::_fft_r2c' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/mps/MPSFallback.mm:13.)
  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]
Validation loop: 100%|████████████████████████████| 2/2 [00:33<00:00, 16.92s/it]
Loading train data:   0%|          | 0/48 [00:00<?, ?it/s]
/Users/workstation/Music/whisper-vits-svc/whisper-vits-svc/lib/python3.8/site-packages/torch/functional.py:660: UserWarning: A window was not provided. A rectangular window will be applied, which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window are recommended to reduce spectral leakage. To suppress this warning and use a rectangular window, explicitly set window=torch.ones(n_fft, device=<device>). (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/aten/src/ATen/native/SpectralOps.cpp:843.)
  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]
Loading train data:   0%|          | 0/48 [01:16<?, ?it/s]
Traceback (most recent call last):
  File "svc_trainer.py", line 41, in <module>
    train(0, args, args.checkpoint_path, hp, hp_str)
  File "/Users/workstation/Music/whisper-vits-svc/vits_extend/train.py", line 223, in train
    loss_g.backward()
  File "/Users/workstation/Music/whisper-vits-svc/whisper-vits-svc/lib/python3.8/site-packages/torch/_tensor.py", line 503, in backward
    torch.autograd.backward(
  File "/Users/workstation/Music/whisper-vits-svc/whisper-vits-svc/lib/python3.8/site-packages/torch/autograd/__init__.py", line 251, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: Unsupported type byte size: ComplexFloat
```
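One hedged reading of this failure: loss_g.backward() crashes after torch.stft produces ComplexFloat tensors, which the MPS backend cannot differentiate through in this PyTorch build, so keeping training off MPS is a plausible but untested workaround sketch:

```python
import torch

# Hypothetical workaround: avoid the "mps" device for training until
# complex-tensor autograd is supported there; CPU is slow but functional.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```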