-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\multiprocessing\spawn.py", line 69, in _wrap
fn(i, *args)
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\GPT_SoVITS\s2_train.py", line 75, in run
dist.init_process_group(
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\distributed\distributed_c10d.py", line 888, in init_process_group
store, rank, world_size = next(rendezvous_iterator)
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\distributed\rendezvous.py", line 245, in _env_rendezvous_handler
store = _create_c10d_store(master_addr, master_port, rank, world_size, timeout)
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\distributed\rendezvous.py", line 176, in _create_c10d_store
return TCPStore(
TimeoutError: The client socket has timed out after 1800s while trying to connect to (localhost, 41266).
"E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\python.exe" GPT_SoVITS/s2_train.py --config "E:\software\Umi\GPT-SoVITS-beta0217fix3\TEMP/tmp_s2.json" INFO:zhouwensha:{'train': {'log_interval': 100, 'eval_interval': 500, 'seed': 1234, 'epochs': 9, 'learning_rate': 0.0001, 'betas': [0.8, 0.99], 'eps': 1e-09, 'batch_size': 2, 'fp16_run': True, 'lr_decay': 0.999875, 'segment_size': 20480, 'init_lr_ratio': 1, 'warmup_epochs': 0, 'c_mel': 45, 'c_kl': 1.0, 'text_low_lr_rate': 0.4, 'pretrained_s2G': 'GPT_SoVITS/pretrained_models/s2G488k.pth', 'pretrained_s2D': 'GPT_SoVITS/pretrained_models/s2D488k.pth', 'if_save_latest': True, 'if_save_every_weights': True, 'save_every_epoch': 4, 'gpu_numbers': '0'}, 'data': {'max_wav_value': 32768.0, 'sampling_rate': 32000, 'filter_length': 2048, 'hop_length': 640, 'win_length': 2048, 'n_mel_channels': 128, 'mel_fmin': 0.0, 'mel_fmax': None, 'add_blank': True, 'n_speakers': 300, 'cleaned_text': True, 'exp_dir': 'logs/zhouwensha'}, 'model': {'inter_channels': 192, 'hidden_channels': 192, 'filter_channels': 768, 'n_heads': 2, 'n_layers': 6, 'kernel_size': 3, 'p_dropout': 0.1, 'resblock': '1', 'resblock_kernel_sizes': [3, 7, 11], 'resblock_dilation_sizes': [[1, 3, 5], [1, 3, 5], [1, 3, 5]], 'upsample_rates': [10, 8, 2, 2, 2], 'upsample_initial_channel': 512, 'upsample_kernel_sizes': [16, 16, 8, 2, 2], 'n_layers_q': 3, 'use_spectral_norm': False, 'gin_channels': 512, 'semantic_frame_rate': '25hz', 'freeze_quantizer': True}, 's2_ckpt_dir': 'logs/zhouwensha', 'content_module': 'cnhubert', 'save_weight_dir': 'SoVITS_weights', 'name': 'zhouwensha', 'pretrain': None, 'resume_step': None} [E C:\actions-runner_work\pytorch\pytorch\builder\windows\pytorch\torch\csrc\distributed\c10d\socket.cpp:860] [c10d] The client socket has timed out after 1800s while trying to connect to (localhost, 41266). Traceback (most recent call last): File "E:\software\Umi\GPT-SoVITS-beta0217fix3\GPT_SoVITS\s2_train.py", line 600, in
main()
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\GPT_SoVITS\s2_train.py", line 56, in main
mp.spawn(
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\multiprocessing\spawn.py", line 239, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\multiprocessing\spawn.py", line 197, in start_processes
while not context.join():
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\multiprocessing\spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\multiprocessing\spawn.py", line 69, in _wrap
fn(i, *args)
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\GPT_SoVITS\s2_train.py", line 75, in run
dist.init_process_group(
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\distributed\distributed_c10d.py", line 888, in init_process_group
store, rank, world_size = next(rendezvous_iterator)
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\distributed\rendezvous.py", line 245, in _env_rendezvous_handler
store = _create_c10d_store(master_addr, master_port, rank, world_size, timeout)
File "E:\software\Umi\GPT-SoVITS-beta0217fix3\runtime\lib\site-packages\torch\distributed\rendezvous.py", line 176, in _create_c10d_store
return TCPStore(
TimeoutError: The client socket has timed out after 1800s while trying to connect to (localhost, 41266).
训练时 GPU 是有占用的，可是 SoVITS 训练不会产生任何训练文件，进程一直卡住。