ylacombe / finetune-hf-vits

Finetune VITS and MMS using HuggingFace's tools
MIT License
115 stars 25 forks source link

finetuning error: filter audio lengths. File does not exist or is not a regular file (possibly a pipe?). #38

Open yc930401 opened 1 month ago

yc930401 commented 1 month ago

```
[INFO|feature_extraction_utils.py:537] 2024-08-08 14:16:12,709 >> loading configuration file checkpoints/preprocessor_config.json
[INFO|feature_extraction_utils.py:586] 2024-08-08 14:16:12,711 >> Feature extractor VitsFeatureExtractor {
  "feature_extractor_type": "VitsFeatureExtractor",
  "feature_size": 80,
  "hop_length": 256,
  "max_wav_value": 32768.0,
  "n_fft": 1024,
  "padding_side": "right",
  "padding_value": 0.0,
  "return_attention_mask": false,
  "sampling_rate": 16000
}
```

```
[INFO|tokenization_utils_base.py:2267] 2024-08-08 14:16:12,941 >> loading file vocab.json
[INFO|tokenization_utils_base.py:2267] 2024-08-08 14:16:12,941 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:2267] 2024-08-08 14:16:12,941 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:2267] 2024-08-08 14:16:12,941 >> loading file tokenizer_config.json
[INFO|tokenization_utils_base.py:2267] 2024-08-08 14:16:12,941 >> loading file tokenizer.json
Filter (num_proc=4): 0%| | 0/150 [00:00<?, ? examples/s][src/libmpg123/parse.c:do_readahead():1099] warning: Cannot read next header, a one-frame stream? Duh...
Filter (num_proc=4): 0%| | 0/150 [00:01<?, ? examples/s]
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/multiprocess/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 678, in _write_generator_to_queue
    for i, result in enumerate(func(**kwargs)):
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3552, in _map_single
    batch = apply_function_on_filtered_inputs(
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3405, in apply_function_on_filtered_inputs
    inputs = format_table(
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/formatting/formatting.py", line 641, in format_table
    formatted_output = formatter(pa_table_to_format, query_type=query_type)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/formatting/formatting.py", line 401, in __call__
    return self.format_batch(pa_table)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/formatting/formatting.py", line 450, in format_batch
    batch = self.python_features_decoder.decode_batch(batch)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/formatting/formatting.py", line 222, in decode_batch
    return self.features.decode_batch(batch) if self.features else batch
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/features/features.py", line 2029, in decode_batch
    [
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/features/features.py", line 2030, in <listcomp>
    decode_nested_example(self[column_name], value, token_per_repo_id=token_per_repo_id)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/features/features.py", line 1351, in decode_nested_example
    return schema.decode_example(obj, token_per_repo_id=token_per_repo_id)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/features/audio.py", line 187, in decode_example
    array, sampling_rate = sf.read(file)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/soundfile.py", line 285, in read
    with SoundFile(file, 'r', samplerate, channels,
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/soundfile.py", line 658, in __init__
    self._file = self._open(file, mode_int, closefd)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/soundfile.py", line 1216, in _open
    raise LibsndfileError(err, prefix="Error opening {0!r}: ".format(self.name))
soundfile.LibsndfileError: Error opening <_io.BytesIO object at 0x7f78a06e8040>: File does not exist or is not a regular file (possibly a pipe?).
"""
```

The above exception was the direct cause of the following exception:

```
Traceback (most recent call last):
  File "/data/yangcheng/github_projects/finetune-hf-vits/run_vits_finetuning.py", line 1494, in <module>
    main()
  File "/data/yangcheng/github_projects/finetune-hf-vits/run_vits_finetuning.py", line 767, in main
    vectorized_datasets = raw_datasets.filter(
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/dataset_dict.py", line 983, in filter
    {
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/dataset_dict.py", line 984, in <dictcomp>
    k: dataset.filter(
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 567, in wrapper
    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/fingerprint.py", line 482, in wrapper
    out = func(dataset, *args, **kwargs)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3714, in filter
    indices = self.map(
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 602, in wrapper
    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 567, in wrapper
    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3253, in map
    for rank, done, content in iflatmap_unordered(
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 718, in iflatmap_unordered
    [async_result.get(timeout=0.05) for async_result in async_results]
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/datasets/utils/py_utils.py", line 718, in <listcomp>
    [async_result.get(timeout=0.05) for async_result in async_results]
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/multiprocess/pool.py", line 774, in get
    raise self._value
soundfile.LibsndfileError: Error opening <_io.BytesIO object at 0x7f78a06e8040>: File does not exist or is not a regular file (possibly a pipe?).
Traceback (most recent call last):
  File "/home/yangcheng/anaconda3/envs/fairseq/bin/accelerate", line 8, in <module>
    sys.exit(main())
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/accelerate/commands/accelerate_cli.py", line 48, in main
    args.func(args)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/accelerate/commands/launch.py", line 1106, in launch_command
    simple_launcher(args)
  File "/home/yangcheng/anaconda3/envs/fairseq/lib/python3.10/site-packages/accelerate/commands/launch.py", line 704, in simple_launcher
    raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
subprocess.CalledProcessError: Command '['/home/yangcheng/anaconda3/envs/fairseq/bin/python', 'run_vits_finetuning.py', './training_config_examples/finetune_mms_bod.json']' returned non-zero exit status 1.
```