Closed wjyfelicity closed 7 months ago
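Check your audio files: clips that are too short can trigger this error. For example, the following script lists every .wav file shorter than a given duration: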
import argparse
import wave
import os
def short_wav(file_path, duration_threshold):
    """Return True when the WAV file at *file_path* is shorter than the threshold.

    The duration is computed from the WAV header as
    ``number of frames / frame rate``; the audio data itself is not decoded.

    Args:
        file_path: Path of the .wav file to inspect.
        duration_threshold: Limit in seconds; files strictly shorter than
            this are reported as short.

    Returns:
        bool: True if the file's duration is strictly below
        *duration_threshold*, otherwise False.
    """
    with wave.open(file_path, 'rb') as wav_file:
        frame_count = wav_file.getnframes()
        frame_rate = wav_file.getframerate()
        duration = frame_count / float(frame_rate)
    return duration < duration_threshold
def find_short_wavs(directory, duration_threshold):
    """Collect the paths of all short .wav files found under *directory*.

    The directory tree is walked recursively with ``os.walk``. Only file
    names ending in ``.wav`` (exact, case-sensitive match) are examined;
    each one is passed to ``short_wav`` together with the threshold.

    Args:
        directory: Root directory to search.
        duration_threshold: Limit in seconds, forwarded to ``short_wav``.

    Returns:
        list: Paths (root joined with file name) of the files that
        ``short_wav`` reported as shorter than the threshold, in the order
        they were encountered.
    """
    short_wavs = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith('.wav'):
                continue
            file_path = os.path.join(root, file)
            if short_wav(file_path, duration_threshold):
                short_wavs.append(file_path)
    return short_wavs
def main():
    """Command-line entry point: print every .wav file shorter than a threshold.

    Command-line arguments:
        dir: Directory that is searched for .wav files.
        --threshold: Duration in seconds below which a file counts as
            short (default 0.4).

    Prints a header line containing the threshold, followed by one path per
    short file.
    """
    parser = argparse.ArgumentParser(description="Check Audio")
    parser.add_argument("dir", help="search for .wav files")
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.4,
        help="Duration threshold for considering a .wav file short (in seconds)",
    )
    args = parser.parse_args()

    # The positional argument is registered as "dir", so argparse stores it
    # under args.dir; reading args.directory raised AttributeError.
    short_wavs = find_short_wavs(args.dir, args.threshold)

    # The original format string had no placeholder, so the threshold was
    # silently dropped from the output.
    print("Audio wav files shorter than {} seconds:".format(args.threshold))
    for file_path in short_wavs:
        print(file_path)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
已解决,谢谢!
已解决,谢谢!
请问你是怎么解决的
是因为时长太短了吗
环境安装:
python==3.7.13
torch==1.11.0+cu113
funasr==1.0.15
modelscope==1.9.5
使用模型: https://www.modelscope.cn/models/iic/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/summary
参考示例: https://github.com/alibaba-damo-academy/FunASR/blob/main/examples/industrial_data_pretraining/contextual_paraformer/finetune_from_local.sh
完整错误:
Traceback (most recent call last):
File "../../../funasr/bin/train.py", line 42, in main_hydra
main(**kwargs)
File "../../../funasr/bin/train.py", line 192, in main
trainer.run()
File "/code/zhili_test/new/FunASR-main/funasr/train_utils/trainer.py", line 181, in run
self._train_epoch(epoch)
File "/code/zhili_test/new/FunASR-main/funasr/train_utils/trainer.py", line 234, in _train_epoch
for batch_idx, batch in enumerate(self.dataloader_train):
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 530, in __next__
data = self._next_data()
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1224, in _next_data
return self._process_data(data)
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1250, in _process_data
data.reraise()
File "/opt/conda/lib/python3.7/site-packages/torch/_utils.py", line 457, in reraise
raise exception
AssertionError: Caught AssertionError in DataLoader worker process 2.
Original Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/code/zhili_test/new/FunASR-main/funasr/datasets/audio_datasets/datasets.py", line 123, in __getitem__
speech, speech_lengths = extract_fbank(data_src, data_type=self.data_type, frontend=self.frontend, is_final=True) # speech: [b, T, d]
File "/code/zhili_test/new/FunASR-main/funasr/utils/load_utils.py", line 110, in extract_fbank
data, data_len = frontend(data, data_len, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/code/zhili_test/new/FunASR-main/funasr/frontends/wav_frontend.py", line 142, in forward
snip_edges=self.snip_edges)
File "/opt/conda/lib/python3.7/site-packages/torchaudio/compliance/kaldi.py", line 594, in fbank
waveform, channel, sample_frequency, frame_shift, frame_length, round_to_power_of_two, preemphasis_coefficient
File "/opt/conda/lib/python3.7/site-packages/torchaudio/compliance/kaldi.py", line 143, in _get_waveform_and_window_properties
window_size, len(waveform)
AssertionError: choose a window size 400 that is [2, 0]