KevinWang676 / Bark-Voice-Cloning

Bark Voice Cloning and Voice Cloning for Chinese Speech
MIT License
2.8k stars 401 forks source link

提示: FileNotFoundError: [WinError 2] 系统找不到指定的文件。 #81

Closed vivisol closed 11 months ago

vivisol commented 11 months ago

我将音频素材文件 01.wav 和 Notebook 文件 Voice_Cloning_for_Chinese_Speech_v2.ipynb 放在同一个目录下,然后运行以下语句:

split_long_audio(whisper_model, "01.wav", "test", "dataset_raw") # 请在{filename}处填写您上传的wav文件名

时,提示找不到文件:

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[5], line 1
----> 1 split_long_audio(whisper_model, "01.wav", "test", "dataset_raw")

Cell In[2], line 11, in split_long_audio(model, filepaths, character_name, save_dir, out_sr)
      8 save_path.mkdir(exist_ok=True, parents=True)
     10 print(f"Transcribing file {file_idx}: '{filepath}' to segments...")
---> 11 result = model.transcribe(filepath, word_timestamps=True, task="transcribe", beam_size=5, best_of=5)
     12 segments = result['segments']
     14 wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)

File D:\ProgramData\miniconda3\envs\bark\lib\site-packages\whisper\transcribe.py:123, in transcribe(model, audio, verbose, temperature, compression_ratio_threshold, logprob_threshold, no_speech_threshold, condition_on_previous_text, initial_prompt, word_timestamps, prepend_punctuations, append_punctuations, **decode_options)
    121 # Pad 30-seconds of silence to the input audio, for slicing
    122 print("[DEGUB:audio:]",audio)
--> 123 mel = log_mel_spectrogram(audio, model.dims.n_mels, padding=N_SAMPLES)
    124 content_frames = mel.shape[-1] - N_FRAMES
    126 if decode_options.get("language", None) is None:

File D:\ProgramData\miniconda3\envs\bark\lib\site-packages\whisper\audio.py:140, in log_mel_spectrogram(audio, n_mels, padding, device)
    138 if not torch.is_tensor(audio):
    139     if isinstance(audio, str):
--> 140         audio = load_audio(audio)
    141     audio = torch.from_numpy(audio)
    143 if device is not None:

File D:\ProgramData\miniconda3\envs\bark\lib\site-packages\whisper\audio.py:58, in load_audio(file, sr)
     56 # fmt: on
     57 try:
---> 58     out = run(cmd, capture_output=True, check=True).stdout
     59 except CalledProcessError as e:
     60     raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

File D:\ProgramData\miniconda3\envs\bark\lib\subprocess.py:505, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
    502     kwargs['stdout'] = PIPE
    503     kwargs['stderr'] = PIPE
--> 505 with Popen(*popenargs, **kwargs) as process:
    506     try:
    507         stdout, stderr = process.communicate(input, timeout=timeout)

File D:\ProgramData\miniconda3\envs\bark\lib\subprocess.py:951, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask)
    947         if self.text_mode:
    948             self.stderr = io.TextIOWrapper(self.stderr,
    949                     encoding=encoding, errors=errors)
--> 951     self._execute_child(args, executable, preexec_fn, close_fds,
    952                         pass_fds, cwd, env,
    953                         startupinfo, creationflags, shell,
    954                         p2cread, p2cwrite,
    955                         c2pread, c2pwrite,
    956                         errread, errwrite,
    957                         restore_signals,
    958                         gid, gids, uid, umask,
    959                         start_new_session)
    960 except:
    961     # Cleanup if the child failed starting.
    962     for f in filter(None, (self.stdin, self.stdout, self.stderr)):

File D:\ProgramData\miniconda3\envs\bark\lib\subprocess.py:1436, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session)
   1434 # Start the process
   1435 try:
-> 1436     hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
   1437                              # no special security
   1438                              None, None,
   1439                              int(not close_fds),
   1440                              creationflags,
   1441                              env,
   1442                              cwd,
   1443                              startupinfo)
   1444 finally:
   1445     # Child is launched. Close the parent's copy of those pipe
   1446     # handles that only the child should have open.  You need
   (...)
   1449     # pipe will not close when the child process exits and the
   1450     # ReadFile will hang.
   1451     self._close_pipe_fds(p2cread, p2cwrite,
   1452                          c2pread, c2pwrite,
   1453                          errread, errwrite)

FileNotFoundError: [WinError 2] 系统找不到指定的文件。

这非常奇怪,不知道原因出在哪里。我在当前目录下直接用 ffmpeg 命令行读取该音频文件是成功的。

vivisol commented 11 months ago

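出现这个问题的原因是:虽然我已经安装了 ffmpeg,但它没有在已经运行的 Jupyter kernel 里生效(很可能是 kernel 启动时的环境变量里还没有 ffmpeg 的路径)。因此 whisper 的 load_audio 通过 subprocess 调用 ffmpeg 时找不到可执行文件,报出的 [WinError 2] 指的是 ffmpeg 程序本身,而不是 01.wav。重新启动 Jupyter 之后就没有这个问题啦。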