PlayVoice / whisper-vits-svc

Core Engine of Singing Voice Conversion & Singing Voice Clone
https://huggingface.co/spaces/maxmax20160403/sovits5.0
MIT License
2.55k stars 914 forks source link

Silero VAD 在多进程中会卡住 #196

Open panxin801 opened 2 weeks ago

panxin801 commented 2 weeks ago

请问 VAD 在多进程中会卡住,要怎么解决呢?

def post_process(vad_model, refWavPath: str, svcWav: np.ndarray):
    """Mute the parts of a converted waveform where the reference has no speech.

    The reference audio is loaded at 16 kHz and handed to
    ``get_speech_timestamps``; the returned ``start``/``end`` values are used
    as sample indices to build a per-sample speech mask. Every mask sample is
    then duplicated once so the mask lines up with a signal at twice the
    reference rate, and all output samples outside a speech region are set
    to zero.

    Args:
        vad_model: Model object forwarded to ``get_speech_timestamps``.
        refWavPath (str): Path of the reference audio file.
        svcWav: Converted audio samples. Assumed to be a 1-D array sampled
            at 32 kHz (twice the 16 kHz reference rate) -- TODO confirm
            against the caller.

    Returns:
        tuple: ``(svc_wave, 32000)``. ``svc_wave`` is a new NumPy array
        holding the first ``min(len(mask), len(svcWav))`` samples of
        ``svcWav`` with the non-speech samples zeroed; ``svcWav`` itself is
        left untouched.
    """
    ref_wave, _ = librosa.load(refWavPath, sr=16000)
    tmp_wave = torch.from_numpy(ref_wave).squeeze(0)
    tag_wave = get_speech_timestamps(
        tmp_wave, vad_model, threshold=0.2, sampling_rate=16000
    )

    # True where the reference contains speech, False elsewhere.
    speech_mask = np.zeros(len(ref_wave), dtype=bool)
    for tag in tag_wave:
        speech_mask[tag["start"]: tag["end"]] = True

    # One 16 kHz mask sample covers two samples of the 2x-rate output.
    speech_mask = np.repeat(speech_mask, 2, -1)

    min_len = min(len(speech_mask), len(svcWav))
    # Work on a copy so the caller's buffer is not modified in place.
    svc_wave = np.array(svcWav[:min_len], copy=True)
    # Apply the mask; previously it was computed and then discarded, so the
    # returned audio was only truncated and never silenced.
    svc_wave[~speech_mask[:min_len]] = 0
    return svc_wave, 32000

def inference_with_auto_slice(self, tgtWav: str, savePath: str, spkName: str = None, f0scale: float = 1.0):
    """Convert one audio file and write the post-processed result to disk.

    Extracts the whisper, hubert and F0 features of ``tgtWav``, selects a
    speaker through ``self._spk_auto_select``, runs ``self.svc_infer``,
    passes the output through ``post_process`` and writes it to
    ``savePath`` with ``self.svcModelConfig.data.sampling_rate``.

    Args:
        tgtWav (str): Path of the audio file to convert.
        savePath (str): Destination path of the converted wav file; missing
            parent directories are created.
        spkName (str, optional): Only logged by this method; the speaker is
            chosen by ``self._spk_auto_select``.
        f0scale (float): Forwarded to ``self._spk_auto_select``.

    Returns:
        bool: True when the output file was written, False when an
        ``Exception`` was raised during conversion (its traceback is
        logged). ``KeyboardInterrupt`` and ``SystemExit`` are not caught.
    """
    logger.info(
        f"Input params: tgtWav={tgtWav}, savePath={savePath}, spkName={spkName}, f0scale={f0scale}")

    savePath = Path(savePath)
    savePath.parent.mkdir(parents=True, exist_ok=True)

    try:
        vadModel = init_jit_model("vad/assets/silero_vad.jit")
        vadModel.eval()

        ppg = self.whisper_inference(tgtWav)
        vec = self.hubert_inference(tgtWav)
        pit = compute_f0_sing(tgtWav, self.device)

        # Auto speaker select depend on target spk F0 and target wav F0
        spkNpy, pit = self._spk_auto_select(pit, f0scale)
        spk = torch.FloatTensor(spkNpy)

        # Duplicate every feature frame along axis 0 before inference.
        ppg = torch.FloatTensor(np.repeat(ppg, 2, 0))
        vec = torch.FloatTensor(np.repeat(vec, 2, 0))

        retrieval = DummyRetrieval()

        logger.info(f"Will run svc infer {tgtWav}")
        out_audio = self.svc_infer(retrieval, spk, pit, ppg, vec)
        logger.info(f"Finish run svc infer {tgtWav}")

        logger.info(f"Will run post_process {tgtWav}")
        # Reported to hang here when this method runs in worker processes.
        # NOTE(review): possibly PyTorch CPU threading inside a forked
        # worker; consider the "spawn" start method or
        # torch.set_num_threads(1) in the worker -- unverified, confirm.
        new_wav, _ = post_process(vadModel, refWavPath=tgtWav, svcWav=out_audio)
        logger.info(f"Finish run post_process {tgtWav}")

        wavfile.write(savePath,
                      self.svcModelConfig.data.sampling_rate, new_wav)
    except Exception:
        logger.error(traceback.format_exc())
        return False

    # Return outside of any ``finally`` clause: a ``return`` there would also
    # discard KeyboardInterrupt/SystemExit and make workers uninterruptible.
    return True

请问有哪位朋友遇到过这个问题吗?是怎么解决的呢?谢谢您的回复。