NextAudioGen / ultimatevocalremover_api

API for a Vocal Remover that uses Deep Neural Networks.
MIT License
78 stars 9 forks source link

Error when separating opus files #14

Open ElizavetaSedova opened 1 month ago

ElizavetaSedova commented 1 month ago

I tried to use hdemucs_mmi, UVR-MDX-NET-Inst_1, and MDX23C, and an error appeared with each of them. I suspect this has something to do with the audio format, because I don't get this error with other formats or after converting the audio. However, converting is not very convenient.

File /workspace/vocal_remover/ultimatevocalremover_api/src/models.py:150, in Demucs.__call__(self, audio, sampling_rate, **kwargs)
    148 def __call__(self, audio:Union[npt.NDArray, str], sampling_rate:int=None, **kwargs)->dict:
    149     if isinstance(audio, str):
--> 150         return self.predict_path(audio)
    151     return self.predict(audio, sampling_rate)

File /workspace/vocal_remover/ultimatevocalremover_api/src/models.py:146, in Demucs.predict_path(self, audio, **kwargs)
    144 audio, sampling_rate = read(audio)
    145 audio = torch.tensor(audio, dtype=torch.float32)
--> 146 return self.predict(audio, sampling_rate)

File /workspace/vocal_remover/ultimatevocalremover_api/src/models.py:128, in Demucs.predict(self, audio, sampling_rate, **kwargs)
    125 elif isinstance(audio, list): 
    126     audio = torch.tensor(audio, dtype=torch.float32)
--> 128 origin, separated = self.model_api.separate_tensor(audio, sampling_rate)
    129 return separated

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/api.py:270, in Separator.separate_tensor(self, wav, sr)
    268 wav -= ref.mean()
    269 wav /= ref.std() + 1e-8
--> 270 out = apply_model(
    271         self._model,
    272         wav[None],
    273         segment=self._segment,
    274         shifts=self._shifts,
    275         split=self._split,
    276         overlap=self._overlap,
    277         device=self._device,
    278         num_workers=self._jobs,
    279         callback=self._callback,
    280         callback_arg=_replace_dict(
    281             self._callback_arg, ("audio_length", wav.shape[1])
    282         ),
    283         progress=self._progress,
    284     )
    285 if out is None:
    286     raise KeyboardInterrupt

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/apply.py:216, in apply_model(model, mix, shifts, split, overlap, transition_power, progress, device, num_workers, segment, pool, lock, callback, callback_arg)
    213 original_model_device = next(iter(sub_model.parameters())).device
    214 sub_model.to(device)
--> 216 res = apply_model(sub_model, mix, **kwargs, callback_arg=callback_arg)
    217 out = res
    218 sub_model.to(original_model_device)

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/apply.py:251, in apply_model(model, mix, shifts, split, overlap, transition_power, progress, device, num_workers, segment, pool, lock, callback, callback_arg)
    246 shifted = TensorChunk(padded_mix, offset, length + max_shift - offset)
    247 kwargs["callback"] = (
    248         (lambda d, i=shift_idx: callback(_replace_dict(d, ("shift_idx", i)))
    249          if callback else None)
    250     )
--> 251 res = apply_model(model, shifted, **kwargs, callback_arg=callback_arg)
    252 shifted_out = res
    253 out += shifted_out[..., max_shift - offset:]

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/apply.py:290, in apply_model(model, mix, shifts, split, overlap, transition_power, progress, device, num_workers, segment, pool, lock, callback, callback_arg)
    288 for future, offset in futures:
    289     try:
--> 290         chunk_out = future.result()  # type: th.Tensor
    291     except Exception:
    292         pool.shutdown(wait=True, cancel_futures=True)

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/utils.py:132, in DummyPoolExecutor.DummyResult.result(self)
    130 def result(self):
    131     if self._dict["run"]:
--> 132         return self.func(*self.args, **self.kwargs)
    133     else:
    134         raise CancelledError()

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/apply.py:317, in apply_model(model, mix, shifts, split, overlap, transition_power, progress, device, num_workers, segment, pool, lock, callback, callback_arg)
    315         callback(_replace_dict(callback_arg, ("state", "start")))  # type: ignore
    316 with th.no_grad():
--> 317     out = model(padded_mix)
    318 with lock:
    319     if callback is not None:

File ~/.conda/envs/my_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
   1496 # If we don't have any hooks, we want to skip the rest of the logic in
   1497 # this function, and just call forward.
   1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1499         or _global_backward_pre_hooks or _global_backward_hooks
   1500         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501     return forward_call(*args, **kwargs)
   1502 # Do not call functions when jit is used
   1503 full_backward_hooks, non_full_backward_hooks = [], []

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/hdemucs.py:693, in HDemucs.forward(self, mix)
    690 x = mix
    691 length = x.shape[-1]
--> 693 z = self._spec(mix)
    694 mag = self._magnitude(z).to(mix.device)
    695 x = mag

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/hdemucs.py:604, in HDemucs._spec(self, x)
    602 pad = hl // 2 * 3
    603 if not self.hybrid_old:
--> 604     x = pad1d(x, (pad, pad + le * hl - x.shape[-1]), mode='reflect')
    605 else:
    606     x = pad1d(x, (pad, pad + le * hl - x.shape[-1]))

File /workspace/vocal_remover/ultimatevocalremover_api/src/models_dir/demucs/demucs/hdemucs.py:39, in pad1d(x, paddings, mode, value)
     37 out = F.pad(x, paddings, mode, value)
     38 assert out.shape[-1] == length + padding_left + padding_right
---> 39 assert (out[..., padding_left: padding_left + length] == x0).all()
     40 return out
ElizavetaSedova commented 1 month ago

I added "opus" to the list of extensions in the following line of fastio.py, which solved the problem:

if ext in ['wav', 'flac', 'ogg', 'mp3', 'opus']:

It might be worth expanding this list to cover the other extensions that the audiofile library supports.