Closed hywhuangyuwei closed 1 year ago
try torch nightly
Having the same problem with `IndexError: index out of range in self` (M1 Pro).
Running with torch nightly gives me:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Cell In[2], line 1
----> 1 from bark import SAMPLE_RATE, generate_audio, preload_models
2 from IPython.display import Audio
4 # download and load all models
File ~/Documents/Code/bark-ai/bark/bark/__init__.py:1
----> 1 from .api import generate_audio, text_to_semantic, semantic_to_waveform, save_as_prompt
2 from .generation import SAMPLE_RATE, preload_models
File ~/Documents/Code/bark-ai/bark/bark/api.py:5
1 from typing import Optional
3 import numpy as np
----> 5 from .generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic
8 def text_to_semantic(
9 text: str,
10 history_prompt: Optional[str] = None,
11 temp: float = 0.7,
12 silent: bool = False,
13 ):
14 """Generate semantic array from text.
15
16 Args:
(...)
23 numpy semantic array to be fed into `semantic_to_waveform`
24 """
File ~/Documents/Code/bark-ai/bark/bark/generation.py:8
5 import re
6 import requests
----> 8 from encodec import EncodecModel
9 import funcy
10 import logging
File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/encodec/__init__.py:12
8 """EnCodec neural audio codec."""
10 __version__ = "0.1.1"
---> 12 from .model import EncodecModel
13 from .compress import compress, decompress
File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/encodec/model.py:19
17 from . import quantization as qt
18 from . import modules as m
---> 19 from .utils import _check_checksum, _linear_overlap_add, _get_checkpoint_url
22 ROOT_URL = 'https://dl.fbaipublicfiles.com/encodec/v0/'
24 EncodedFrame = tp.Tuple[torch.Tensor, tp.Optional[torch.Tensor]]
File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/encodec/utils.py:14
11 import typing as tp
13 import torch
---> 14 import torchaudio
17 def _linear_overlap_add(frames: tp.List[torch.Tensor], stride: int):
18 # Generic overlap add, with linear fade-in/fade-out, supporting complex scenario
19 # e.g., more than 2 frames per position.
(...)
34 # - if more than 2 frames overlap at a given point, we hope that by induction
35 # something sensible happens.
36 assert len(frames)
File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/__init__.py:1
----> 1 from torchaudio import ( # noqa: F401
2 _extension,
3 compliance,
4 datasets,
5 functional,
6 io,
7 kaldi_io,
8 models,
9 pipelines,
10 sox_effects,
11 transforms,
12 utils,
13 )
15 from torchaudio.backend import get_audio_backend, list_audio_backends, set_audio_backend
17 try:
File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/_extension/__init__.py:43
41 _IS_KALDI_AVAILABLE = False
42 if _IS_TORCHAUDIO_EXT_AVAILABLE:
---> 43 _load_lib("libtorchaudio")
45 import torchaudio.lib._torchaudio # noqa
47 _check_cuda_version()
File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/_extension/utils.py:61, in _load_lib(lib)
59 if not path.exists():
60 return False
---> 61 torch.ops.load_library(path)
62 torch.classes.load_library(path)
63 return True
File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torch/_ops.py:787, in _Ops.load_library(self, path)
782 path = _utils_internal.resolve_library_path(path)
783 with dl_open_guard():
784 # Import the shared library into the process, thus running its
785 # static (global) initialization code in order to register custom
786 # operators with the JIT.
--> 787 ctypes.CDLL(path)
788 self.loaded_libraries.add(path)
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/ctypes/__init__.py:366, in CDLL.__init__(self, name, mode, handle, use_errno, use_last_error, winmode)
363 self._FuncPtr = _FuncPtr
365 if handle is None:
--> 366 self._handle = _dlopen(self._name, mode)
367 else:
368 self._handle = handle
OSError: dlopen(/Users/ph/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/lib/libtorchaudio.so, 0x0006): Symbol not found: __ZN2at4_ops9fft_irfft4callERKNS_6TensorEN3c108optionalIxEExNS6_INS5_17basic_string_viewIcEEEE
Referenced from: <BBBCC85A-CF40-37F8-B811-463E29724353> /Users/ph/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/lib/libtorchaudio.so
Expected in: <6BE2250B-52C1-3362-ABF0-734C19B4356D> /Users/ph/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib
Are you sure you ran `pip uninstall torch torchaudio torchvision` beforehand?
This looks like a torch issue that has nothing to do with bark.
Hi! Same problem here.
Using SUNO_ENABLE_MPS = True
and torch 2.0.0 results in the following error:
Traceback (most recent call last):
File "test.py", line 16, in <module>
audio_array = generate_audio(text_prompt)
File "/Users/user/workspace/ai/py/bark/bark/api.py", line 113, in generate_audio
out = semantic_to_waveform(
File "/Users/user/workspace/ai/py/bark/bark/api.py", line 66, in semantic_to_waveform
audio_arr = codec_decode(fine_tokens)
File "/Users/user/workspace/ai/py/bark/bark/generation.py", line 824, in codec_decode
emb = model.quantizer.decode(arr)
File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/vq.py", line 112, in decode
quantized = self.vq.decode(codes)
File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/core_vq.py", line 361, in decode
quantized = layer.decode(indices)
File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/core_vq.py", line 288, in decode
quantize = self._codebook.decode(embed_ind)
File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/core_vq.py", line 202, in decode
quantize = self.dequantize(embed_ind)
File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/core_vq.py", line 188, in dequantize
quantize = F.embedding(embed_ind, self.embed)
File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/torch/nn/functional.py", line 2210, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self
And using torch nightly:
Traceback (most recent call last):
File "test.py", line 4, in <module>
from bark import SAMPLE_RATE, generate_audio, preload_models
File "/Users/user/workspace/ai/py/bark/bark/__init__.py", line 1, in <module>
from .api import generate_audio, text_to_semantic, semantic_to_waveform, save_as_prompt
File "/Users/user/workspace/ai/py/bark/bark/api.py", line 5, in <module>
from .generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic
File "/Users/user/workspace/ai/py/bark/bark/generation.py", line 6, in <module>
from encodec import EncodecModel
File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/encodec/__init__.py", line 12, in <module>
from .model import EncodecModel
File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/encodec/model.py", line 19, in <module>
from .utils import _check_checksum, _linear_overlap_add, _get_checkpoint_url
File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/encodec/utils.py", line 14, in <module>
import torchaudio
File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/__init__.py", line 1, in <module>
from torchaudio import ( # noqa: F401
File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/_extension/__init__.py", line 43, in <module>
_load_lib("libtorchaudio")
File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/_extension/utils.py", line 61, in _load_lib
torch.ops.load_library(path)
File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torch/_ops.py", line 787, in load_library
ctypes.CDLL(path)
File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/ctypes/__init__.py", line 373, in __init__
self._handle = _dlopen(self._name, mode)
OSError: dlopen(/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/lib/libtorchaudio.so, 0x0006): Symbol not found: __ZN2at4_ops9fft_irfft4callERKNS_6TensorEN3c108optionalIxEExNS6_INS5_17basic_string_viewIcEEEE
Referenced from: <6D80FE4D-3DC4-3CA2-85A9-C02FE346BA28> /opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/lib/libtorchaudio.so
Expected in: <F328CA24-D854-3CF5-9375-EF3FE2A68B8C> /opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib
Looks like a torchaudio issue. When you install nightly, make sure to first uninstall torch, torchvision AND torchaudio, and then reinstall all three from nightly. I'll try to find some time in the next few days to troubleshoot this. It was running fine for me yesterday, but we probably have slightly different setups.
Hi @gkucsko,
I did run `pip uninstall` for torch, torchvision and torchaudio before installing the nightly build. I also ran `rm -rf` on the entire virtual environment, created a new one, and installed only the nightly build to make sure that everything was set up properly - still the same error.
Just tried again with the latest master branch and for some reason it worked. Steps: git clone the repo, `pip install .` the package, then `pip uninstall torch torchvision torchaudio`, and finally `pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu`
great! gonna close for now, feel free to reopen if needed
My device: M1 Pro MBP
`torch.__version__`: 2.0.0
Originally posted by @hywhuangyuwei in https://github.com/suno-ai/bark/issues/22#issuecomment-1523963160