daswer123 / xtts-api-server

A simple FastAPI Server to run XTTSv2

Issue with the streaming mode #47

Open · Haurrus opened this issue 8 months ago

Haurrus commented 8 months ago

Here's my issue when I try to use the streaming mode. I'm on Windows:

```
2024-01-19 13:33:23.733 | WARNING  | __mp_main__:<module>:78 - 'Streaming Mode' has certain limitations, you can read about them here https://github.com/daswer123/xtts-api-server#about-streaming-mode
2024-01-19 13:33:23.733 | INFO     | __mp_main__:<module>:81 - You launched an improved version of streaming, this version features an improved tokenizer and more context when processing sentences, which can be good for complex languages like Chinese
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Users\MrHaurrus\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "C:\Users\MrHaurrus\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\spawn.py", line 131, in _main
    prepare(preparation_data)
  File "C:\Users\MrHaurrus\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\spawn.py", line 246, in prepare
    _fixup_main_from_path(data['init_main_from_path'])
  File "C:\Users\MrHaurrus\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\spawn.py", line 297, in _fixup_main_from_path
    main_content = runpy.run_path(main_path,
  File "<frozen runpy>", line 291, in run_path
  File "<frozen runpy>", line 98, in _run_module_code
  File "<frozen runpy>", line 88, in _run_code
  File "D:\Modelisation_IA\xtts-api-server\xtts_api_server\server.py", line 85, in <module>
    engine = CoquiEngine(specific_model=MODEL_VERSION, use_deepspeed=DEEPSPEED, local_models_path=str(model_path))
  File "D:\Modelisation_IA\xtts-api-server\xtts_api_server\RealtimeTTS\engines\base_engine.py", line 11, in __call__
    instance = super().__call__(*args, **kwargs)
  File "D:\Modelisation_IA\xtts-api-server\xtts_api_server\RealtimeTTS\engines\coqui_engine.py", line 83, in __init__
    set_start_method('spawn')
  File "C:\Users\MrHaurrus\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\context.py", line 247, in set_start_method
    raise RuntimeError('context has already been set')
RuntimeError: context has already been set

Traceback (most recent call last):
  File "D:\Modelisation_IA\xtts-api-server\xtts_api_server\server.py", line 85, in <module>
    engine = CoquiEngine(specific_model=MODEL_VERSION, use_deepspeed=DEEPSPEED, local_models_path=str(model_path))
  File "D:\Modelisation_IA\xtts-api-server\xtts_api_server\RealtimeTTS\engines\base_engine.py", line 11, in __call__
    instance = super().__call__(*args, **kwargs)
  File "D:\Modelisation_IA\xtts-api-server\xtts_api_server\RealtimeTTS\engines\coqui_engine.py", line 113, in __init__
    self.main_synthesize_ready_event.wait()
  File "C:\Users\MrHaurrus\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\synchronize.py", line 356, in wait
    self._cond.wait(timeout)
  File "C:\Users\MrHaurrus\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\synchronize.py", line 268, in wait
    return self._wait_semaphore.acquire(True, timeout)
KeyboardInterrupt
```
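The RuntimeError at the top is Python's multiprocessing refusing a second set_start_method call: under the spawn start method, the child process re-imports server.py, which constructs CoquiEngine (and thus calls set_start_method('spawn')) again. A minimal sketch of the usual guards against this, with illustrative names rather than the project's actual code:

```python
import multiprocessing as mp

def ensure_spawn_context():
    # set_start_method raises RuntimeError if a start method has already
    # been fixed for this process (e.g. in a spawned child that re-imports
    # the main module), so only set it when nothing has been chosen yet.
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method("spawn")

if __name__ == "__main__":
    # Keeping engine construction under this guard stops spawned children
    # from re-running it at import time, the failure mode in the traceback.
    ensure_spawn_context()
    # engine = CoquiEngine(...)  # construct the engine only in the parent process
```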

FatalErrorVXD commented 7 months ago

Same:

```
ERROR: Exception in ASGI application
Traceback (most recent call last):
  File "C:\ai\SillyTavern\xtts2\venv\Lib\site-packages\starlette\responses.py", line 259, in __call__
    await wrap(partial(self.listen_for_disconnect, receive))
  File "C:\ai\SillyTavern\xtts2\venv\Lib\site-packages\starlette\responses.py", line 255, in wrap
    await func()
  File "C:\ai\SillyTavern\xtts2\venv\Lib\site-packages\starlette\responses.py", line 232, in listen_for_disconnect
    message = await receive()
  File "C:\ai\SillyTavern\xtts2\venv\Lib\site-packages\uvicorn\protocols\http\h11_impl.py", line 538, in receive
    await self.message_event.wait()
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2288.0_x64__qbz5n2kfra8p0\Lib\asyncio\locks.py", line 213, in wait
    await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 1d857217bd0

During handling of the above exception, another exception occurred:
```

clockworkwhale commented 7 months ago

I'm also getting the exact same error as the two users above when attempting to generate with --streaming-mode or --streaming-mode-improve enabled. Everything works normally if the server is launched without one of those flags.

theobjectivedad commented 7 months ago

+1, same issue. Tried with and without DeepSpeed.

scalar27 commented 6 months ago

+1, on Mac M1

Kirinxxx commented 5 months ago

Had the same issue. Found the solution hidden in the instructions: when you activate streaming mode for the server, you need to uncheck streaming within SillyTavern itself.

hoangbv15 commented 4 months ago

> Had the same issue. Found the solution hidden in the instructions: when you activate streaming mode for the server, you need to uncheck streaming within SillyTavern itself.

I don't think this is the same issue as OP's. The one you are talking about is TTS not working after the server has successfully started up. OP's issue is that the server does not start up at all, so it doesn't matter whether streaming is on or off in SillyTavern.

I am having the same issue on my MacBook M2. Here's an interesting bit:

Why don't I just use the prebuilt one, you ask? Because I couldn't find any way to enable Metal (the use_mps parameter in coqui_engine.py). It is not exposed on the entry point at all and just defaults to False, so I cloned the repo and forced it to True.
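For reference, here is a minimal sketch of that kind of device selection, assuming a use_mps flag like the one in coqui_engine.py were plumbed through to the entry point (the helper name is illustrative, not the project's actual code):

```python
import torch

def pick_device(use_mps: bool = False) -> str:
    # Prefer Apple Metal (MPS) when requested and available,
    # then CUDA, then fall back to CPU.
    if use_mps and torch.backends.mps.is_available():
        return "mps"
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"

# e.g. model.to(pick_device(use_mps=True)) on an M-series Mac
```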

hoangbv15 commented 4 months ago

I figured out the solution. If I run the server from the latest code but reuse the same conda env as the prebuilt version, I get that error. So I created a new conda env, reinstalled the dependencies from the repo's requirements.txt, and it works now.

Note that fastapi is set to >=0.104.1 in requirements.txt, and I needed to pin it strictly to ==0.104.1; otherwise the install fails.
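In other words, the requirements.txt change is just turning the minimum version into an exact pin:

```
fastapi==0.104.1
```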

However, with this, streaming_mode_improve still seems to use the CPU instead of the GPU (INFO:stanza:Using device: cpu), although torch now correctly uses Metal with use_mps set to True.

Arche151 commented 3 months ago

Same issue. @hoangbv15's approach didn't help unfortunately.

I still get `AttributeError: 'TTSWrapper' object has no attribute 'model'` and `Exception in ASGI application`.

@daswer123 Do you maybe know how to fix this error?

m1ll10n commented 2 months ago

> Same issue. @hoangbv15's approach didn't help unfortunately.
>
> I still get `AttributeError: 'TTSWrapper' object has no attribute 'model'` and `Exception in ASGI application`.
>
> @daswer123 Do you maybe know how to fix this error?

I got it running by adding a model attribute in the class with load_local_model(load=False) (you could probably drop the parameter, since it is never used) and changing the function to return the model, both inside the TTSWrapper class in xtts_api_server/tts_funcs.py. So far it's working for me, and only time will tell if I broke something in a different part of the codebase. Here are the two methods after the change:

```python
# Both methods live on the TTSWrapper class in xtts_api_server/tts_funcs.py.

def __init__(self, output_folder="./output", speaker_folder="./speakers",
             model_folder="./xtts_folder", lowvram=False, model_source="local",
             model_version="2.0.2", device="cuda", deepspeed=False,
             enable_cache_results=True):
    self.cuda = device  # If the user has chosen what to use, we rewrite the value to the value we want to use
    self.device = 'cpu' if lowvram else (self.cuda if torch.cuda.is_available() else "cpu")
    self.lowvram = lowvram  # Store whether we want to run in low VRAM mode.

    self.latents_cache = {}

    self.model_source = model_source
    self.model_version = model_version
    self.tts_settings = default_tts_settings
    self.stream_chunk_size = 100

    self.deepspeed = deepspeed

    self.speaker_folder = speaker_folder
    self.output_folder = output_folder
    self.model_folder = model_folder

    self.create_directories()
    check_tts_version()

    self.enable_cache_results = enable_cache_results
    self.cache_file_path = os.path.join(output_folder, "cache.json")

    self.is_official_model = True

    if self.enable_cache_results:
        # Reset the contents of the cache file at each initialization.
        with open(self.cache_file_path, 'w') as cache_file:
            json.dump({}, cache_file)

    # The fix: load the model here so self.model always exists.
    self.model = self.load_local_model(load=False)

def load_local_model(self, load=True):
    this_model_dir = Path(self.model_folder)

    if self.isModelOfficial(self.model_version):
        download_model(this_model_dir, self.model_version)

    config = XttsConfig()
    config_path = this_model_dir / f'{self.model_version}' / 'config.json'
    checkpoint_dir = this_model_dir / f'{self.model_version}'

    config.load_json(str(config_path))

    self.model = Xtts.init_from_config(config)
    self.model.load_checkpoint(config, use_deepspeed=self.deepspeed, checkpoint_dir=str(checkpoint_dir))
    self.model.to(self.device)

    # The fix: return the loaded model so __init__ can assign it.
    return self.model
```
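One caveat worth noting: load_local_model never reads its load argument, so load=False does not defer anything; the model is still loaded eagerly when TTSWrapper is constructed. That is presumably why the fix works: self.model is guaranteed to exist before any request handler touches it, which is exactly what the AttributeError above was complaining about.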