Replace OpenAI GPT with another LLM in your app by changing a single line of code. Xinference gives you the freedom to use any LLM you need. With Xinference, you're empowered to run inference with any open-source language models, speech recognition models, and multimodal models, whether in the cloud, on-premises, or even on your laptop.
Running Xinference with Docker? / 是否使用 Docker 运行 Xinfernece?
[X] docker / docker
[ ] pip install / 通过 pip install 安装
[ ] installation from source / 从源码安装
Version info / 版本信息
v0.16.0
The command used to start Xinference / 用以启动 xinference 的命令
通过webui启动
无法启动,提示错误
2024-10-21 19:18:13,130 xinference.api.restful_api 1 ERROR [address=0.0.0.0:39107, pid=420] No GPU found. A GPU is needed for quantization.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 977, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
return await super().__on_receive__(message) # type: ignore
File "xoscar/core.pyx", line 558, in __on_receive__
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1040, in launch_builtin_model
await _launch_model()
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1004, in _launch_model
await _launch_one_model(rep_model_uid)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 983, in _launch_one_model
await worker_ref.launch_builtin_model(
File "xoscar/core.pyx", line 284, in __pyx_actor_method_wrapper
async with lock:
File "xoscar/core.pyx", line 287, in xoscar.core.__pyx_actor_method_wrapper
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 78, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 894, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
return await super().__on_receive__(message) # type: ignore
File "xoscar/core.pyx", line 558, in __on_receive__
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 375, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/transformers/minicpmv26.py", line 85, in load
model = AutoModel.from_pretrained(
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 559, in from_pretrained
return model_class.from_pretrained(
File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 3398, in from_pretrained
hf_quantizer.validate_environment(
File "/usr/local/lib/python3.10/dist-packages/transformers/quantizers/quantizer_bnb_4bit.py", line 62, in validate_environment
raise RuntimeError("No GPU found. A GPU is needed for quantization.")
RuntimeError: [address=0.0.0.0:39107, pid=420] No GPU found. A GPU is needed for quantization.
Reproduction / 复现过程
通过webui启动
无法启动,提示错误
2024-10-21 19:18:13,130 xinference.api.restful_api 1 ERROR [address=0.0.0.0:39107, pid=420] No GPU found. A GPU is needed for quantization.
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 977, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
return await super().__on_receive__(message) # type: ignore
File "xoscar/core.pyx", line 558, in __on_receive__
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1040, in launch_builtin_model
await _launch_model()
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1004, in _launch_model
await _launch_one_model(rep_model_uid)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 983, in _launch_one_model
await worker_ref.launch_builtin_model(
File "xoscar/core.pyx", line 284, in __pyx_actor_method_wrapper
async with lock:
File "xoscar/core.pyx", line 287, in xoscar.core.__pyx_actor_method_wrapper
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 78, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 894, in launch_builtin_model
await model_ref.load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 231, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 656, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 367, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 384, in __on_receive__
return await super().__on_receive__(message) # type: ignore
File "xoscar/core.pyx", line 558, in __on_receive__
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.__on_receive__
async with self._lock:
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.__on_receive__
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.__on_receive__
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/model.py", line 375, in load
self._model.load()
File "/usr/local/lib/python3.10/dist-packages/xinference/model/llm/transformers/minicpmv26.py", line 85, in load
model = AutoModel.from_pretrained(
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 559, in from_pretrained
return model_class.from_pretrained(
File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 3398, in from_pretrained
hf_quantizer.validate_environment(
File "/usr/local/lib/python3.10/dist-packages/transformers/quantizers/quantizer_bnb_4bit.py", line 62, in validate_environment
raise RuntimeError("No GPU found. A GPU is needed for quantization.")
RuntimeError: [address=0.0.0.0:39107, pid=420] No GPU found. A GPU is needed for quantization.
System Info / 系統信息
v0.16.0 docker版本 ubutun 24.04 TLS
cuda版本
Running Xinference with Docker? / 是否使用 Docker 运行 Xinfernece?
Version info / 版本信息
v0.16.0
The command used to start Xinference / 用以启动 xinference 的命令
通过webui启动
无法启动,提示错误
Reproduction / 复现过程
通过webui启动
无法启动,提示错误
Expected behavior / 期待表现
期望能够正常启动