I'm trying to run the VLM example that uses the SGLang library (https://modal.com/docs/examples/sgl_vlm) with the command
modal run 06_gpu_and_ml/llm-serving/sgl_vlm.py from the modal-examples repository.
I get the following error:
Traceback (most recent call last):
File "/pkg/modal/_container_io_manager.py", line 699, in handle_user_exception
yield
File "/pkg/modal/_container_entrypoint.py", line 690, in call_lifecycle_functions
res = func(*args)
^^^^^^^^^^^
File "/root/t.py", line 118, in start_runtime
self.runtime = sgl.Runtime(
^^^^^^^^^^^^
File "/usr/local/lib/python3.11/site-packages/sglang/api.py", line 38, in Runtime
from sglang.srt.server import Runtime
File "/usr/local/lib/python3.11/site-packages/sglang/srt/server.py", line 45, in <module>
from sglang.srt.managers.controller_multi import (
File "/usr/local/lib/python3.11/site-packages/sglang/srt/managers/controller_multi.py", line 29, in <module>
from sglang.srt.managers.controller_single import (
File "/usr/local/lib/python3.11/site-packages/sglang/srt/managers/controller_single.py", line 24, in <module>
from sglang.srt.managers.tp_worker import (
File "/usr/local/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 56, in <module>
from sglang.srt.model_executor.model_runner import ModelRunner
File "/usr/local/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 28, in <module>
from flashinfer import (
ModuleNotFoundError: No module named 'flashinfer'
Runner failed with exception: ModuleNotFoundError("No module named 'flashinfer'")
Stopping app - uncaught exception raised locally: AssertionError(500).
Please advise, thanks very much.
I'm trying to run the VLM example that uses the SGLang library (https://modal.com/docs/examples/sgl_vlm) with the command modal run 06_gpu_and_ml/llm-serving/sgl_vlm.py from the modal-examples repository. I get the following error: Traceback (most recent call last): File "/pkg/modal/_container_io_manager.py", line 699, in handle_user_exception yield File "/pkg/modal/_container_entrypoint.py", line 690, in call_lifecycle_functions res = func(*args) ^^^^^^^^^^^ File "/root/t.py", line 118, in start_runtime self.runtime = sgl.Runtime( ^^^^^^^^^^^^ File "/usr/local/lib/python3.11/site-packages/sglang/api.py", line 38, in Runtime from sglang.srt.server import Runtime File "/usr/local/lib/python3.11/site-packages/sglang/srt/server.py", line 45, in <module>
from sglang.srt.managers.controller_multi import (
File "/usr/local/lib/python3.11/site-packages/sglang/srt/managers/controller_multi.py", line 29, in <module>
from sglang.srt.managers.controller_single import (
File "/usr/local/lib/python3.11/site-packages/sglang/srt/managers/controller_single.py", line 24, in <module>
from sglang.srt.managers.tp_worker import (
File "/usr/local/lib/python3.11/site-packages/sglang/srt/managers/tp_worker.py", line 56, in <module>
from sglang.srt.model_executor.model_runner import ModelRunner
File "/usr/local/lib/python3.11/site-packages/sglang/srt/model_executor/model_runner.py", line 28, in <module>
from flashinfer import (
ModuleNotFoundError: No module named 'flashinfer'
Runner failed with exception: ModuleNotFoundError("No module named 'flashinfer'")
Stopping app - uncaught exception raised locally: AssertionError(500).
Please advise, thanks very much.