lmql serve-model meta-llama/Llama-2-7b-hf --cuda --port 9999 --trust_remote_code True
[Loading meta-llama/Llama-2-7b-hf with AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", trust_remote_code=True, device_map=auto)]
[Serving LMTP endpoint on ws://localhost:9999/]
Exception in thread scheduler-worker:
Traceback (most recent call last):
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1390, in _get_module
return importlib.import_module("." + module_name, self.__name__)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/generation/utils.py", line 93, in <module>
from accelerate.hooks import AlignDevicesHook, add_hook_to_module
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/accelerate/__init__.py", line 3, in <module>
from .accelerator import Accelerator
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/accelerate/accelerator.py", line 35, in <module>
from .checkpointing import load_accelerator_state, load_custom_state, save_accelerator_state, save_custom_state
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/accelerate/checkpointing.py", line 24, in <module>
from .utils import (
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/accelerate/utils/__init__.py", line 158, in <module>
from .fsdp_utils import load_fsdp_model, load_fsdp_optimizer, save_fsdp_model, save_fsdp_optimizer
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/accelerate/utils/fsdp_utils.py", line 26, in <module>
import torch.distributed.checkpoint as dist_cp
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/distributed/checkpoint/__init__.py", line 7, in <module>
from .state_dict_loader import load_state_dict, load
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/distributed/checkpoint/state_dict_loader.py", line 12, in <module>
from .default_planner import DefaultLoadPlanner
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/distributed/checkpoint/default_planner.py", line 14, in <module>
from torch.distributed._tensor import DTensor
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/distributed/_tensor/__init__.py", line 346, in <module>
import torch.distributed._tensor._dynamo_utils
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/distributed/_tensor/_dynamo_utils.py", line 1, in <module>
from torch._dynamo import allow_in_graph
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/_dynamo/__init__.py", line 2, in <module>
from . import allowed_functions, convert_frame, eval_frame, resume_execution
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/_dynamo/convert_frame.py", line 62, in <module>
from .output_graph import OutputGraph
ImportError: cannot import name 'OutputGraph' from 'torch._dynamo.output_graph' (/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/_dynamo/output_graph.py)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1390, in _get_module
return importlib.import_module("." + module_name, self.__name__)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 883, in exec_module
File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 39, in <module>
from ...modeling_utils import PreTrainedModel
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/modeling_utils.py", line 44, in <module>
from .generation import GenerationConfig, GenerationMixin
File "<frozen importlib._bootstrap>", line 1075, in _handle_fromlist
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1380, in __getattr__
module = self._get_module(self._class_to_module[name])
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1392, in _get_module
raise RuntimeError(
RuntimeError: Failed to import transformers.generation.utils because of the following error (look up to see its traceback):
cannot import name 'OutputGraph' from 'torch._dynamo.output_graph' (/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/_dynamo/output_graph.py)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
self.run()
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/threading.py", line 953, in run
self._target(*self._args, **self._kwargs)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/lmql/models/lmtp/lmtp_scheduler.py", line 269, in worker
model = LMTPModel.load(self.model_identifier, **self.model_args)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/lmql/models/lmtp/backends/lmtp_model.py", line 51, in load
return LMTPModel.registry["transformers"](model_name, **kwargs)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/lmql/models/lmtp/backends/transformers_model.py", line 43, in __init__
self.model = AutoModelForCausalLM.from_pretrained(self.model_identifier, **self.model_args)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 560, in from_pretrained
model_class = _get_model_class(config, cls._model_mapping)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 381, in _get_model_class
supported_models = model_mapping[type(config)]
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 732, in __getitem__
return self._load_attr_from_module(model_type, model_name)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 746, in _load_attr_from_module
return getattribute_from_module(self._modules[module_name], attr)
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 690, in getattribute_from_module
if hasattr(module, attr):
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1380, in __getattr__
module = self._get_module(self._class_to_module[name])
File "/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1392, in _get_module
raise RuntimeError(
RuntimeError: Failed to import transformers.models.llama.modeling_llama because of the following error (look up to see its traceback):
Failed to import transformers.generation.utils because of the following error (look up to see its traceback):
cannot import name 'OutputGraph' from 'torch._dynamo.output_graph' (/home/scruel/Code/Python/llm-test/.conda/lib/python3.10/site-packages/torch/_dynamo/output_graph.py)
This sounds like an issue with your versions of transformers and/or pytorch. Please make sure these are up to date. Based on the stacktrace, this cannot be fixed from within LMQL.