My previous version was 0.5.0.post1. Here is the model's `config.json`:
```json
{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "hidden_act": "silu",
  "hidden_size": 8192,
  "initializer_range": 0.02,
  "intermediate_size": 28672,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 64,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 8.0,
    "low_freq_factor": 1.0,
    "high_freq_factor": 4.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.42.3",
  "use_cache": true,
  "vocab_size": 128256
}
```
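For reference, the `rope_scaling` block with `"rope_type": "llama3"` is typically the part of this config that older transformers/vLLM builds cannot parse. If it helps to rule out a config-parsing problem separately from vLLM, a quick check is to load just the config with the installed transformers. The path below is the local model directory from this issue; treat this as a diagnostic sketch, not a fix.

```python
# Diagnostic sketch: verify that the installed transformers can parse this
# config.json (the "llama3" rope_scaling format requires a recent transformers).
# The model path below is the local directory used in this issue.
import transformers
from transformers import AutoConfig

print("transformers version:", transformers.__version__)

config = AutoConfig.from_pretrained("llm_models/meta-llama/Llama-3.1-70B-Instruct")
print(config.architectures)   # expected: ['LlamaForCausalLM']
print(config.rope_scaling)    # expected: {'rope_type': 'llama3', 'factor': 8.0, ...}
```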
Can you show the full stack trace? There should be a warning about not being able to import the model.
Thanks for your comment. Here is the full stack trace that precedes the unsupported-model error:
```text
ERROR 10-19 11:25:47 registry.py:267] Error in inspecting model architecture 'LlamaForCausalLM'
ERROR 10-19 11:25:47 registry.py:267] Traceback (most recent call last):
ERROR 10-19 11:25:47 registry.py:267]   File "/opt/conda/lib/python3.10/site-packages/vllm/model_executor/models/registry.py", line 429, in _run_in_subprocess
ERROR 10-19 11:25:47 registry.py:267]     returned.check_returncode()
ERROR 10-19 11:25:47 registry.py:267]   File "/opt/conda/lib/python3.10/subprocess.py", line 457, in check_returncode
ERROR 10-19 11:25:47 registry.py:267]     raise CalledProcessError(self.returncode, self.args, self.stdout,
ERROR 10-19 11:25:47 registry.py:267] subprocess.CalledProcessError: Command '['/opt/conda/bin/python', '-m', 'vllm.model_executor.models.registry']' returned non-zero exit status 1.
ERROR 10-19 11:25:47 registry.py:267]
ERROR 10-19 11:25:47 registry.py:267] The above exception was the direct cause of the following exception:
ERROR 10-19 11:25:47 registry.py:267]
ERROR 10-19 11:25:47 registry.py:267] Traceback (most recent call last):
ERROR 10-19 11:25:47 registry.py:267]   File "/opt/conda/lib/python3.10/site-packages/vllm/model_executor/models/registry.py", line 265, in _try_inspect_model_cls
ERROR 10-19 11:25:47 registry.py:267]     return model.inspect_model_cls()
ERROR 10-19 11:25:47 registry.py:267]   File "/opt/conda/lib/python3.10/site-packages/vllm/model_executor/models/registry.py", line 227, in inspect_model_cls
ERROR 10-19 11:25:47 registry.py:267]     return _run_in_subprocess(
ERROR 10-19 11:25:47 registry.py:267]   File "/opt/conda/lib/python3.10/site-packages/vllm/model_executor/models/registry.py", line 432, in _run_in_subprocess
ERROR 10-19 11:25:47 registry.py:267]     raise RuntimeError(f"Error raised in subprocess:\n"
ERROR 10-19 11:25:47 registry.py:267] RuntimeError: Error raised in subprocess:
ERROR 10-19 11:25:47 registry.py:267] Error: mkl-service + Intel(R) MKL: MKL_THREADING_LAYER=INTEL is incompatible with libgomp-a34b3233.so.1 library.
ERROR 10-19 11:25:47 registry.py:267] Try to import numpy first or set the threading layer accordingly. Set MKL_SERVICE_FORCE_INTEL to force it.
ERROR 10-19 11:25:47 registry.py:267]
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/c01yita/.vscode-server/extensions/ms-python.debugpy-2024.12.0-linux-x64/bundled/libs/debugpy/adapter/../../debugpy/launcher/../../debugpy/__main__.py", line 71, in <module>
    cli.main()
  File "/home/c01yita/.vscode-server/extensions/ms-python.debugpy-2024.12.0-linux-x64/bundled/libs/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 501, in main
    run()
  File "/home/c01yita/.vscode-server/extensions/ms-python.debugpy-2024.12.0-linux-x64/bundled/libs/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 351, in run_file
    runpy.run_path(target, run_name="__main__")
  File "/home/c01yita/.vscode-server/extensions/ms-python.debugpy-2024.12.0-linux-x64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 310, in run_path
    return _run_module_code(code, init_globals, run_name, pkg_name=pkg_name, script_name=fname)
  File "/home/c01yita/.vscode-server/extensions/ms-python.debugpy-2024.12.0-linux-x64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 127, in _run_module_code
    _run_code(code, mod_globals, init_globals, mod_name, mod_spec, pkg_name, script_name)
  File "/home/c01yita/.vscode-server/extensions/ms-python.debugpy-2024.12.0-linux-x64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 118, in _run_code
    exec(code, run_globals)
  File "/home/c01yita/CISPA-az6/llmgraph-2024/Memory_Injection_Attack/ReAct/run_strategyqa.py", line 55, in <module>
    llm = LLM(model='llm_models/meta-llama/Llama-3.1-70B-Instruct', tensor_parallel_size=4)
  File "/opt/conda/lib/python3.10/site-packages/vllm/entrypoints/llm.py", line 177, in __init__
    self.llm_engine = LLMEngine.from_engine_args(
  File "/opt/conda/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 570, in from_engine_args
    engine_config = engine_args.create_engine_config()
  File "/opt/conda/lib/python3.10/site-packages/vllm/engine/arg_utils.py", line 903, in create_engine_config
    model_config = self.create_model_config()
  File "/opt/conda/lib/python3.10/site-packages/vllm/engine/arg_utils.py", line 839, in create_model_config
    return ModelConfig(
  File "/opt/conda/lib/python3.10/site-packages/vllm/config.py", line 200, in __init__
    self.multimodal_config = self._init_multimodal_config(
  File "/opt/conda/lib/python3.10/site-packages/vllm/config.py", line 219, in _init_multimodal_config
    if ModelRegistry.is_multimodal_model(architectures):
  File "/opt/conda/lib/python3.10/site-packages/vllm/model_executor/models/registry.py", line 387, in is_multimodal_model
    return self.inspect_model_cls(architectures).supports_multimodal
  File "/opt/conda/lib/python3.10/site-packages/vllm/model_executor/models/registry.py", line 356, in inspect_model_cls
    return self._raise_for_unsupported(architectures)
  File "/opt/conda/lib/python3.10/site-packages/vllm/model_executor/models/registry.py", line 317, in _raise_for_unsupported
```
Try reinstalling numpy: `python -m pip install --upgrade numpy`
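If you go the reinstall route, a quick sanity check that the new numpy is actually the one being picked up, and which BLAS backend it links, can help; an MKL-linked conda numpy and a pip wheel (usually OpenBLAS) behave differently with respect to the threading clash above. A small sketch:

```python
# Sanity check after reinstalling numpy: print the version and the BLAS/LAPACK
# backend it was built against, to confirm which build the interpreter picks up.
import numpy as np

print(np.__version__)
np.show_config()  # lists the BLAS/LAPACK libraries the installed numpy uses
```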
I really appreciate it! This seems to work and begins to load shards.
## Your current environment

vLLM version: 0.6.3.post1

```text
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.161.07             Driver Version: 535.161.07   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  NVIDIA A100-SXM4-80GB          On  | 00000000:47:00.0 Off |                    0 |
| N/A   31C    P0              64W / 400W |      0MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM4-80GB          On  | 00000000:4E:00.0 Off |                    0 |
| N/A   31C    P0              63W / 400W |      0MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   2  NVIDIA A100-SXM4-80GB          On  | 00000000:87:00.0 Off |                    0 |
| N/A   34C    P0              64W / 400W |      0MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
|   3  NVIDIA A100-SXM4-80GB          On  | 00000000:90:00.0 Off |                    0 |
| N/A   34C    P0              63W / 400W |      0MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+

+---------------------------------------------------------------------------------------+
| Processes:                                                                             |
|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
|        ID   ID                                                             Usage      |
|=======================================================================================|
|  No running processes found                                                           |
+---------------------------------------------------------------------------------------+
```

## Model Input Dumps

## 🐛 Describe the bug

I updated vLLM to use Llama 3.1-70B models via

```text
pip install --upgrade typing_extensions
pip install -U vllm
```

However, I found that it now raises errors not only for Llama 3.1-70B but also for Llama 3-70B, which worked fine with the previous version.

```python
from vllm import LLM, SamplingParams

llm = LLM(model='llm_models/meta-llama/Llama-3.1-70B-Instruct', tensor_parallel_size=4)
```

## Error info

```text
ValueError: Model architectures ['LlamaForCausalLM'] are not supported for now. Supported architectures: ['AquilaModel', 'AquilaForCausalLM', 'ArcticForCausalLM', 'BaiChuanForCausalLM', 'BaichuanForCausalLM', 'BloomForCausalLM', 'CohereForCausalLM', 'DbrxForCausalLM', 'DeciLMForCausalLM', 'DeepseekForCausalLM', 'DeepseekV2ForCausalLM', 'ExaoneForCausalLM', 'FalconForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTJForCausalLM', 'GPTNeoXForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'InternLMForCausalLM', 'InternLM2ForCausalLM', 'JAISLMHeadModel', 'JambaForCausalLM', 'LlamaForCausalLM', 'LLaMAForCausalLM', 'MambaForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'QuantMixtralForCausalLM', 'MptForCausalLM', 'MPTForCausalLM', 'MiniCPMForCausalLM', 'MiniCPM3ForCausalLM', 'NemotronForCausalLM', 'OlmoForCausalLM', 'OlmoeForCausalLM', 'OPTForCausalLM', 'OrionForCausalLM', 'PersimmonForCausalLM', 'PhiForCausalLM', 'Phi3ForCausalLM', 'Phi3SmallForCausalLM', 'PhiMoEForCausalLM', 'Qwen2ForCausalLM', 'Qwen2MoeForCausalLM', 'RWForCausalLM', 'StableLMEpochForCausalLM', 'StableLmForCausalLM', 'Starcoder2ForCausalLM', 'SolarForCausalLM', 'XverseForCausalLM', 'BartModel', 'BartForConditionalGeneration', 'Gemma2Model', 'MistralModel', 'Qwen2ForRewardModel', 'Phi3VForCausalLM', 'Blip2ForConditionalGeneration', 'ChameleonForConditionalGeneration', 'ChatGLMModel', 'ChatGLMForConditionalGeneration', 'FuyuForCausalLM', 'InternVLChatModel', 'LlavaForConditionalGeneration', 'LlavaNextForConditionalGeneration', 'LlavaNextVideoForConditionalGeneration', 'LlavaOnevisionForConditionalGeneration', 'MiniCPMV', 'MolmoForCausalLM', 'NVLM_D', 'PaliGemmaForConditionalGeneration', 'PixtralForConditionalGeneration', 'QWenLMHeadModel', 'Qwen2VLForConditionalGeneration', 'UltravoxModel', 'MllamaForConditionalGeneration', 'EAGLEModel', 'MedusaModel', 'MLPSpeculatorPreTrainedModel']
```

## Before submitting a new issue...
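One thing worth noting about the error above: `LlamaForCausalLM` does appear in the supported-architectures list, so the ValueError is masking the real failure (the registry subprocess crashing on import, as in the MKL trace earlier in this thread). If it is useful, the installed vLLM can be asked directly what it registers; this is a sketch assuming the `ModelRegistry.get_supported_archs()` helper exposed in recent 0.6.x releases:

```python
# Sketch: ask the installed vLLM which architectures it registers, assuming the
# ModelRegistry helper available in recent releases. If 'LlamaForCausalLM' is
# listed but loading still fails, the problem is the environment (e.g. the
# MKL/libgomp clash above), not missing model support.
from vllm import ModelRegistry

archs = ModelRegistry.get_supported_archs()
print("LlamaForCausalLM" in archs, len(archs))
```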