from vllm import LLM, SamplingParams
from time import time
# Offline batch-generation script: build a handful of prompts, load the model
# through vLLM, time a single `generate` call over the whole batch, and print
# each prompt next to its completion.

# Prompts to complete; they are submitted together in one `generate` call.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Sampling configuration shared by every prompt in the batch.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create the LLM engine. `tensor_parallel_size=2` requests tensor parallelism
# across two devices.
# NOTE(review): presumably this checkpoint uses the Mixtral architecture, so
# the installed vLLM must list 'MixtralForCausalLM' among its supported
# architectures -- confirm against the installed version.
llm = LLM(model="DiscoResearch/DiscoLM-mixtral-8x7b-v2", tensor_parallel_size=2)

# Generate texts from the prompts. The output is a list of RequestOutput
# objects that contain the prompt, generated text, and other information.
# Only the `generate` call is timed; engine construction above is excluded.
t = time()
outputs = llm.generate(prompts, sampling_params)
print(f"Finish all prompts in total {time()-t} s")

# Print the outputs: one line per request, showing the first candidate only.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
The latest version of transformers (4.36.0) is installed (CUDA version 12.2, 4× NVIDIA A100-SXM4-40GB), but I am getting the following error. Could you please help?
error:
2023-12-11 18:23:58,175 INFO worker.py:1489 -- Connecting to existing Ray cluster at address: 10.0.1.105:6379...
2023-12-11 18:23:58,182 INFO worker.py:1673 -- Connected to Ray cluster.
INFO 12-11 18:23:58 llm_engine.py:73] Initializing an LLM engine with config: model='mistralai/Mixtral-8x7B-Instruct-v0.1', tokenizer='mistralai/Mixtral-8x7B-Instruct-v0.1', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=2, quantization=None, seed=0)
Traceback (most recent call last):
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mix.py", line 15, in <module>
llm = LLM(model="mistralai/Mixtral-8x7B-Instruct-v0.1", tensor_parallel_size=2)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/entrypoints/llm.py", line 93, in __init__
self.llm_engine = LLMEngine.from_engine_args(engine_args)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 246, in from_engine_args
engine = cls(engine_configs,
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 107, in __init__
self._init_workers_ray(placement_group)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 194, in _init_workers_ray
self._run_workers(
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 750, in _run_workers
self._run_workers_in_batch(workers, method, args, kwargs))
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 727, in _run_workers_in_batch
all_outputs = ray.get(all_outputs)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/ray/_private/worker.py", line 2563, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::RayWorkerVllm.execute_method() (pid=3668385, ip=10.0.1.105, actor_id=e107b40e2809d80330a4550406000000, repr=<vllm.engine.ray_utils.RayWorkerVllm object at 0x15244194f0d0>)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/ray_utils.py", line 31, in execute_method
return executor(*args, **kwargs)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/worker/worker.py", line 72, in load_model
self.model_runner.load_model()
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/worker/model_runner.py", line 36, in load_model
self.model = get_model(self.model_config)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/model_executor/model_loader.py", line 62, in get_model
model_class = _get_model_architecture(model_config.hf_config)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/model_executor/model_loader.py", line 56, in _get_model_architecture
raise ValueError(
ValueError: Model architectures ['MixtralForCausalLM'] are not supported for now. Supported architectures: ['AquilaModel', 'AquilaForCausalLM', 'BaiChuanForCausalLM', 'BaichuanForCausalLM', 'BloomForCausalLM', 'ChatGLMModel', 'FalconForCausalLM', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTJForCausalLM', 'GPTNeoXForCausalLM', 'InternLMForCausalLM', 'LlamaForCausalLM', 'LLaMAForCausalLM', 'MistralForCausalLM', 'MptForCausalLM', 'MPTForCausalLM', 'OPTForCausalLM', 'PhiForCausalLM', 'QWenLMHeadModel', 'RWForCausalLM', 'YiForCausalLM']
code snippet:
The latest version of transformers (4.36.0) is installed (CUDA version 12.2, 4× NVIDIA A100-SXM4-40GB), but I am getting the following error. Could you please help?
error:
2023-12-11 18:23:58,175 INFO worker.py:1489 -- Connecting to existing Ray cluster at address: 10.0.1.105:6379...
2023-12-11 18:23:58,182 INFO worker.py:1673 -- Connected to Ray cluster.
INFO 12-11 18:23:58 llm_engine.py:73] Initializing an LLM engine with config: model='mistralai/Mixtral-8x7B-Instruct-v0.1', tokenizer='mistralai/Mixtral-8x7B-Instruct-v0.1', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=2, quantization=None, seed=0)
Traceback (most recent call last):
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mix.py", line 15, in <module>
llm = LLM(model="mistralai/Mixtral-8x7B-Instruct-v0.1", tensor_parallel_size=2)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/entrypoints/llm.py", line 93, in __init__
self.llm_engine = LLMEngine.from_engine_args(engine_args)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 246, in from_engine_args
engine = cls(engine_configs,
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 107, in __init__
self._init_workers_ray(placement_group)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 194, in _init_workers_ray
self._run_workers(
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 750, in _run_workers
self._run_workers_in_batch(workers, method, args, kwargs))
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/llm_engine.py", line 727, in _run_workers_in_batch
all_outputs = ray.get(all_outputs)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/ray/_private/worker.py", line 2563, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::RayWorkerVllm.execute_method() (pid=3668385, ip=10.0.1.105, actor_id=e107b40e2809d80330a4550406000000, repr=<vllm.engine.ray_utils.RayWorkerVllm object at 0x15244194f0d0>)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/engine/ray_utils.py", line 31, in execute_method
return executor(*args, **kwargs)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/worker/worker.py", line 72, in load_model
self.model_runner.load_model()
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/worker/model_runner.py", line 36, in load_model
self.model = get_model(self.model_config)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/model_executor/model_loader.py", line 62, in get_model
model_class = _get_model_architecture(model_config.hf_config)
File "/hkfs/home/project/hk-project-test-socialgroups/st_ac141953/mistral/lib64/python3.9/site-packages/vllm/model_executor/model_loader.py", line 56, in _get_model_architecture
raise ValueError(
ValueError: Model architectures ['MixtralForCausalLM'] are not supported for now. Supported architectures: ['AquilaModel', 'AquilaForCausalLM', 'BaiChuanForCausalLM', 'BaichuanForCausalLM', 'BloomForCausalLM', 'ChatGLMModel', 'FalconForCausalLM', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTJForCausalLM', 'GPTNeoXForCausalLM', 'InternLMForCausalLM', 'LlamaForCausalLM', 'LLaMAForCausalLM', 'MistralForCausalLM', 'MptForCausalLM', 'MPTForCausalLM', 'OPTForCausalLM', 'PhiForCausalLM', 'QWenLMHeadModel', 'RWForCausalLM', 'YiForCausalLM']