Closed renjingneng closed 1 month ago
下面是完整报错
AttributeError Traceback (most recent call last) Cell In[1], line 11 6 os.environ['CUDA_VISIBLE_DEVICES'] = '3' 8 MODEL_NAME = "/aixunlian/renjingneng/tasks/task_6/model/openbmb-MiniCPM-Llama3-V-2_5" ---> 11 llm = LLM(model=MODEL_NAME, 12 gpu_memory_utilization=1, 13 trust_remote_code=True, 14 max_model_len=4096) 15 exit() 16 IMAGES = [ 17 "/aixunlian/renjingneng/tasks/task_6/MiniCPM-V/assets/airplane.jpeg", 18 ]
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/entrypoints/llm.py:156, in LLM.init(self, model, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_context_len_to_capture, max_seq_len_to_capture, disable_custom_all_reduce, kwargs) 133 raise TypeError( 134 "There is no need to pass vision-related arguments anymore.") 135 engine_args = EngineArgs( 136 model=model, 137 tokenizer=tokenizer, (...) 154 kwargs, 155 ) --> 156 self.llm_engine = LLMEngine.from_engine_args( 157 engine_args, usage_context=UsageContext.LLM_CLASS) 158 self.request_counter = Counter()
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/engine/llm_engine.py:426, in LLMEngine.from_engine_args(cls, engine_args, usage_context, stat_loggers) 424 executor_class = GPUExecutor 425 # Create the LLM engine. --> 426 engine = cls( 427 **engine_config.to_dict(), 428 executor_class=executor_class, 429 log_stats=not engine_args.disable_log_stats, 430 usage_context=usage_context, 431 stat_loggers=stat_loggers, 432 ) 434 return engine
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/engine/llm_engine.py:264, in LLMEngine.init(self, model_config, cache_config, parallel_config, scheduler_config, device_config, load_config, lora_config, multimodal_config, speculative_config, decoding_config, observability_config, prompt_adapter_config, executor_class, log_stats, usage_context, stat_loggers) 250 self.model_executor = executor_class( 251 model_config=model_config, 252 cache_config=cache_config, (...) 260 prompt_adapter_config=prompt_adapter_config, 261 ) 263 if not self.model_config.embedding_mode: --> 264 self._initialize_kv_caches() 266 # If usage stat is enabled, collect relevant info. 267 if is_usage_stats_enabled():
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/engine/llm_engine.py:363, in LLMEngine._initialize_kv_caches(self) 356 def _initialize_kv_caches(self) -> None: 357 """Initialize the KV cache in the worker(s). 358 359 The workers will determine the number of blocks in both the GPU cache 360 and the swap CPU cache. 361 """ 362 num_gpu_blocks, num_cpu_blocks = ( --> 363 self.model_executor.determine_num_available_blocks()) 365 if self.cache_config.num_gpu_blocks_override is not None: 366 num_gpu_blocks_override = self.cache_config.num_gpu_blocks_override
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/executor/gpu_executor.py:92, in GPUExecutor.determine_num_available_blocks(self) 88 def determine_num_available_blocks(self) -> Tuple[int, int]: 89 """Determine the number of available KV blocks by invoking the 90 underlying worker. 91 """ ---> 92 return self.driver_worker.determine_num_available_blocks()
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/worker/worker.py:179, in Worker.determine_num_available_blocks(self) 175 torch.cuda.empty_cache() 177 # Execute a forward pass with dummy inputs to profile the memory usage 178 # of the model. --> 179 self.model_runner.profile_run() 181 # Calculate the number of blocks that can be allocated with the 182 # profiled peak memory. 183 torch.cuda.synchronize()
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/worker/model_runner.py:759, in GPUModelRunnerBase.profile_run(self) 757 kv_caches = [None] * num_layers 758 finished_requests_ids = [seq.request_id for seq in seqs] --> 759 model_input = self.prepare_model_input( 760 seqs, finished_requests_ids=finished_requests_ids) 761 intermediate_tensors = None 762 if not get_pp_group().is_first_rank:
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/worker/model_runner.py:1096, in ModelRunner.prepare_model_input(self, seq_group_metadata_list, virtual_engine, finished_requests_ids) 1077 def prepare_model_input( 1078 self, 1079 seq_group_metadata_list: List[SequenceGroupMetadata], 1080 virtual_engine: int = 0, 1081 finished_requests_ids: Optional[List[str]] = None 1082 ) -> ModelInputForGPUWithSamplingMetadata: 1083 """Prepare the model input based on a given sequence group, including 1084 metadata for the sampling step. 1085 (...) 1094 If cuda graph is required, this API automatically pads inputs. 1095 """ -> 1096 model_input = self._prepare_model_input_tensors( 1097 seq_group_metadata_list, finished_requests_ids) 1098 sampling_metadata = SamplingMetadata.prepare(seq_group_metadata_list, 1099 model_input.seq_lens, 1100 model_input.query_lens, 1101 self.device, 1102 self.pin_memory) 1103 is_prompt = (seq_group_metadata_list[0].is_prompt 1104 if seq_group_metadata_list else None)
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/worker/model_runner.py:672, in GPUModelRunnerBase._prepare_model_input_tensors(self, seq_group_metadata_list, finished_requests_ids) 670 for seq_group_metadata in seq_group_metadata_list: 671 builder.add_seq_group(seq_group_metadata) --> 672 return builder.build()
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/worker/model_runner.py:444, in ModelInputForGPUBuilder.build(self) 441 prompt_adapter_mapping = None 443 # Multi-modal data. --> 444 multi_modal_kwargs = MultiModalInputs.batch( 445 self.multi_modal_inputs_list, device=self.runner.device) 447 return self.model_input_cls( 448 input_tokens=input_tokens_tensor, 449 input_positions=input_positions_tensor, (...) 458 prompt_adapter_mapping=prompt_adapter_mapping, 459 prompt_adapter_requests=self.prompt_adapter_requests)
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/multimodal/base.py:87, in MultiModalInputs.batch(inputs_list, device) 84 for k, v in inputs.items(): 85 item_lists[k].append(v) ---> 87 return { 88 k: MultiModalInputs.try_concat(item_list, device=device) 89 for k, item_list in item_lists.items() 90 }
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/multimodal/base.py:88, in
File ~/anaconda3/envs/MiniCPMV/lib/python3.10/site-packages/vllm/multimodal/base.py:54, in MultiModalInputs.try_concat(tensors, device) 52 for new_tensor in tensors: 53 for new_t in new_tensor: ---> 54 new_tensors.append(new_t.to(device)) 55 return new_tensors 56 unbatched_shape = tensors[0].shape[1:]
AttributeError: 'list' object has no attribute 'to'
same error
same error
These are things I've discussed with vllm teams yesterday and we've got our PR merged into main
branch of vllm official repo. For now maybe you can just try using the official code.
And I'll update the main
branch and delete minicpmv
branch.
These are things I've discussed with vllm teams yesterday and we've got our PR merged into
main
branch of vllm official repo. For now maybe you can just try using the official code. And I'll update the main
branch and delete minicpmv
branch.
感谢大佬
Your current environment
我是 MiniCPM-Llama3-V 2.5 都不能用 运行minicpmv_example.py 报错:AttributeError: 'list' object has no attribute 'to'
环境:
How would you like to use vllm
I want to run inference of a [ MiniCPM-Llama3-V 2.5].