vllm-project / vllm

A high-throughput and memory-efficient inference and serving engine for LLMs
https://docs.vllm.ai
Apache License 2.0

[Bug]: MiniCPM3-4B load error #10067

Closed: ArlanCooper closed this 2 hours ago

ArlanCooper commented 2 hours ago

Your current environment

The output of `python collect_env.py`

```text
Your output of `python collect_env.py` here
```

Model Input Dumps

No response

🐛 Describe the bug

from vllm import LLM, SamplingParams

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM.
llm = LLM(model="/data/share/rwq/MiniCPM3-4B/", trust_remote_code=True)
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)
# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")

The error:

Encountered exception while importing datamodel_code_generator: No module named 'tomli'
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
Cell In[3], line 12
      9 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
     11 # Create an LLM.
---> 12 llm = LLM(model="/home/powerop/work/rwq/finetune_models/minicpm3-4b-hb-classify/",trust_remote_code=True)
     13 outputs = llm.generate(prompts, sampling_params)
     14 # Print the outputs.

File ~/.local/lib/python3.10/site-packages/vllm/entrypoints/llm.py:177, in LLM.__init__(self, model, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_context_len_to_capture, max_seq_len_to_capture, disable_custom_all_reduce, disable_async_output_proc, mm_processor_kwargs, **kwargs)
    152     kwargs["disable_log_stats"] = True
    154 engine_args = EngineArgs(
    155     model=model,
    156     tokenizer=tokenizer,
   (...)
    175     **kwargs,
    176 )
--> 177 self.llm_engine = LLMEngine.from_engine_args(
    178     engine_args, usage_context=UsageContext.LLM_CLASS)
    179 self.request_counter = Counter()

File ~/.local/lib/python3.10/site-packages/vllm/engine/llm_engine.py:573, in LLMEngine.from_engine_args(cls, engine_args, usage_context, stat_loggers)
    571 executor_class = cls._get_executor_cls(engine_config)
    572 # Create the LLM engine.
--> 573 engine = cls(
    574     **engine_config.to_dict(),
    575     executor_class=executor_class,
    576     log_stats=not engine_args.disable_log_stats,
    577     usage_context=usage_context,
    578     stat_loggers=stat_loggers,
    579 )
    581 return engine

File ~/.local/lib/python3.10/site-packages/vllm/engine/llm_engine.py:308, in LLMEngine.__init__(self, model_config, cache_config, parallel_config, scheduler_config, device_config, load_config, lora_config, speculative_config, decoding_config, observability_config, prompt_adapter_config, executor_class, log_stats, usage_context, stat_loggers, input_registry, use_cached_outputs)
    305 self.use_cached_outputs = use_cached_outputs
    307 if not self.model_config.skip_tokenizer_init:
--> 308     self.tokenizer = self._init_tokenizer()
    309     self.detokenizer = Detokenizer(self.tokenizer)
    310     tokenizer_group = self.get_tokenizer_group()

File ~/.local/lib/python3.10/site-packages/vllm/engine/llm_engine.py:617, in LLMEngine._init_tokenizer(self)
    616 def _init_tokenizer(self) -> BaseTokenizerGroup:
--> 617     return init_tokenizer_from_configs(
    618         model_config=self.model_config,
    619         scheduler_config=self.scheduler_config,
    620         parallel_config=self.parallel_config,
    621         enable_lora=bool(self.lora_config))

File ~/.local/lib/python3.10/site-packages/vllm/transformers_utils/tokenizer_group/__init__.py:28, in init_tokenizer_from_configs(model_config, scheduler_config, parallel_config, enable_lora)
     16 def init_tokenizer_from_configs(model_config: ModelConfig,
     17                                 scheduler_config: SchedulerConfig,
     18                                 parallel_config: ParallelConfig,
     19                                 enable_lora: bool):
     20     init_kwargs = dict(tokenizer_id=model_config.tokenizer,
     21                        enable_lora=enable_lora,
     22                        max_num_seqs=scheduler_config.max_num_seqs,
   (...)
     25                        trust_remote_code=model_config.trust_remote_code,
     26                        revision=model_config.tokenizer_revision)
---> 28     return get_tokenizer_group(parallel_config.tokenizer_pool_config,
     29                                **init_kwargs)

File ~/.local/lib/python3.10/site-packages/vllm/transformers_utils/tokenizer_group/__init__.py:49, in get_tokenizer_group(tokenizer_pool_config, **init_kwargs)
     46 else:
     47     raise ValueError(
     48         f"Unknown pool type: {tokenizer_pool_config.pool_type}")
---> 49 return tokenizer_cls.from_config(tokenizer_pool_config, **init_kwargs)

File ~/.local/lib/python3.10/site-packages/vllm/transformers_utils/tokenizer_group/tokenizer_group.py:30, in TokenizerGroup.from_config(cls, tokenizer_pool_config, **init_kwargs)
     27 @classmethod
     28 def from_config(cls, tokenizer_pool_config: Optional[TokenizerPoolConfig],
     29                 **init_kwargs) -> "TokenizerGroup":
---> 30     return cls(**init_kwargs)

File ~/.local/lib/python3.10/site-packages/vllm/transformers_utils/tokenizer_group/tokenizer_group.py:23, in TokenizerGroup.__init__(self, tokenizer_id, enable_lora, max_num_seqs, max_input_length, **tokenizer_config)
     21 self.enable_lora = enable_lora
     22 self.max_input_length = max_input_length
---> 23 self.tokenizer = get_tokenizer(self.tokenizer_id, **tokenizer_config)
     24 self.lora_tokenizers = LRUCache[AnyTokenizer](
     25     capacity=max_num_seqs if enable_lora else 0)

File ~/.local/lib/python3.10/site-packages/vllm/transformers_utils/tokenizer.py:139, in get_tokenizer(tokenizer_name, tokenizer_mode, trust_remote_code, revision, download_dir, *args, **kwargs)
    137 else:
    138     try:
--> 139         tokenizer = AutoTokenizer.from_pretrained(
    140             tokenizer_name,
    141             *args,
    142             trust_remote_code=trust_remote_code,
    143             revision=revision,
    144             **kwargs,
    145         )
    146     except ValueError as e:
    147         # If the error pertains to the tokenizer class not existing or not
    148         # currently being imported,
    149         # suggest using the --trust-remote-code flag.
    150         if not trust_remote_code and (
    151                 "does not exist or is not currently imported." in str(e)
    152                 or "requires you to execute the tokenizer file" in str(e)):

File ~/.local/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:888, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    886 else:
    887     class_ref = tokenizer_auto_map[0]
--> 888 tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
    889 _ = kwargs.pop("code_revision", None)
    890 if os.path.isdir(pretrained_model_name_or_path):

File ~/.local/lib/python3.10/site-packages/transformers/dynamic_module_utils.py:540, in get_class_from_dynamic_module(class_reference, pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, repo_type, code_revision, **kwargs)
    538     code_revision = revision
    539 # And lastly we get the class inside our newly created module
--> 540 final_module = get_cached_module_file(
    541     repo_id,
    542     module_file + ".py",
    543     cache_dir=cache_dir,
    544     force_download=force_download,
    545     resume_download=resume_download,
    546     proxies=proxies,
    547     token=token,
    548     revision=code_revision,
    549     local_files_only=local_files_only,
    550     repo_type=repo_type,
    551 )
    552 return get_class_in_module(class_name, final_module, force_reload=force_download)

File ~/.local/lib/python3.10/site-packages/transformers/dynamic_module_utils.py:365, in get_cached_module_file(pretrained_model_name_or_path, module_file, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, repo_type, _commit_hash, **deprecated_kwargs)
    362     raise
    364 # Check we have all the requirements in our environment
--> 365 modules_needed = check_imports(resolved_module_file)
    367 # Now we move the module inside our cached dynamic modules.
    368 full_submodule = TRANSFORMERS_DYNAMIC_MODULE_NAME + os.path.sep + submodule

File ~/.local/lib/python3.10/site-packages/transformers/dynamic_module_utils.py:197, in check_imports(filename)
    194             raise
    196 if len(missing_packages) > 0:
--> 197     raise ImportError(
    198         "This modeling file requires the following packages that were not found in your environment: "
    199         f"{', '.join(missing_packages)}. Run `pip install {' '.join(missing_packages)}`"
    200     )
    202 return get_relative_imports(filename)

ImportError: This modeling file requires the following packages that were not found in your environment: datamodel_code_generator. Run `pip install datamodel_code_generator`

But I have already installed datamodel_code_generator, so I'm not sure how to resolve this.

Before submitting a new issue...

DarkLight1337 commented 2 hours ago

> Encountered exception while importing datamodel_code_generator: No module named 'tomli'

You should also install tomli, based on this error message.
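
For example, a quick way to confirm which import is actually failing (a minimal diagnostic sketch based on the error messages above):

```python
# Sketch: try importing the packages named in the errors above to see which one
# is actually missing. transformers only reports the top-level package
# (datamodel_code_generator), but here the real failure is its dependency tomli.
import importlib

for pkg in ("tomli", "datamodel_code_generator"):
    try:
        importlib.import_module(pkg)
        print(f"{pkg}: OK")
    except ImportError as exc:
        print(f"{pkg}: missing ({exc})")
```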

ArlanCooper commented 2 hours ago

> Encountered exception while importing datamodel_code_generator: No module named 'tomli'
>
> You should also install tomli, based on this error message.

I downgraded datamodel_code_generator from 0.26.2 to 0.24.0, and then it worked. Thanks!
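
For anyone hitting the same thing, a quick way to confirm the installed version after pinning it (a minimal sketch; 0.24.0 is just the version that happened to work here):

```python
# Sketch: verify the installed datamodel-code-generator version, e.g. after
# running `pip install "datamodel-code-generator==0.24.0"` as described above.
from importlib.metadata import version

print(version("datamodel-code-generator"))  # should print 0.24.0 after the downgrade
```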