vllm-project / vllm

A high-throughput and memory-efficient inference and serving engine for LLMs
https://docs.vllm.ai
Apache License 2.0

[Usage]: multi image inference for "OpenGVLab/InternVL2-8B" not working #8276

Closed dahwin closed 2 months ago

dahwin commented 2 months ago

Multi-image inference for "OpenGVLab/InternVL2-8B" is not working.

I got this inference code from https://github.com/vllm-project/vllm/blob/main/examples/offline_inference_vision_language_multi_image.py:

from typing import List
from vllm import LLM, SamplingParams
from vllm.multimodal.utils import fetch_image
from transformers import AutoTokenizer
import torch
num_device = 2
QUESTION = "What is the content of each image?"
IMAGE_URLS = [
    "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg",
]

def load_internvl(question: str, image_urls: List[str]):
    model_name = "OpenGVLab/InternVL2-8B"

    llm = LLM(
        model=model_name,
        trust_remote_code=True,
        max_num_seqs=5,
        max_model_len=3096,
        limit_mm_per_prompt={"image": len(image_urls)},
        tensor_parallel_size=num_device,
        worker_use_ray=True,
        dtype=torch.float16,
        enable_chunked_prefill=True,
        gpu_memory_utilization=0.99,
        enforce_eager=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    placeholders = "\n".join(f"Image-{i}: <image>" for i, _ in enumerate(image_urls, start=1))
    messages = [{'role': 'user', 'content': f"{placeholders}\n{question}"}]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Stop tokens for InternVL
    stop_tokens = ["<|endoftext|>", "<|im_start|>", "<|im_end|>", "<|end|>"]
    stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]

    return llm, prompt, stop_token_ids

def run_generate(question: str, image_urls: List[str]):
    llm, prompt, stop_token_ids = load_internvl(question, image_urls)

    sampling_params = SamplingParams(temperature=0.0, max_tokens=128, stop_token_ids=stop_token_ids)

    outputs = llm.generate(
        {
            "prompt": prompt,
            "multi_modal_data": {
                "image": [fetch_image(url) for url in image_urls]
            },
        },
        sampling_params=sampling_params)

    for o in outputs:
        generated_text = o.outputs[0].text
        print(generated_text)

if __name__ == "__main__":
    run_generate(QUESTION, IMAGE_URLS)

Please help me fix this error.

Errors:

(RayWorkerWrapper pid=3366) ERROR 09-08 23:43:14 worker_base.py:464]   File "/opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py", line 56, in _try_stack
(RayWorkerWrapper pid=3366) ERROR 09-08 23:43:14 worker_base.py:464]     stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
(RayWorkerWrapper pid=3366) ERROR 09-08 23:43:14 worker_base.py:464] TypeError: 'Image' object is not iterable
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[1], line 60
     57         print(generated_text)
     59 if __name__ == "__main__":
---> 60     run_generate(QUESTION, IMAGE_URLS)

Cell In[1], line 42, in run_generate(question, image_urls)
     41 def run_generate(question: str, image_urls: List[str]):
---> 42     llm, prompt, stop_token_ids = load_internvl(question, image_urls)
     44     sampling_params = SamplingParams(temperature=0.0, max_tokens=128, stop_token_ids=stop_token_ids)
     46     outputs = llm.generate(
     47         {
     48             "prompt": prompt,
   (...)
     52         },
     53         sampling_params=sampling_params)

Cell In[1], line 16, in load_internvl(question, image_urls)
     13 def load_internvl(question: str, image_urls: List[str]):
     14     model_name = "OpenGVLab/InternVL2-8B"
---> 16     llm = LLM(
     17         model=model_name,
     18         trust_remote_code=True,
     19         max_num_seqs=5,
     20         max_model_len=3096,
     21         limit_mm_per_prompt={"image": len(image_urls)}
     22 , tensor_parallel_size=num_device,worker_use_ray=num_device,dtype=torch.float16
     23           , enable_chunked_prefill=True
     24         ,gpu_memory_utilization = 0.99
     25           , enforce_eager=True 
     26     )
     28     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     30     placeholders = "\n".join(f"Image-{i}: <image>" for i, _ in enumerate(image_urls, start=1))

File /opt/conda/lib/python3.10/site-packages/vllm/entrypoints/llm.py:177, in LLM.__init__(self, model, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_context_len_to_capture, max_seq_len_to_capture, disable_custom_all_reduce, disable_async_output_proc, **kwargs)
    153     raise TypeError(
    154         "There is no need to pass vision-related arguments anymore.")
    155 engine_args = EngineArgs(
    156     model=model,
    157     tokenizer=tokenizer,
   (...)
    175     **kwargs,
    176 )
--> 177 self.llm_engine = LLMEngine.from_engine_args(
    178     engine_args, usage_context=UsageContext.LLM_CLASS)
    179 self.request_counter = Counter()

File /opt/conda/lib/python3.10/site-packages/vllm/engine/llm_engine.py:538, in LLMEngine.from_engine_args(cls, engine_args, usage_context, stat_loggers)
    536 executor_class = cls._get_executor_cls(engine_config)
    537 # Create the LLM engine.
--> 538 engine = cls(
    539     **engine_config.to_dict(),
    540     executor_class=executor_class,
    541     log_stats=not engine_args.disable_log_stats,
    542     usage_context=usage_context,
    543     stat_loggers=stat_loggers,
    544 )
    546 return engine

File /opt/conda/lib/python3.10/site-packages/vllm/engine/llm_engine.py:319, in LLMEngine.__init__(self, model_config, cache_config, parallel_config, scheduler_config, device_config, load_config, lora_config, speculative_config, decoding_config, observability_config, prompt_adapter_config, executor_class, log_stats, usage_context, stat_loggers, input_registry, step_return_finished_only)
    305 self.model_executor = executor_class(
    306     model_config=model_config,
    307     cache_config=cache_config,
   (...)
    315     observability_config=self.observability_config,
    316 )
    318 if not self.model_config.embedding_mode:
--> 319     self._initialize_kv_caches()
    321 # If usage stat is enabled, collect relevant info.
    322 if is_usage_stats_enabled():

File /opt/conda/lib/python3.10/site-packages/vllm/engine/llm_engine.py:448, in LLMEngine._initialize_kv_caches(self)
    441 def _initialize_kv_caches(self) -> None:
    442     """Initialize the KV cache in the worker(s).
    443 
    444     The workers will determine the number of blocks in both the GPU cache
    445     and the swap CPU cache.
    446     """
    447     num_gpu_blocks, num_cpu_blocks = (
--> 448         self.model_executor.determine_num_available_blocks())
    450     if self.cache_config.num_gpu_blocks_override is not None:
    451         num_gpu_blocks_override = self.cache_config.num_gpu_blocks_override

File /opt/conda/lib/python3.10/site-packages/vllm/executor/distributed_gpu_executor.py:39, in DistributedGPUExecutor.determine_num_available_blocks(self)
     29 """Determine the number of available KV blocks.
     30 
     31 This invokes `determine_num_available_blocks` on each worker and takes
   (...)
     36     - tuple[num_gpu_blocks, num_cpu_blocks]
     37 """
     38 # Get the maximum number of blocks that can be allocated on GPU and CPU.
---> 39 num_blocks = self._run_workers("determine_num_available_blocks", )
     41 # Since we use a shared centralized controller, we take the minimum
     42 # number of blocks across all workers to make sure all the memory
     43 # operators can be applied to all workers.
     44 num_gpu_blocks = min(b[0] for b in num_blocks)

File /opt/conda/lib/python3.10/site-packages/vllm/executor/ray_gpu_executor.py:408, in RayGPUExecutor._run_workers(self, method, async_run_tensor_parallel_workers_only, all_args, all_kwargs, use_dummy_driver, max_concurrent_workers, *args, **kwargs)
    405 # Start the driver worker after all the ray workers.
    406 if not use_dummy_driver:
    407     driver_worker_output = [
--> 408         self.driver_worker.execute_method(method, *driver_args,
    409                                           **driver_kwargs)
    410     ]
    411 else:
    412     assert self.driver_dummy_worker is not None

File /opt/conda/lib/python3.10/site-packages/vllm/worker/worker_base.py:465, in WorkerWrapperBase.execute_method(self, method, *args, **kwargs)
    462 msg = (f"Error executing method {method}. "
    463        "This might cause deadlock in distributed execution.")
    464 logger.exception(msg)
--> 465 raise e

File /opt/conda/lib/python3.10/site-packages/vllm/worker/worker_base.py:456, in WorkerWrapperBase.execute_method(self, method, *args, **kwargs)
    454     target = self if self.worker is None else self.worker
    455     executor = getattr(target, method)
--> 456     return executor(*args, **kwargs)
    457 except Exception as e:
    458     # if the driver worker also execute methods,
    459     # exceptions in the rest worker may cause deadlock in rpc like ray
    460     # see https://github.com/vllm-project/vllm/issues/3455
    461     # print the error and inform the user to solve the error
    462     msg = (f"Error executing method {method}. "
    463            "This might cause deadlock in distributed execution.")

File /opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    113 @functools.wraps(func)
    114 def decorate_context(*args, **kwargs):
    115     with ctx_factory():
--> 116         return func(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/vllm/worker/worker.py:222, in Worker.determine_num_available_blocks(self)
    218 torch.cuda.empty_cache()
    220 # Execute a forward pass with dummy inputs to profile the memory usage
    221 # of the model.
--> 222 self.model_runner.profile_run()
    224 # Calculate the number of blocks that can be allocated with the
    225 # profiled peak memory.
    226 torch.cuda.synchronize()

File /opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    113 @functools.wraps(func)
    114 def decorate_context(*args, **kwargs):
    115     with ctx_factory():
--> 116         return func(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/vllm/worker/model_runner.py:1125, in GPUModelRunnerBase.profile_run(self)
   1123 kv_caches = [None] * num_layers
   1124 finished_requests_ids = [seq.request_id for seq in seqs]
-> 1125 model_input = self.prepare_model_input(
   1126     seqs, finished_requests_ids=finished_requests_ids)
   1127 intermediate_tensors = None
   1128 if not get_pp_group().is_first_rank:

File /opt/conda/lib/python3.10/site-packages/vllm/worker/model_runner.py:1380, in ModelRunner.prepare_model_input(self, seq_group_metadata_list, virtual_engine, finished_requests_ids)
   1361 def prepare_model_input(
   1362     self,
   1363     seq_group_metadata_list: List[SequenceGroupMetadata],
   1364     virtual_engine: int = 0,
   1365     finished_requests_ids: Optional[List[str]] = None,
   1366 ) -> ModelInputForGPUWithSamplingMetadata:
   1367     """Prepare the model input based on a given sequence group, including
   1368     metadata for the sampling step.
   1369 
   (...)
   1378     If cuda graph is required, this API automatically pads inputs.
   1379     """
-> 1380     model_input = self._prepare_model_input_tensors(
   1381         seq_group_metadata_list, finished_requests_ids)
   1382     if get_pp_group().is_last_rank:
   1383         # Sampling metadata is only required for the final pp group
   1384         generators = self.get_generators(finished_requests_ids)

File /opt/conda/lib/python3.10/site-packages/vllm/worker/model_runner.py:1042, in GPUModelRunnerBase._prepare_model_input_tensors(self, seq_group_metadata_list, finished_requests_ids)
   1038     builder.add_seq_group(seq_group_metadata)
   1040 builder.reset_cached_inter_data()
-> 1042 return builder.build()

File /opt/conda/lib/python3.10/site-packages/vllm/worker/model_runner.py:795, in ModelInputForGPUBuilder.build(self)
    790 # Multi-modal data.
    791 multi_modal_inputs_list = [
    792     data.multi_modal_inputs for data in self.inter_data_list
    793     if data.multi_modal_inputs is not None
    794 ]
--> 795 multi_modal_kwargs = MultiModalInputs.batch(multi_modal_inputs_list)
    797 return self.model_input_cls(
    798     input_tokens=input_tokens_tensor,
    799     input_positions=input_positions_tensor,
   (...)
    808     prompt_adapter_mapping=prompt_adapter_mapping,
    809     prompt_adapter_requests=prompt_adapter_requests)

File /opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py:94, in MultiModalInputs.batch(inputs_list)
     91     for k, v in inputs.items():
     92         item_lists[k].append(v)
---> 94 return {
     95     k: MultiModalInputs._try_stack(item_list)
     96     for k, item_list in item_lists.items()
     97 }

File /opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py:95, in <dictcomp>(.0)
     91     for k, v in inputs.items():
     92         item_lists[k].append(v)
     94 return {
---> 95     k: MultiModalInputs._try_stack(item_list)
     96     for k, item_list in item_lists.items()
     97 }

File /opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py:56, in MultiModalInputs._try_stack(nested_tensors)
     53 if isinstance(nested_tensors, torch.Tensor):
     54     return nested_tensors
---> 56 stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
     57 if not is_list_of(stacked, torch.Tensor, check="all"):
     58     # Only tensors (not lists) can be stacked.
     59     return stacked

File /opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py:56, in <listcomp>(.0)
     53 if isinstance(nested_tensors, torch.Tensor):
     54     return nested_tensors
---> 56 stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
     57 if not is_list_of(stacked, torch.Tensor, check="all"):
     58     # Only tensors (not lists) can be stacked.
     59     return stacked

File /opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py:56, in MultiModalInputs._try_stack(nested_tensors)
     53 if isinstance(nested_tensors, torch.Tensor):
     54     return nested_tensors
---> 56 stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
     57 if not is_list_of(stacked, torch.Tensor, check="all"):
     58     # Only tensors (not lists) can be stacked.
     59     return stacked

File /opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py:56, in <listcomp>(.0)
     53 if isinstance(nested_tensors, torch.Tensor):
     54     return nested_tensors
---> 56 stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
     57 if not is_list_of(stacked, torch.Tensor, check="all"):
     58     # Only tensors (not lists) can be stacked.
     59     return stacked

File /opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py:56, in MultiModalInputs._try_stack(nested_tensors)
     53 if isinstance(nested_tensors, torch.Tensor):
     54     return nested_tensors
---> 56 stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
     57 if not is_list_of(stacked, torch.Tensor, check="all"):
     58     # Only tensors (not lists) can be stacked.
     59     return stacked

TypeError: 'Image' object is not iterable
2024-09-08 23:43:19,826 ERROR worker.py:409 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::RayWorkerWrapper.execute_method() (pid=3366, ip=172.19.2.2, actor_id=9b67af953bdda1832f1d2e0c01000000, repr=<vllm.executor.ray_utils.RayWorkerWrapper object at 0x7f36965676d0>)
  File "/opt/conda/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 465, in execute_method
    raise e
  File "/opt/conda/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 456, in execute_method
    return executor(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/vllm/worker/worker.py", line 222, in determine_num_available_blocks
    self.model_runner.profile_run()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 1125, in profile_run
    model_input = self.prepare_model_input(
  File "/opt/conda/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 1380, in prepare_model_input
    model_input = self._prepare_model_input_tensors(
  File "/opt/conda/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 1042, in _prepare_model_input_tensors
    return builder.build()  # type: ignore
  File "/opt/conda/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 795, in build
    multi_modal_kwargs = MultiModalInputs.batch(multi_modal_inputs_list)
  File "/opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py", line 94, in batch
    return {
  File "/opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py", line 95, in <dictcomp>
    k: MultiModalInputs._try_stack(item_list)
  File "/opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py", line 56, in _try_stack
    stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
  File "/opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py", line 56, in <listcomp>
    stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
  File "/opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py", line 56, in _try_stack
    stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
  File "/opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py", line 56, in <listcomp>
    stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
  File "/opt/conda/lib/python3.10/site-packages/vllm/multimodal/base.py", line 56, in _try_stack
    stacked = [MultiModalInputs._try_stack(t) for t in nested_tensors]
TypeError: 'Image' object is not iterable


DarkLight1337 commented 2 months ago

Multi-image support for InternVL2 is currently only available if you build vLLM from source. It will be in the next release!
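
For reference, the build-from-source route is roughly the following (a minimal sketch matching the vLLM installation docs; it assumes a working CUDA toolkit with nvcc on PATH and CUDA_HOME set):

git clone https://github.com/vllm-project/vllm.git
cd vllm
pip install -e .  # compiles the CUDA kernels; this can take a while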

dahwin commented 2 months ago

!git clone https://github.com/vllm-project/vllm.git
%cd vllm
!pip install -e .  # This may take 5-10 minutes.

I'm getting an error when I try to build.

I have CUDA nvcc installed:

!which nvcc
/opt/conda/bin/nvcc

Downloading pyairports-2.1.1-py3-none-any.whl (371 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 371.7/371.7 kB 28.1 MB/s eta 0:00:00
Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 82.0 MB/s eta 0:00:00
Building wheels for collected packages: vllm
  Building editable for vllm (pyproject.toml) ... error
  error: subprocess-exited-with-error

  × Building editable for vllm (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [150 lines of output]
      /tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/torch/_subclasses/functional_tensor.py:258: UserWarning: Failed to initialize NumPy: No module named 'numpy' (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:84.)
        cpu = _conversion_method_template(device=torch.device("cpu"))
      running editable_wheel
      creating /tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info
      writing /tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info/PKG-INFO
      writing dependency_links to /tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info/dependency_links.txt
      writing entry points to /tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info/entry_points.txt
      writing requirements to /tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info/requires.txt
      writing top-level names to /tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info/top_level.txt
      writing manifest file '/tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info/SOURCES.txt'
      reading manifest file '/tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info/SOURCES.txt'
      reading manifest template 'MANIFEST.in'
      adding license file 'LICENSE'
      writing manifest file '/tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm.egg-info/SOURCES.txt'
      creating '/tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm-0.6.0+cu123.dist-info'
      creating /tmp/pip-wheel-8119yhpq/.tmp-v_6fhrd0/vllm-0.6.0+cu123.dist-info/WHEEL
      running build_py
      running build_ext
      -- The CXX compiler identification is GNU 11.4.0
      -- Detecting CXX compiler ABI info
      -- Detecting CXX compiler ABI info - done
      -- Check for working CXX compiler: /usr/bin/c++ - skipped
      -- Detecting CXX compile features
      -- Detecting CXX compile features - done
      -- Build type: RelWithDebInfo
      -- Target device: cuda
      -- Found Python: /opt/conda/bin/python3.10 (found version "3.10.14") found components: Interpreter Development.Module Development.SABIModule
      -- Found python matching: /opt/conda/bin/python3.10.
      -- Could NOT find CUDA (missing: CUDA_INCLUDE_DIRS) (found version "12.3")
      CMake Warning at /tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/torch/share/cmake/Caffe2/public/cuda.cmake:31 (message):
        Caffe2: CUDA cannot be found.  Depending on whether you are building Caffe2
        or a Caffe2 dependent library, the next warning / error will give you more
        info.
      Call Stack (most recent call first):
        /tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/torch/share/cmake/Caffe2/Caffe2Config.cmake:86 (include)
        /tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/torch/share/cmake/Torch/TorchConfig.cmake:68 (find_package)
        CMakeLists.txt:70 (find_package)

      CMake Error at /tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/torch/share/cmake/Caffe2/Caffe2Config.cmake:90 (message):
        Your installed Caffe2 version uses CUDA but I cannot find the CUDA
        libraries.  Please set the proper CUDA prefixes and / or install CUDA.
      Call Stack (most recent call first):
        /tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/torch/share/cmake/Torch/TorchConfig.cmake:68 (find_package)
        CMakeLists.txt:70 (find_package)

      -- Configuring incomplete, errors occurred!
      Traceback (most recent call last):
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/editable_wheel.py", line 138, in run
          self._create_wheel_file(bdist_wheel)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/editable_wheel.py", line 341, in _create_wheel_file
          files, mapping = self._run_build_commands(dist_name, unpacked, lib, tmp)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/editable_wheel.py", line 264, in _run_build_commands
          self._run_build_subcommands()
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/editable_wheel.py", line 291, in _run_build_subcommands
          self.run_command(name)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/cmd.py", line 316, in run_command
          self.distribution.run_command(command)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/dist.py", line 950, in run_command
          super().run_command(command)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/dist.py", line 973, in run_command
          cmd_obj.run()
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/build_ext.py", line 98, in run
          _build_ext.run(self)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/command/build_ext.py", line 359, in run
          self.build_extensions()
        File "<string>", line 226, in build_extensions
        File "<string>", line 208, in configure
        File "/opt/conda/lib/python3.10/subprocess.py", line 369, in check_call
          raise CalledProcessError(retcode, cmd)
      subprocess.CalledProcessError: Command '['cmake', '/kaggle/working/vllm', '-G', 'Ninja', '-DCMAKE_BUILD_TYPE=RelWithDebInfo', '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=/tmp/tmp_29de3ur.build-lib/vllm', '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=/tmp/tmpwymftevp.build-temp', '-DVLLM_TARGET_DEVICE=cuda', '-DVLLM_PYTHON_EXECUTABLE=/opt/conda/bin/python3.10', '-DVLLM_PYTHON_PATH=/opt/conda/lib/python3.10/site-packages/pip/_vendor/pyproject_hooks/_in_process:/tmp/pip-build-env-kdqvosxr/site:/opt/conda/lib/python310.zip:/opt/conda/lib/python3.10:/opt/conda/lib/python3.10/lib-dynload:/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages:/tmp/pip-build-env-kdqvosxr/normal/lib/python3.10/site-packages:/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_vendor', '-DNVCC_THREADS=1', '-DCMAKE_JOB_POOL_COMPILE:STRING=compile', '-DCMAKE_JOB_POOLS:STRING=compile=4']' returned non-zero exit status 1.
      /tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/dist.py:973: _DebuggingTips: Problem in editable installation.
      !!

              ********************************************************************************
              An error happened while installing `vllm` in editable mode.

              The following steps are recommended to help debug this problem:

              - Try to install the project normally, without using the editable mode.
                Does the error still persist?
                (If it does, try fixing the problem before attempting the editable mode).
              - If you are using binary extensions, make sure you have all OS-level
                dependencies installed (e.g. compilers, toolchains, binary libraries, ...).
              - Try the latest version of setuptools (maybe the error was already fixed).
              - If you (or your project dependencies) are using any setuptools extension
                or customization, make sure they support the editable mode.

              After following the steps above, if the problem still persists and
              you think this is related to how setuptools handles editable installations,
              please submit a reproducible example
              (see https://stackoverflow.com/help/minimal-reproducible-example) to:

                  https://github.com/pypa/setuptools/issues

              See https://setuptools.pypa.io/en/latest/userguide/development_mode.html for details.
              ********************************************************************************

      !!
        cmd_obj.run()
      Traceback (most recent call last):
        File "/opt/conda/lib/python3.10/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 353, in <module>
          main()
        File "/opt/conda/lib/python3.10/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 335, in main
          json_out['return_val'] = hook(**hook_input['kwargs'])
        File "/opt/conda/lib/python3.10/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 273, in build_editable
          return hook(wheel_directory, config_settings, metadata_directory)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/build_meta.py", line 459, in build_editable
          return self._build_with_temp_dir(
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/build_meta.py", line 403, in _build_with_temp_dir
          self.run_setup()
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/build_meta.py", line 318, in run_setup
          exec(code, locals())
        File "<string>", line 475, in <module>
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/__init__.py", line 117, in setup
          return distutils.core.setup(**attrs)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/core.py", line 184, in setup
          return run_commands(dist)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/core.py", line 200, in run_commands
          dist.run_commands()
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/dist.py", line 954, in run_commands
          self.run_command(cmd)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/dist.py", line 950, in run_command
          super().run_command(command)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/dist.py", line 973, in run_command
          cmd_obj.run()
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/editable_wheel.py", line 138, in run
          self._create_wheel_file(bdist_wheel)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/editable_wheel.py", line 341, in _create_wheel_file
          files, mapping = self._run_build_commands(dist_name, unpacked, lib, tmp)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/editable_wheel.py", line 264, in _run_build_commands
          self._run_build_subcommands()
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/editable_wheel.py", line 291, in _run_build_subcommands
          self.run_command(name)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/cmd.py", line 316, in run_command
          self.distribution.run_command(command)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/dist.py", line 950, in run_command
          super().run_command(command)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/dist.py", line 973, in run_command
          cmd_obj.run()
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/command/build_ext.py", line 98, in run
          _build_ext.run(self)
        File "/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_distutils/command/build_ext.py", line 359, in run
          self.build_extensions()
        File "<string>", line 226, in build_extensions
        File "<string>", line 208, in configure
        File "/opt/conda/lib/python3.10/subprocess.py", line 369, in check_call
          raise CalledProcessError(retcode, cmd)
      subprocess.CalledProcessError: Command '['cmake', '/kaggle/working/vllm', '-G', 'Ninja', '-DCMAKE_BUILD_TYPE=RelWithDebInfo', '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=/tmp/tmp_29de3ur.build-lib/vllm', '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=/tmp/tmpwymftevp.build-temp', '-DVLLM_TARGET_DEVICE=cuda', '-DVLLM_PYTHON_EXECUTABLE=/opt/conda/bin/python3.10', '-DVLLM_PYTHON_PATH=/opt/conda/lib/python3.10/site-packages/pip/_vendor/pyproject_hooks/_in_process:/tmp/pip-build-env-kdqvosxr/site:/opt/conda/lib/python310.zip:/opt/conda/lib/python3.10:/opt/conda/lib/python3.10/lib-dynload:/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages:/tmp/pip-build-env-kdqvosxr/normal/lib/python3.10/site-packages:/tmp/pip-build-env-kdqvosxr/overlay/lib/python3.10/site-packages/setuptools/_vendor', '-DNVCC_THREADS=1', '-DCMAKE_JOB_POOL_COMPILE:STRING=compile', '-DCMAKE_JOB_POOLS:STRING=compile=4']' returned non-zero exit status 1.
      [end of output]

  note: This error originates from a subprocess, and is likely not a problem with pip.
  ERROR: Failed building editable for vllm
Failed to build vllm
ERROR: Could not build wheels for vllm, which is required to install pyproject.toml-based projects
dahwin commented 2 months ago

!nvcc --version                      # verify that nvcc is in your PATH
!/usr/local/cuda/bin/nvcc --version  # verify that nvcc is in your CUDA_HOME

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Wed_Nov_22_10:17:15_PST_2023
Cuda compilation tools, release 12.3, V12.3.107
Build cuda_12.3.r12.3/compiler.33567101_0

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Wed_Nov_22_10:17:15_PST_2023
Cuda compilation tools, release 12.3, V12.3.107
Build cuda_12.3.r12.3/compiler.33567101_0

DarkLight1337 commented 2 months ago

I'm not familiar with the installation process of vLLM. Can @youkaichao help with this?

DarkLight1337 commented 2 months ago

It should be supported in vLLM 0.6.1 now, so you can try installing it directly from PyPI.
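
A minimal sketch of that path (the ">=0.6.1" version pin and upgrading in the same environment are assumptions based on this thread):

pip uninstall -y vllm        # remove any partial source build or older wheel
pip install "vllm>=0.6.1"    # released wheel that should include multi-image InternVL2 support

After upgrading, the multi-image script from the first post should run unchanged.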

dahwin commented 2 months ago

okay!