Open LaBaZh opened 2 weeks ago
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
Add this to the start and set everything to "cuda:0". This will force the processes to only utilize CUDA rank 0
It may be a somewhat "hacky" workaround, but it got things working for me.
tokenizer, model, processor, context_len = load_pretrained_model(model_path, None, model_name, device="cuda:6") — here you should set the device index to match the one used everywhere else in the script.
When running inference script, I encountered error saying: Traceback (most recent call last): File "inference.py", line 72, in
inference()
File "inference.py", line 57, in inference
output_ids = model.generate(
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/lee/Oscar/codebase/VideoLLaMA2/videollama2/model/language_model/videollama2_mistral.py", line 121, in generate
) = self.prepare_inputs_labels_for_multimodal(
File "/home/lee/Oscar/codebase/VideoLLaMA2/videollama2/model/videollama2_arch.py", line 164, in prepare_inputs_labels_for_multimodal
X_features = self.encode_images_or_videos(Xs, keys)
File "/home/lee/Oscar/codebase/VideoLLaMA2/videollama2/model/videollama2_arch.py", line 126, in encode_images_or_videos
return self.temporal_aggregator(frames_features)
File "/home/lee/Oscar/codebase/VideoLLaMA2/videollama2/model/videollama2_arch.py", line 147, in temporal_aggregator
video_features = self.get_model().mm_projector(frames_features)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/lee/Oscar/codebase/VideoLLaMA2/videollama2/model/multimodal_projector/builder.py", line 205, in forward
x = self.s1(x)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/timm/models/regnet.py", line 369, in forward
x = block(x)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/timm/models/regnet.py", line 279, in forward
x = self.conv1(x)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/timm/models/layers/conv_bn_act.py", line 36, in forward
x = self.conv(x)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 463, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/lee/.conda/envs/videollama2/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 459, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0! (when checking argument for argument weight in method wrapper_CUDA__cudnn_convolution)