Open · sunbi-s opened 7 months ago
Could you print your model? You may need to change auto_configure_device_map.
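To make this suggestion concrete, here is a minimal sketch of the idea (the helper name and split logic below are illustrative assumptions, not the repo's exact auto_configure_device_map): build a {module_name: gpu_index} device map that pins the vision path (vit, vision_proj) and the token embeddings to one GPU, and only shards the 32 decoder layers across the rest, so the image tensor and the CLIP weights can never end up on different devices.

```python
# Illustrative sketch, not the repo's exact function. Module names are taken
# from the print(model) output later in this thread.
def make_device_map(num_gpus: int, num_layers: int = 32) -> dict:
    device_map = {
        'vit': 0,                  # keep the whole CLIP vision tower on GPU 0
        'vision_proj': 0,          # and the projector that consumes its output
        'model.tok_embeddings': 0,
        'model.norm': num_gpus - 1,
        'output': num_gpus - 1,
    }
    # Contiguous split of the decoder layers across all GPUs.
    per_gpu = (num_layers + num_gpus - 1) // num_gpus
    for i in range(num_layers):
        device_map[f'model.layers.{i}'] = min(i // per_gpu, num_gpus - 1)
    return device_map
```

Such a map can then be passed as device_map=make_device_map(2) to from_pretrained (or to accelerate's dispatch_model) instead of device_map='auto'.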
What does 'print your model' mean? Sorry, I don't understand.
Show the model architecture by calling print(model).
Also, could you share the whole error log?
This is my full error log:
/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py:2025: UserWarning: for vision_model.post_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)
warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '
/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py:2025: UserWarning: for vision_model.post_layernorm.bias: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)
warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:17<00:00, 8.58s/it]
Some weights of InternLMXComposer2ForCausalLM were not initialized from the model checkpoint at /home/jovyan/insik/LVLM/internlm-xcomposer2-vl-7b and are newly initialized: ['vit.vision_tower.vision_model.post_layernorm.bias', 'vit.vision_tower.vision_model.post_layernorm.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You shouldn't move a model when it is dispatched on multiple devices.
Traceback (most recent call last):
File "/home/jovyan/insik/LVLM/InternLM-XComposer/examples/example_lora.py", line 43, in <module>
response, _ = model.chat(tokenizer, query=text, image=image, history=[], do_sample=False)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 500, in chat
image = self.encode_img(image)
^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 118, in encode_img
img_embeds, atts_img, img_target = self.img2emb(image)
^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/modeling_internlm_xcomposer2.py", line 122, in img2emb
img_embeds = self.vision_proj(self.vit(image.to(self.device)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/.cache/huggingface/modules/transformers_modules/internlm-xcomposer2-vl-7b/build_mlp.py", line 132, in forward
image_forward_outs = self.vision_tower(
^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/transformers/models/clip/modeling_clip.py", line 958, in forward
return self.vision_model(
^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/transformers/models/clip/modeling_clip.py", line 883, in forward
hidden_states = self.embeddings(pixel_values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/transformers/models/clip/modeling_clip.py", line 196, in forward
patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype)) # shape = [*, width, grid, grid]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 460, in forward
return self._conv_forward(input, self.weight, self.bias)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jovyan/anaconda3/envs/py11/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 456, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:3 and cuda:0! (when checking argument for argument weight in method wrapper_CUDA__cudnn_convolution)
This is my model (with LoRA applied):
InternLMXComposer2ForCausalLM(
(model): InternLM2Model(
(tok_embeddings): Embedding(92544, 4096, padding_idx=2)
(layers): ModuleList(
(0-31): 32 x InternLM2DecoderLayer(
(attention): InternLM2Attention(
(wqkv): lora.Linear(
(base_layer): PLoRA(
in_features=4096, out_features=6144, bias=False
(lora_dropout): Dropout(p=0.05, inplace=False)
(Plora_A): Linear(in_features=4096, out_features=256, bias=False)
(Plora_B): Linear(in_features=256, out_features=6144, bias=False)
)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=4096, out_features=64, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=64, out_features=6144, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
)
(wo): lora.Linear(
(base_layer): PLoRA(
in_features=4096, out_features=4096, bias=False
(lora_dropout): Dropout(p=0.05, inplace=False)
(Plora_A): Linear(in_features=4096, out_features=256, bias=False)
(Plora_B): Linear(in_features=256, out_features=4096, bias=False)
)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=4096, out_features=64, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=64, out_features=4096, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
)
(rotary_emb): InternLM2DynamicNTKScalingRotaryEmbedding()
)
(feed_forward): InternLM2MLP(
(w1): lora.Linear(
(base_layer): PLoRA(
in_features=4096, out_features=14336, bias=False
(lora_dropout): Dropout(p=0.05, inplace=False)
(Plora_A): Linear(in_features=4096, out_features=256, bias=False)
(Plora_B): Linear(in_features=256, out_features=14336, bias=False)
)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=4096, out_features=64, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=64, out_features=14336, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
)
(w3): lora.Linear(
(base_layer): PLoRA(
in_features=4096, out_features=14336, bias=False
(lora_dropout): Dropout(p=0.05, inplace=False)
(Plora_A): Linear(in_features=4096, out_features=256, bias=False)
(Plora_B): Linear(in_features=256, out_features=14336, bias=False)
)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=4096, out_features=64, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=64, out_features=14336, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
)
(w2): lora.Linear(
(base_layer): PLoRA(
in_features=14336, out_features=4096, bias=False
(lora_dropout): Dropout(p=0.05, inplace=False)
(Plora_A): Linear(in_features=14336, out_features=256, bias=False)
(Plora_B): Linear(in_features=256, out_features=4096, bias=False)
)
(lora_dropout): ModuleDict(
(default): Dropout(p=0.05, inplace=False)
)
(lora_A): ModuleDict(
(default): Linear(in_features=14336, out_features=64, bias=False)
)
(lora_B): ModuleDict(
(default): Linear(in_features=64, out_features=4096, bias=False)
)
(lora_embedding_A): ParameterDict()
(lora_embedding_B): ParameterDict()
)
(act_fn): SiLUActivation()
)
(attention_norm): InternLM2RMSNorm()
(ffn_norm): InternLM2RMSNorm()
)
)
(norm): InternLM2RMSNorm()
)
(output): Linear(in_features=4096, out_features=92544, bias=False)
(vit): CLIPVisionTower(
(vision_tower): CLIPVisionModel(
(vision_model): CLIPVisionTransformer(
(embeddings): CLIPVisionEmbeddings(
(patch_embedding): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14), bias=False)
(position_embedding): Embedding(1226, 1024)
)
(pre_layrnorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(encoder): CLIPEncoder(
(layers): ModuleList(
(0-23): 24 x CLIPEncoderLayer(
(self_attn): CLIPAttention(
(k_proj): Linear(in_features=1024, out_features=1024, bias=True)
(v_proj): Linear(in_features=1024, out_features=1024, bias=True)
(q_proj): Linear(in_features=1024, out_features=1024, bias=True)
(out_proj): Linear(in_features=1024, out_features=1024, bias=True)
)
(layer_norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(mlp): CLIPMLP(
(activation_fn): QuickGELUActivation()
(fc1): Linear(in_features=1024, out_features=4096, bias=True)
(fc2): Linear(in_features=4096, out_features=1024, bias=True)
)
(layer_norm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
(post_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
)
)
)
(vision_proj): Sequential(
(0): Linear(in_features=1024, out_features=4096, bias=True)
(1): GELU(approximate='none')
(2): Linear(in_features=4096, out_features=4096, bias=True)
)
)
I run this model on two GPUs.
Thank you!
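(For reference, a common two-GPU loading pattern that produces this kind of split placement looks like the sketch below; this is an assumption about the setup, since the exact contents of example_lora.py are not shown in the thread.)

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = '/home/jovyan/insik/LVLM/internlm-xcomposer2-vl-7b'
tokenizer = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    torch_dtype=torch.float16,
    device_map='auto',  # lets accelerate scatter submodules across GPUs
    trust_remote_code=True,
).eval()
```

With device_map='auto', nothing guarantees that the CLIP vision tower lands on the same GPU as the module whose .device is used when img2emb calls image.to(self.device), which is exactly the cuda:3 vs. cuda:0 mismatch in the traceback above.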
I encountered the same problem. Have you solved it?
A CUDA error occurs when loading the model with LoRA and running multi-GPU inference.
Below is the error I get from my code:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:3 and cuda:0! (when checking argument for argument weight in method wrapper_CUDA__cudnn_convolution)
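A quick way to confirm the split placement (a generic diagnostic sketch, assuming model is already loaded and dispatched) is to print where each top-level submodule actually landed; if vit reports a different GPU than model, that is exactly the mismatch in the traceback:

```python
# Report the device of every top-level submodule of the dispatched model.
for name, module in model.named_children():
    params = list(module.parameters())
    device = params[0].device if params else 'no parameters'
    print(f'{name}: {device}')

# Models dispatched via device_map also expose the resolved map directly.
print(getattr(model, 'hf_device_map', None))
```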