I am using four RTX 3090 Ti cards and have set the batch size to a very small value, but this error occurs every time the run reaches the first epoch (it crashes during validation):
Traceback (most recent call last):
File "train.py", line 522, in
main(args)
File "train.py", line 316, in main
prec1, output_list, labels_list = validate(epoch, val_loader, classes, device, model, video_head, config, n_class, logger, save_score)
File "train.py", line 433, in validate
cls_feature, text_features = model.module.encode_text(text_inputs, return_token=True) # [n_cls, feat_dim]
File "/home/chenshengyi/depthstudy/gesturecode/BIKE-main/clip/model.py", line 443, in encode_text
x = self.transformer(x)
File "/opt/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/chenshengyi/depthstudy/gesturecode/BIKE-main/clip/model.py", line 253, in forward
x = checkpoint(r, x)
File "/opt/anaconda3/lib/python3.8/site-packages/torch/utils/checkpoint.py", line 235, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/opt/anaconda3/lib/python3.8/site-packages/torch/utils/checkpoint.py", line 96, in forward
outputs = run_function(*args)
File "/opt/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/chenshengyi/depthstudy/gesturecode/BIKE-main/clip/model.py", line 227, in forward
x = x + self.drop_path(self.attention(self.ln_1(x)))
File "/home/chenshengyi/depthstudy/gesturecode/BIKE-main/clip/model.py", line 219, in attention
return self.attn(x, x, x, need_weights=False, attn_mask=self.attn_mask)[0]
File "/opt/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/anaconda3/lib/python3.8/site-packages/torch/nn/modules/activation.py", line 1153, in forward
attn_output, attn_output_weights = F.multi_head_attention_forward(
File "/opt/anaconda3/lib/python3.8/site-packages/torch/nn/functional.py", line 5131, in multi_head_attention_forward
v = v.contiguous().view(v.shape[0], bsz * num_heads, head_dim).transpose(0, 1)
RuntimeError: CUDA out of memory. Tried to allocate 1.96 GiB (GPU 0; 23.70 GiB total capacity; 15.94 GiB already allocated; 657.56 MiB free; 16.10 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
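For reference, the error text itself points at `max_split_size_mb` when reserved memory is much larger than allocated memory. Below is a minimal sketch of the two mitigations I am looking at, under the assumption that `validate()` in train.py does not already run under `torch.no_grad()`; `model` and `text_inputs` are the objects from the traceback above, and the value 128 is an arbitrary starting point, not a recommendation:

```python
import os

# The error message suggests max_split_size_mb to reduce fragmentation.
# This must be set before the first CUDA allocation, e.g. at the very top
# of train.py. 128 MB is just an assumed starting value to experiment with.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import torch

# Validation needs no gradients. If validate() does not already disable
# them, each encode_text() call keeps activations alive for backward,
# which can OOM even with a tiny batch size.
model.eval()
with torch.no_grad():
    cls_feature, text_features = model.module.encode_text(
        text_inputs, return_token=True
    )
```

The environment variable can equivalently be set in the shell before launching, e.g. `PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 python train.py ...`.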