是否已有关于该错误的issue或讨论? | Is there an existing issue / discussion for this?
[X] 我已经搜索过已有的issues和讨论 | I have searched the existing issues / discussions
该问题是否在FAQ中有解答? | Is there an existing answer for this in FAQ?
[X] 我已经搜索过FAQ | I have searched FAQ
当前行为 | Current Behavior
2张32GB显卡,对Qwen-14B进行LoRA微调,报错 torch.cuda.OutOfMemoryError: CUDA out of memory
Traceback (most recent call last):
  File "/code/Qwen-main/finetune.py", line 362, in <module>
    train()
  File "/code/Qwen-main/finetune.py", line 337, in train
    model = get_peft_model(model, lora_config)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/mapping.py", line 133, in get_peft_model
    return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](model, peft_config, adapter_name=adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/peft_model.py", line 1043, in __init__
    super().__init__(model, peft_config, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/peft_model.py", line 125, in __init__
    self.base_model = cls(model, {adapter_name: peft_config}, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/tuners/lora/model.py", line 111, in __init__
    super().__init__(model, config, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 90, in __init__
    self.inject_adapter(self.model, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 228, in inject_adapter
    new_module = ModulesToSaveWrapper(target, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/utils/other.py", line 177, in __init__
    self.update(adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/utils/other.py", line 190, in update
    self.modules_to_save.update(torch.nn.ModuleDict({adapter_name: copy.deepcopy(self.original_module)}))
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 271, in _reconstruct
    state = deepcopy(state, memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 231, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 297, in _reconstruct
    value = deepcopy(value, memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 153, in deepcopy
    y = copier(memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/torch/nn/parameter.py", line 55, in __deepcopy__
    result = type(self)(self.data.clone(memory_format=torch.preserve_format), self.requires_grad)
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.45 GiB (GPU 1; 31.75 GiB total capacity; 14.62 GiB already allocated; 880.50 MiB free; 14.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation
See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
是否已有关于该错误的issue或讨论? | Is there an existing issue / discussion for this?
该问题是否在FAQ中有解答? | Is there an existing answer for this in FAQ?
当前行为 | Current Behavior
2张32GB显卡,对Qwen-14B进行LoRA微调,报错 torch.cuda.OutOfMemoryError: CUDA out of memory

Traceback (most recent call last):
  File "/code/Qwen-main/finetune.py", line 362, in <module>
    train()
  File "/code/Qwen-main/finetune.py", line 337, in train
    model = get_peft_model(model, lora_config)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/mapping.py", line 133, in get_peft_model
    return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](model, peft_config, adapter_name=adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/peft_model.py", line 1043, in __init__
    super().__init__(model, peft_config, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/peft_model.py", line 125, in __init__
    self.base_model = cls(model, {adapter_name: peft_config}, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/tuners/lora/model.py", line 111, in __init__
    super().__init__(model, config, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 90, in __init__
    self.inject_adapter(self.model, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/tuners/tuners_utils.py", line 228, in inject_adapter
    new_module = ModulesToSaveWrapper(target, adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/utils/other.py", line 177, in __init__
    self.update(adapter_name)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/peft/utils/other.py", line 190, in update
    self.modules_to_save.update(torch.nn.ModuleDict({adapter_name: copy.deepcopy(self.original_module)}))
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 271, in _reconstruct
    state = deepcopy(state, memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 146, in deepcopy
    y = copier(x, memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 231, in _deepcopy_dict
    y[deepcopy(key, memo)] = deepcopy(value, memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 172, in deepcopy
    y = _reconstruct(x, memo, *rv)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 297, in _reconstruct
    value = deepcopy(value, memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/copy.py", line 153, in deepcopy
    y = copier(memo)
  File "/root/anaconda3/envs/qwen-14b-env/lib/python3.10/site-packages/torch/nn/parameter.py", line 55, in __deepcopy__
    result = type(self)(self.data.clone(memory_format=torch.preserve_format), self.requires_grad)
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.45 GiB (GPU 1; 31.75 GiB total capacity; 14.62 GiB already allocated; 880.50 MiB free; 14.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation
See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
期望行为 | Expected Behavior
No response
复现方法 | Steps To Reproduce
No response
运行环境 | Environment
备注 | Anything else?
No response