I have run your train.py code in google colab, but I keep getting this error below. I am already using A100 GPU in Colab Pro. Do you have any idea how to solve this one?
class carpet
args1.json defaultdict(<class 'str'>, {'img_size': [256, 256], 'Batch_Size': 8, 'EPOCHS': 3000, 'T': 1000, 'base_channels': 128, 'beta_schedule': 'linear', 'loss_type': 'l2', 'diffusion_lr': 0.0001, 'seg_lr': 1e-05, 'random_slice': True, 'weight_decay': 0.0, 'save_imgs': True, 'save_vids': False, 'dropout': 0, 'attention_resolutions': '32,16,8', 'num_heads': 4, 'num_head_channels': -1, 'noise_fn': 'gauss', 'channels': 3, 'mvtec_root_path': 'datasets/mvtec', 'visa_root_path': 'datasets/VisA_1class/1cls', 'dagm_root_path': 'datasets/dagm', 'mpdd_root_path': 'datasets/mpdd', 'anomaly_source_path': 'datasets/DTD', 'noisier_t_range': 600, 'less_t_range': 300, 'condition_w': 1, 'eval_normal_t': 200, 'eval_noisier_t': 400, 'output_path': 'outputs', 'arg_num': '1'})
0% 0/35 [00:16<?, ?it/s]
Traceback (most recent call last):
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/train.py", line 337, in <module>
main()
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/train.py", line 332, in main
train(training_dataset_loader, test_loader, args, data_len,sub_class,class_type,device )
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/train.py", line 117, in train
noise_loss, pred_x0,normal_t,x_normal_t,x_noiser_t = ddpm_sample.norm_guided_one_step_denoising(unet_model, aug_image, anomaly_label,args)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/DDPM.py", line 354, in norm_guided_one_step_denoising
noisier_loss, x_noiser_t, estimate_noise_noisier = self.calc_loss(model, x_0, noisier_t)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/DDPM.py", line 333, in calc_loss
estimate_noise = model(x_t, t)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/Recon_subnetwork.py", line 403, in forward
h = module(h, time_embed)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/Recon_subnetwork.py", line 32, in forward
x = layer(x, emb)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/Recon_subnetwork.py", line 217, in forward
return self.skip_connection(x) + h
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 463, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 459, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB (GPU 0; 39.56 GiB total capacity; 38.73 GiB already allocated; 8.81 MiB free; 38.99 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Hello!
I have run your train.py code in google colab, but I keep getting this error below. I am already using A100 GPU in Colab Pro. Do you have any idea how to solve this one?
class carpet args1.json defaultdict(<class 'str'>, {'img_size': [256, 256], 'Batch_Size': 8, 'EPOCHS': 3000, 'T': 1000, 'base_channels': 128, 'beta_schedule': 'linear', 'loss_type': 'l2', 'diffusion_lr': 0.0001, 'seg_lr': 1e-05, 'random_slice': True, 'weight_decay': 0.0, 'save_imgs': True, 'save_vids': False, 'dropout': 0, 'attention_resolutions': '32,16,8', 'num_heads': 4, 'num_head_channels': -1, 'noise_fn': 'gauss', 'channels': 3, 'mvtec_root_path': 'datasets/mvtec', 'visa_root_path': 'datasets/VisA_1class/1cls', 'dagm_root_path': 'datasets/dagm', 'mpdd_root_path': 'datasets/mpdd', 'anomaly_source_path': 'datasets/DTD', 'noisier_t_range': 600, 'less_t_range': 300, 'condition_w': 1, 'eval_normal_t': 200, 'eval_noisier_t': 400, 'output_path': 'outputs', 'arg_num': '1'}) 0% 0/35 [00:16<?, ?it/s] Traceback (most recent call last): File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/train.py", line 337, in <module>
main()
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/train.py", line 332, in main
train(training_dataset_loader, test_loader, args, data_len,sub_class,class_type,device )
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/train.py", line 117, in train
noise_loss, pred_x0,normal_t,x_normal_t,x_noiser_t = ddpm_sample.norm_guided_one_step_denoising(unet_model, aug_image, anomaly_label,args)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/DDPM.py", line 354, in norm_guided_one_step_denoising
noisier_loss, x_noiser_t, estimate_noise_noisier = self.calc_loss(model, x_0, noisier_t)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/DDPM.py", line 333, in calc_loss
estimate_noise = model(x_t, t)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/Recon_subnetwork.py", line 403, in forward
h = module(h, time_embed)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/Recon_subnetwork.py", line 32, in forward
x = layer(x, emb)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/content/drive/MyDrive/DiffusionDA/DiffusionAD-main/models/Recon_subnetwork.py", line 217, in forward
return self.skip_connection(x) + h
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 463, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py", line 459, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB (GPU 0; 39.56 GiB total capacity; 38.73 GiB already allocated; 8.81 MiB free; 38.99 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF