```
Traceback (most recent call last):
  File "main.py", line 522, in <module>
    fire.Fire(LSDTrainer)
  File ".local/lib/python3.6/site-packages/fire/core.py", line 138, in Fire
    component_trace = _Fire(component, args, parsed_flag_args, context, name)
  File ".local/lib/python3.6/site-packages/fire/core.py", line 471, in _Fire
    target=component.__name__)
  File ".local/lib/python3.6/site-packages/fire/core.py", line 675, in _CallAndUpdateTrace
    component = fn(*varargs, **kwargs)
  File "main.py", line 359, in train
    self._train_epoch()
  File "main.py", line 227, in _train_epoch
    img, heatmap_gt, adj_mtx_gt, self.lambda_heatmap, self.lambda_adj, junctions_gt
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 152, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 162, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 83, in parallel_apply
    raise output
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 59, in _worker
    output = module(*input, **kwargs)
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "LSD/PPGNet/models/lsd.py", line 291, in forward
    adj_matrix_pred, loss_adj = block_adj_infer(feat_adj)
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "LSD/PPGNet/models/common.py", line 281, in forward
    output = submodule(output)
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "LSD/PPGNet/models/lsd.py", line 70, in forward
    line_feat = self.line_pool(feat, junc_st, junc_ed)
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "LSD/PPGNet/models/graph.py", line 86, in forward
    output = F.grid_sample(feat[int(bs)].view(1, ch, h, w).expand(num_st, ch, h, w), sample_grid)
  File "envs/r0.3.0/lib/python3.6/site-packages/torch/nn/functional.py", line 2717, in grid_sample
    return torch.grid_sampler(input, grid, mode_enum, padding_mode_enum)
RuntimeError: CUDA out of memory. Tried to allocate 44.00 MiB (GPU 0; 10.91 GiB total capacity; 1.53 GiB already allocated; 15.38 MiB free; 94.03 MiB cached)
```
I am trying to reproduce the result with 4 x 1080 Ti GPUs and reduced the batch size to 4, but I still get the CUDA out-of-memory error above. The detailed configuration:

```bash
python main.py \
    --exp-name line_weighted_wo_focal_junc --backbone resnet50 \
    --backbone-kwargs '{"encoder_weights": "ckpt/backbone/encoder_epoch_20.pth", "decoder_weights": "ckpt/backbone/decoder_epoch_20.pth"}' \
    --dim-embedding 256 --junction-pooling-threshold 0.2 \
    --junc-pooling-size 64 --attention-sigma 1.5 --block-inference-size 4 \
    --data-root "data/indoorDist" --junc-sigma 3 \
    --batch-size 4 --gpus 0,1,2,3 --num-workers 10 --resume-epoch latest \
    --is-train-junc True --is-train-adj True \
    --vis-junc-th 0.1 --vis-line-th 0.1 \
```
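For reference, the command should correspond roughly to the direct call below. This is only a sketch: it assumes Python Fire's usual mapping of `--flag-name` to an `LSDTrainer` keyword argument `flag_name`, the real signature lives in `main.py`, and my command is truncated, so flags may be missing.

```python
# Rough equivalent of the fire.Fire(LSDTrainer) invocation above.
# Argument names mirror the CLI flags; the actual LSDTrainer signature may differ.
from main import LSDTrainer  # assumes PPGNet's main.py is importable

trainer = LSDTrainer(
    exp_name="line_weighted_wo_focal_junc",
    backbone="resnet50",
    backbone_kwargs={
        "encoder_weights": "ckpt/backbone/encoder_epoch_20.pth",
        "decoder_weights": "ckpt/backbone/decoder_epoch_20.pth",
    },
    dim_embedding=256,
    junction_pooling_threshold=0.2,
    junc_pooling_size=64,
    attention_sigma=1.5,
    block_inference_size=4,
    data_root="data/indoorDist",
    junc_sigma=3,
    batch_size=4,
    gpus=(0, 1, 2, 3),      # Fire parses "0,1,2,3" as a tuple
    num_workers=10,
    resume_epoch="latest",
    is_train_junc=True,
    is_train_adj=True,
    vis_junc_th=0.1,
    vis_line_th=0.1,
)
trainer.train()  # the traceback enters main.py through this call
```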
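For context, the allocation that fails is the `F.grid_sample` call in the line-pooling step (`LSD/PPGNet/models/graph.py`, line 86 in the traceback), where one image's feature map is expanded over every candidate line segment before sampling. A minimal sketch of that pattern, with made-up shapes (the real channel count, feature-map size, and number of candidates depend on the model and the image, so the actual numbers will differ):

```python
import torch
import torch.nn.functional as F

device = "cuda" if torch.cuda.is_available() else "cpu"

# Illustrative shapes only.
ch, h, w = 256, 128, 128   # feature-map channels / height / width
num_st = 512               # candidate line segments in this image
pool_size = 64             # points sampled along each segment

feat = torch.randn(ch, h, w, device=device)
# Normalized sampling coordinates in [-1, 1], one row of points per segment.
sample_grid = torch.rand(num_st, pool_size, 1, 2, device=device) * 2 - 1

# expand() is only a view and allocates nothing, but grid_sample materializes
# an output of shape (num_st, ch, pool_size, 1); with these illustrative
# numbers that is 512 * 256 * 64 * 4 bytes = 32 MiB for a single image.
output = F.grid_sample(
    feat.view(1, ch, h, w).expand(num_st, ch, h, w),
    sample_grid,
)
print(output.shape)  # torch.Size([512, 256, 64, 1])
```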