Open NoOneUST opened 3 years ago
Hello. I first ran the command python sh/bld.py local, with the sh/dir.json file set as:
python sh/bld.py local
sh/dir.json
{ "local": { "bld_dir": "dataset_low_res", "dtu_dir": "", "tnt_dir": "tankandtemples", "tnt_training_dir": "", "save_dir": "save", "batch_size": 4, "num_workers": 4, "train_environ": "", "val_environ": "" } }
Then I set the sh/bld_val.py file as:
sh/bld_val.py
import os import json import argparse parser = argparse.ArgumentParser() parser.add_argument('machine', type=str) args = parser.parse_args() with open('sh/dir.json') as f: d = json.load(f) d = d[args.machine] for m in ['temp']: for ns in range(3, 3+1): cmd = f"""{d['val_environ']} python val.py --data_root {d['bld_dir']} --dataset_name blended --model_name model_cas --num_src {ns} --max_d 128 --interval_scale 1 --cas_depth_num 32,16,8 --cas_interv_scale 4,2,1 --resize 768,576 --crop 768,576 --mode soft --load_path {d['save_dir']}/{m} """ cmd = ' '.join(cmd.strip().split()) print(cmd) os.system(cmd)
Then I ran the command python sh/bld_val.py local and got:
python sh/bld_val.py local
(four cards condition) (base) bash-4.2$ python sh/bld_val.py local python val.py --data_root dataset_low_res --dataset_name blended --model_name model_cas --num_src 3 --max_d 128 --interval_scale 1 --cas_depth_num 32,16,8 --cas_interv_scale 4,2,1 --resize 768,576 --crop 768,576 --mode soft --load_path save/temp Number of samples: 915 Number of model parameters: 1162696 load save/temp/-1 0%| | 0/915 [00:23<?, ?it/s] Traceback (most recent call last): File "val.py", line 99, in <module> outputs, refined_depth, prob_maps = model(sample, cas_depth_num, cas_interv_scale, mode=args.mode) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl result = self.forward(*input, **kwargs) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 161, in forward outputs = self.parallel_apply(replicas, inputs, kwargs) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 171, in parallel_apply return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply output.reraise() File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/_utils.py", line 428, in reraise raise self.exc_type(msg) utils.utils.NanError: Caught NanError in replica 0 on device 0. 
Original Traceback (most recent call last): File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker output = module(*input, **kwargs) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl result = self.forward(*input, **kwargs) File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 419, in forward est_depth_1, prob_map_1, pair_results_1 = self.stage1([ref_feat_1, ref_cam, srcs_feat_1, srcs_cam], depth_num=depth_nums[0], upsample=False, mem=mem, mode=mode, depth_start_override=None, depth_interval_override=depth_interval*interval_scales[0], s_scale=8) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl result = self.forward(*input, **kwargs) File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 317, in forward warped_src = self.build_cost_volume(ref_feat, ref_cam, src_feat, src_cam, depth_num, depth_start, depth_interval, s_scale, d_scale) File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 179, in build_cost_volume warped_src_nd_c_h_w = homography_warping(src_nd_c_h_w, Hs.view(-1, *Hs.size()[2:])) # n*d chw File "/export/data/lwangcg/Vis-MVSNet/core/homography.py", line 103, in homography_warping warped = interpolate(input, warped_coord) File "/export/data/lwangcg/Vis-MVSNet/core/homography.py", line 93, in interpolate raise NanError utils.utils.NanError
(single card condition) (base) bash-4.2$ CUDA_VISIBLE_DEVICES=0 python sh/bld_val.py local python val.py --data_root dataset_low_res --dataset_name blended --model_name model_cas --num_src 3 --max_d 128 --interval_scale 1 --cas_depth_num 32,16,8 --cas_interv_scale 4,2,1 --resize 768,576 --crop 768,576 --mode soft --load_path save/temp Number of samples: 915 Number of model parameters: 1162696 load save/temp/-1 0%| | 0/915 [00:00<?, ?it/s] Traceback (most recent call last): File "val.py", line 99, in <module> outputs, refined_depth, prob_maps = model(sample, cas_depth_num, cas_interv_scale, mode=args.mode) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl result = self.forward(*input, **kwargs) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/data_parallel.py", line 159, in forward return self.module(*inputs[0], **kwargs[0]) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl result = self.forward(*input, **kwargs) File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 419, in forward est_depth_1, prob_map_1, pair_results_1 = self.stage1([ref_feat_1, ref_cam, srcs_feat_1, srcs_cam], depth_num=depth_nums[0], upsample=False, mem=mem, mode=mode, depth_start_override=None, depth_interval_override=depth_interval*interval_scales[0], s_scale=8) File "/export/data/lwangcg/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl result = self.forward(*input, **kwargs) File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 317, in forward warped_src = self.build_cost_volume(ref_feat, ref_cam, src_feat, src_cam, depth_num, depth_start, depth_interval, s_scale, d_scale) File "/export/data/lwangcg/Vis-MVSNet/core/model_cas.py", line 179, in build_cost_volume warped_src_nd_c_h_w = homography_warping(src_nd_c_h_w, Hs.view(-1, *Hs.size()[2:])) # n*d chw File 
"/export/data/lwangcg/Vis-MVSNet/core/homography.py", line 103, in homography_warping warped = interpolate(input, warped_coord) File "/export/data/lwangcg/Vis-MVSNet/core/homography.py", line 93, in interpolate raise NanError utils.utils.NanError
Could you please give me some help? Thank you!
Yes, we may encounter NaN when training with BlendedMVS, but it should have been caught in the training loop.
Hello. I first run the command
python sh/bld.py local
, the sh/dir.json
file is set as: Then I set the
sh/bld_val.py
file as: Then, I run the command
python sh/bld_val.py local
and get: Could you please give me some help? Thank you!