msracver / FCIS

Fully Convolutional Instance-aware Semantic Segmentation
MIT License
1.57k stars 414 forks source link

testing never finish #8

Open gaopeng-eugene opened 7 years ago

gaopeng-eugene commented 7 years ago

Hello, thank you for your great code. Right now, I am trying to evaluate the performance of your model. Here is the script (command and output below). After iterating through all 5000 minival images, the code runs into a deadlock and never finishes.

Looking forward to your advice.

python experiments/fcis/fcis_end2end_test.py --cfg experiments/fcis/cfgs/resnet_v1_101_coco_fcis_end2end_ohem.yaml Namespace(cfg='experiments/fcis/cfgs/resnet_v1_101_coco_fcis_end2end_ohem.yaml', ignore_cache=False, shuffle=False, thresh=0.001, vis=False) {'BINARY_THRESH': 0.4, 'CLASS_AGNOSTIC': True, 'MASK_SIZE': 21, 'MXNET_VERSION': 'mxnet', 'SCALES': [(600, 1000)], 'TEST': {'BATCH_IMAGES': 1, 'CXX_PROPOSAL': False, 'HAS_RPN': True, 'ITER': 2, 'MASK_MERGE_THRESH': 0.5, 'MIN_DROP_SIZE': 2, 'NMS': 0.3, 'PROPOSAL_MIN_SIZE': 2, 'PROPOSAL_NMS_THRESH': 0.7, 'PROPOSAL_POST_NMS_TOP_N': 2000, 'PROPOSAL_PRE_NMS_TOP_N': 20000, 'RPN_MIN_SIZE': 2, 'RPN_NMS_THRESH': 0.7, 'RPN_POST_NMS_TOP_N': 300, 'RPN_PRE_NMS_TOP_N': 6000, 'USE_GPU_MASK_MERGE': True, 'USE_MASK_MERGE': True, 'test_epoch': 8}, 'TRAIN': {'ASPECT_GROUPING': True, 'BATCH_IMAGES': 1, 'BATCH_ROIS': -1, 'BATCH_ROIS_OHEM': 128, 'BBOX_MEANS': [0.0, 0.0, 0.0, 0.0], 'BBOX_NORMALIZATION_PRECOMPUTED': True, 'BBOX_REGRESSION_THRESH': 0.5, 'BBOX_STDS': [0.2, 0.2, 0.5, 0.5], 'BBOX_WEIGHTS': array([ 1., 1., 1., 1.]), 'BG_THRESH_HI': 0.5, 'BG_THRESH_LO': 0, 'BINARY_THRESH': 0.4, 'CONVNEW3': True, 'CXX_PROPOSAL': False, 'ENABLE_OHEM': True, 'END2END': True, 'FG_FRACTION': 0.25, 'FG_THRESH': 0.5, 'FLIP': True, 'GAP_SELECT_FROM_ALL': False, 'IGNORE_GAP': False, 'LOSS_WEIGHT': [1.0, 10.0, 1.0], 'RESUME': False, 'RPN_ALLOWED_BORDER': 0, 'RPN_BATCH_SIZE': 256, 'RPN_BBOX_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 'RPN_CLOBBER_POSITIVES': False, 'RPN_FG_FRACTION': 0.5, 'RPN_MIN_SIZE': 2, 'RPN_NEGATIVE_OVERLAP': 0.3, 'RPN_NMS_THRESH': 0.7, 'RPN_POSITIVE_OVERLAP': 0.7, 'RPN_POSITIVE_WEIGHT': -1.0, 'RPN_POST_NMS_TOP_N': 300, 'RPN_PRE_NMS_TOP_N': 6000, 'SHUFFLE': True, 'begin_epoch': 0, 'end_epoch': 8, 'lr': 0.0005, 'lr_step': '5.33', 'model_prefix': 'e2e', 'momentum': 0.9, 'warmup': True, 'warmup_lr': 5e-05, 'warmup_step': 250, 'wd': 0.0005}, 'dataset': {'NUM_CLASSES': 81, 'dataset': 'coco', 'dataset_path': './data/coco', 'image_set': 
'train2014+valminusminival2014', 'proposal': 'rpn', 'root_path': './data', 'test_image_set': 'minival2014'}, 'default': {'frequent': 100, 'kvstore': 'device'}, 'gpus': '0', 'network': {'ANCHOR_RATIOS': [0.5, 1, 2], 'ANCHOR_SCALES': [4, 8, 16, 32], 'FIXED_PARAMS': ['conv1', 'bn_conv1', 'res2', 'bn2', 'gamma', 'beta'], 'FIXED_PARAMS_SHARED': ['conv1', 'bn_conv1', 'res2', 'bn2', 'res3', 'bn3', 'res4', 'bn4', 'gamma', 'beta'], 'IMAGE_STRIDE': 0, 'NUM_ANCHORS': 12, 'PIXEL_MEANS': array([ 103.06, 115.9 , 123.15]), 'RCNN_FEAT_STRIDE': 16, 'RPN_FEAT_STRIDE': 16, 'pretrained': './model/pretrained_model/resnet_v1_101', 'pretrained_epoch': 0}, 'output_path': './output/fcis/coco', 'symbol': 'resnet_v1_101_fcis'} loading annotations into memory... Done (t=0.47s) creating index... index created! num_images 5000 prepare gt_sdsdb using 1.46799492836 seconds generate cache_seg_inst using 59.4011361599 seconds testing 1/5000 data 0.0000s net 0.5483s post 0.1172s testing 2/5000 data 0.0002s net 0.5420s post 0.1336s testing 3/5000 data 0.0002s net 0.4863s post 0.1653s testing 4/5000 data 0.0002s net 0.4686s post 0.1625s testing 5/5000 data 0.0002s net 0.4714s post 0.1491s testing 6/5000 data 0.0002s net 0.5424s post 0.1557s testing 7/5000 data 0.0002s net 0.5172s post 0.1454s testing 8/5000 data 0.0002s net 0.4875s post 0.1205s testing 9/5000 data 0.0002s net 0.5642s post 0.0951s testing 10/5000 data 0.0002s net 0.5434s post 0.0537s testing 11/5000 data 0.0002s net 0.5247s post 0.1334s testing 12/5000 data 0.0002s net 0.4702s post 0.1176s testing 13/5000 data 0.0002s net 0.5445s post 0.0886s testing 14/5000 data 0.0002s net 0.5181s post 0.1277s testing 15/5000 data 0.0002s net 0.5344s post 0.1255s testing 16/5000 data 0.0002s net 0.5619s post 0.1394s testing 17/5000 data 0.0002s net 0.4823s post 0.0381s testing 18/5000 data 0.0002s net 0.5213s post 0.1499s testing 19/5000 data 0.0002s net 0.5446s post 0.1506s testing 20/5000 data 0.0002s net 0.4844s post 0.1120s testing 21/5000 data 
0.0002s net 0.4683s post 0.1378s testing 22/5000 data 0.0002s net 0.5321s post 0.1471s testing 23/5000 data 0.0002s net 0.4832s post 0.1219s testing 24/5000 data 0.0003s net 0.4679s post 0.1161s testing 25/5000 data 0.0002s net 0.5483s post 0.1896s testing 26/5000 data 0.0002s net 0.4867s post 0.1112s .............................. testing 4987/5000 data 0.0002s net 0.4905s post 0.1353s testing 4988/5000 data 0.0002s net 0.5457s post 0.1393s testing 4989/5000 data 0.0002s net 0.5158s post 0.0691s testing 4990/5000 data 0.0002s net 0.4863s post 0.1257s testing 4991/5000 data 0.0002s net 0.4861s post 0.1124s testing 4992/5000 data 0.0002s net 0.4891s post 0.1937s testing 4993/5000 data 0.0002s net 0.5489s post 0.0714s testing 4994/5000 data 0.0002s net 0.4860s post 0.1524s testing 4995/5000 data 0.0002s net 0.4091s post 0.0633s testing 4996/5000 data 0.0002s net 0.4887s post 0.1297s testing 4997/5000 data 0.0002s net 0.5446s post 0.1039s testing 4998/5000 data 0.0002s net 0.4870s post 0.1028s testing 4999/5000 data 0.0002s net 0.4695s post 0.1214s testing 5000/5000 data 0.0002s net 0.5398s post 0.1317s

After this point, the process never finishes.

gaopeng-eugene commented 7 years ago

It seems that the problem is in lib/dataset/coco.py line 371: results = pool.map(coco_results_one_category_kernel, data_pack)

gaopeng-eugene commented 7 years ago

My solution right now is the following. In coco.py, at line 371, replace

```python
pool = mp.Pool(mp.cpu_count())
results = pool.map(coco_results_one_category_kernel, data_pack)
pool.close()
pool.join()
```

with

```python
results = []
for i in range(len(data_pack)):
    results.append(coco_results_one_category_kernel(data_pack[i]))
```

Thank you for your amazing code

liyi14 commented 7 years ago

Hi, @gaopeng-eugene , were you running it in debug mode?

gaopeng-eugene commented 7 years ago

no

mariolew commented 7 years ago

@gaopeng-eugene Hi, I encountered the same problem as you, but after I changed the code as you suggested, the testing process became extremely slow. And I still don't know why the original code never finishes testing.

gaopeng-eugene commented 7 years ago

Of course: my code runs in a single process, while the original code runs in multiple processes. My guess is that there is a deadlock in the original code.

pqviet commented 7 years ago

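Replacing "import multiprocessing" in coco.py with "import multiprocessing.dummy" fixed the problem. (multiprocessing.dummy exposes the same API as multiprocessing but is backed by threads instead of processes.)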

xingbowei commented 7 years ago

I don't know why the run is not proceeding any further. Can someone help me take a look? Thank you very much!

xbw@xbw-P65xRP:~/FCIS$ python experiments/fcis/fcis_end2end_train_test.py --cfg experiments/fcis/cfgs/resnet_v1_101_coco_fcis_end2end_ohem.yaml Called with argument: Namespace(cfg='experiments/fcis/cfgs/resnet_v1_101_coco_fcis_end2end_ohem.yaml', frequent=100) {'BINARY_THRESH': 0.4, 'CLASS_AGNOSTIC': True, 'MASK_SIZE': 21, 'MXNET_VERSION': 'mxnet', 'SCALES': [(600, 1000)], 'TEST': {'BATCH_IMAGES': 1, 'CXX_PROPOSAL': False, 'HAS_RPN': True, 'ITER': 2, 'MASK_MERGE_THRESH': 0.5, 'MIN_DROP_SIZE': 2, 'NMS': 0.3, 'PROPOSAL_MIN_SIZE': 2, 'PROPOSAL_NMS_THRESH': 0.7, 'PROPOSAL_POST_NMS_TOP_N': 2000, 'PROPOSAL_PRE_NMS_TOP_N': 20000, 'RPN_MIN_SIZE': 2, 'RPN_NMS_THRESH': 0.7, 'RPN_POST_NMS_TOP_N': 300, 'RPN_PRE_NMS_TOP_N': 6000, 'USE_GPU_MASK_MERGE': True, 'USE_MASK_MERGE': True, 'test_epoch': 8}, 'TRAIN': {'ASPECT_GROUPING': True, 'BATCH_IMAGES': 1, 'BATCH_ROIS': -1, 'BATCH_ROIS_OHEM': 128, 'BBOX_MEANS': [0.0, 0.0, 0.0, 0.0], 'BBOX_NORMALIZATION_PRECOMPUTED': True, 'BBOX_REGRESSION_THRESH': 0.5, 'BBOX_STDS': [0.2, 0.2, 0.5, 0.5], 'BBOX_WEIGHTS': array([ 1., 1., 1., 1.]), 'BG_THRESH_HI': 0.5, 'BG_THRESH_LO': 0, 'BINARY_THRESH': 0.4, 'CONVNEW3': True, 'CXX_PROPOSAL': False, 'ENABLE_OHEM': True, 'END2END': True, 'FG_FRACTION': 0.25, 'FG_THRESH': 0.5, 'FLIP': True, 'GAP_SELECT_FROM_ALL': False, 'IGNORE_GAP': False, 'LOSS_WEIGHT': [1.0, 10.0, 1.0], 'RESUME': False, 'RPN_ALLOWED_BORDER': 0, 'RPN_BATCH_SIZE': 256, 'RPN_BBOX_WEIGHTS': [1.0, 1.0, 1.0, 1.0], 'RPN_CLOBBER_POSITIVES': False, 'RPN_FG_FRACTION': 0.5, 'RPN_MIN_SIZE': 2, 'RPN_NEGATIVE_OVERLAP': 0.3, 'RPN_NMS_THRESH': 0.7, 'RPN_POSITIVE_OVERLAP': 0.7, 'RPN_POSITIVE_WEIGHT': -1.0, 'RPN_POST_NMS_TOP_N': 300, 'RPN_PRE_NMS_TOP_N': 6000, 'SHUFFLE': True, 'begin_epoch': 0, 'end_epoch': 8, 'lr': 0.0005, 'lr_step': '5.33', 'model_prefix': 'e2e', 'momentum': 0.9, 'warmup': True, 'warmup_lr': 5e-05, 'warmup_step': 250, 'wd': 0.0005}, 'dataset': {'NUM_CLASSES': 81, 'dataset': 'coco', 'dataset_path': './data/coco', 'image_set': 
'train2014+valminusminival2014', 'proposal': 'rpn', 'root_path': './data', 'test_image_set': 'test-dev2015'}, 'default': {'frequent': 100, 'kvstore': 'device'}, 'gpus': '0,1,2,3,4,5,6,7', 'network': {'ANCHOR_RATIOS': [0.5, 1, 2], 'ANCHOR_SCALES': [4, 8, 16, 32], 'FIXED_PARAMS': ['conv1', 'bn_conv1', 'res2', 'bn2', 'gamma', 'beta'], 'FIXED_PARAMS_SHARED': ['conv1', 'bn_conv1', 'res2', 'bn2', 'res3', 'bn3', 'res4', 'bn4', 'gamma', 'beta'], 'IMAGE_STRIDE': 0, 'NUM_ANCHORS': 12, 'PIXEL_MEANS': array([ 103.06, 115.9 , 123.15]), 'RCNN_FEAT_STRIDE': 16, 'RPN_FEAT_STRIDE': 16, 'pretrained': './model/pretrained_model/resnet_v1_101', 'pretrained_epoch': 0}, 'output_path': './output/fcis/coco', 'symbol': 'resnet_v1_101_fcis'} loading annotations into memory... Done (t=12.97s) creating index... index created! num_images 82783 prepare gt_sdsdb using 30.2918889523 seconds

hyzwj commented 6 years ago

Just wait; it will take a long time to prepare the images for training.