msracver / Flow-Guided-Feature-Aggregation

Flow-Guided Feature Aggregation for Video Object Detection
MIT License
723 stars 190 forks source link

Fail to open output .params file when the training begins #28

Closed painterdrown closed 6 years ago

painterdrown commented 6 years ago

The version of mxnet is v0.10.0. I managed to run the demo, but when I try to train, an error occurs:

Namespace(cfg='experiments/fgfa_rfcn/cfgs/resnet_v1_101_flownet_imagenet_vid_rfcn_end2end_ohem.yaml', ignore_cache=False, shuffle=False, thresh=0.001, vis=False)
{'CLASS_AGNOSTIC': True,
 'MXNET_VERSION': '',
 'SCALES': [(600, 1000)],
 'TEST': {'BATCH_IMAGES': 1,
          'CXX_PROPOSAL': True,
          'HAS_RPN': True,
          'KEY_FRAME_INTERVAL': 9,
          'NMS': 0.3,
          'RPN_MIN_SIZE': 0,
          'RPN_NMS_THRESH': 0.7,
          'RPN_POST_NMS_TOP_N': 300,
          'RPN_PRE_NMS_TOP_N': 6000,
          'SEQ_NMS': False,
          'max_per_image': 300,
          'test_epoch': 2},
 'TRAIN': {'ASPECT_GROUPING': True,
           'BATCH_IMAGES': 1,
           'BATCH_ROIS': -1,
           'BATCH_ROIS_OHEM': 128,
           'BBOX_MEANS': [0.0, 0.0, 0.0, 0.0],
           'BBOX_NORMALIZATION_PRECOMPUTED': True,
           'BBOX_REGRESSION_THRESH': 0.5,
           'BBOX_STDS': [0.1, 0.1, 0.2, 0.2],
           'BBOX_WEIGHTS': array([1., 1., 1., 1.]),
           'BG_THRESH_HI': 0.5,
           'BG_THRESH_LO': 0.0,
           'CXX_PROPOSAL': True,
           'ENABLE_OHEM': True,
           'END2END': True,
           'FG_FRACTION': 0.25,
           'FG_THRESH': 0.5,
           'FLIP': True,
           'MAX_OFFSET': 9,
           'MIN_OFFSET': -9,
           'RESUME': False,
           'RPN_BATCH_SIZE': 256,
           'RPN_BBOX_WEIGHTS': [1.0, 1.0, 1.0, 1.0],
           'RPN_CLOBBER_POSITIVES': False,
           'RPN_FG_FRACTION': 0.5,
           'RPN_MIN_SIZE': 0,
           'RPN_NEGATIVE_OVERLAP': 0.3,
           'RPN_NMS_THRESH': 0.7,
           'RPN_POSITIVE_OVERLAP': 0.7,
           'RPN_POSITIVE_WEIGHT': -1.0,
           'RPN_POST_NMS_TOP_N': 300,
           'RPN_PRE_NMS_TOP_N': 6000,
           'SHUFFLE': True,
           'begin_epoch': 0,
           'end_epoch': 2,
           'lr': 0.00025,
           'lr_factor': 0.1,
           'lr_step': '1.333',
           'model_prefix': 'fgfa_rfcn_vid',
           'momentum': 0.9,
           'warmup': False,
           'warmup_lr': 0,
           'warmup_step': 0,
           'wd': 0.0005},
 'dataset': {'NUM_CLASSES': 31,
             'dataset': 'ImageNetVID',
             'dataset_path': './data/ILSVRC2015',
             'enable_detailed_eval': True,
             'image_set': 'DET_train_30classes+VID_train_15frames',
             'motion_iou_path': './lib/dataset/imagenet_vid_groundtruth_motion_iou.mat',
             'proposal': 'rpn',
             'root_path': './data',
             'test_image_set': 'VID_val_videos'},
 'default': {'frequent': 100, 'kvstore': 'device'},
 'gpus': '0,1,2,3',
 'network': {'ANCHOR_MEANS': [0.0, 0.0, 0.0, 0.0],
             'ANCHOR_RATIOS': [0.5, 1, 2],
             'ANCHOR_SCALES': [8, 16, 32],
             'ANCHOR_STDS': [0.1, 0.1, 0.4, 0.4],
             'FGFA_FEAT_DIM': 3072,
             'FIXED_PARAMS': ['conv1', 'res2', 'bn'],
             'IMAGE_STRIDE': 0,
             'NORMALIZE_RPN': True,
             'NUM_ANCHORS': 9,
             'PIXEL_MEANS': array([103.06, 115.9 , 123.15]),
             'RCNN_FEAT_STRIDE': 16,
             'RPN_FEAT_STRIDE': 16,
             'pretrained': './model/pretrained_model/resnet_v1_101',
             'pretrained_epoch': 0,
             'pretrained_flow': './model/pretrained_model/flownet'},
 'output_path': './output/fgfa_rfcn/imagenet_vid',
 'symbol': 'resnet_v1_101_flownet_rfcn'}
num_images 555
ImageNetVID_VID_val_videos gt roidb loaded from ./data/cache/ImageNetVID_VID_val_videos_gt_roidb.pkl
[12:10:52] include/dmlc/logging.h:304: [12:10:52] src/io/local_filesys.cc:154: Check failed: allow_null  LocalFileSystem: fail to open "./output/fgfa_rfcn/imagenet_vid/resnet_v1_101_flownet_imagenet_vid_rfcn_end2end_ohem/VID_val_videos/../DET_train_30classes_VID_train_15frames/fgfa_rfcn_vid-0002.params"

Stack trace returned 10 entries:
[bt] (0) /usr/local/lib/python2.7/dist-packages/mxnet-0.10.0-py2.7.egg/mxnet/libmxnet.so(_ZN4dmlc2io15LocalFileSystem4OpenERKNS0_3URIEPKcb+0x640) [0x7f9adb858bf0]
[bt] (1) /usr/local/lib/python2.7/dist-packages/mxnet-0.10.0-py2.7.egg/mxnet/libmxnet.so(_ZN4dmlc6Stream6CreateEPKcS2_b+0x1c6) [0x7f9adb84a446]
[bt] (2) /usr/local/lib/python2.7/dist-packages/mxnet-0.10.0-py2.7.egg/mxnet/libmxnet.so(MXNDArrayLoad+0x4a5) [0x7f9adb7ddc25]
[bt] (3) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call_unix64+0x4c) [0x7f9af9b0de40]
[bt] (4) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call+0x2eb) [0x7f9af9b0d8ab]
[bt] (5) /usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so(_ctypes_callproc+0x48f) [0x7f9af9d1d3df]
[bt] (6) /usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so(+0x11d82) [0x7f9af9d21d82]
[bt] (7) python(PyEval_EvalFrameEx+0x578f) [0x4c15bf]
[bt] (8) python(PyEval_EvalCodeEx+0x306) [0x4b9ab6]
[bt] (9) python(PyEval_EvalFrameEx+0x603f) [0x4c1e6f]

Traceback (most recent call last):
  File "experiments/fgfa_rfcn/fgfa_rfcn_end2end_train_test.py", line 21, in <module>
    test.main()
  File "experiments/fgfa_rfcn/../../fgfa_rfcn/test.py", line 58, in main
    enable_detailed_eval=config.dataset.enable_detailed_eval)
  File "experiments/fgfa_rfcn/../../fgfa_rfcn/function/test_rcnn.py", line 83, in test_rcnn
    arg_params, aux_params = load_param(prefix, epoch, process=True)
  File "experiments/fgfa_rfcn/../../fgfa_rfcn/../lib/utils/load_model.py", line 57, in load_param
    arg_params, aux_params = load_checkpoint(prefix, epoch)
  File "experiments/fgfa_rfcn/../../fgfa_rfcn/../lib/utils/load_model.py", line 23, in load_checkpoint
    save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch))
  File "/usr/local/lib/python2.7/dist-packages/mxnet-0.10.0-py2.7.egg/mxnet/ndarray.py", line 2100, in load
    ctypes.byref(names)))
  File "/usr/local/lib/python2.7/dist-packages/mxnet-0.10.0-py2.7.egg/mxnet/base.py", line 84, in check_call
    raise MXNetError(py_str(_LIB.MXGetLastError()))
mxnet.base.MXNetError: [12:10:52] src/io/local_filesys.cc:154: Check failed: allow_null  LocalFileSystem: fail to open "./output/fgfa_rfcn/imagenet_vid/resnet_v1_101_flownet_imagenet_vid_rfcn_end2end_ohem/VID_val_videos/../DET_train_30classes_VID_train_15frames/fgfa_rfcn_vid-0002.params"

Stack trace returned 10 entries:
[bt] (0) /usr/local/lib/python2.7/dist-packages/mxnet-0.10.0-py2.7.egg/mxnet/libmxnet.so(_ZN4dmlc2io15LocalFileSystem4OpenERKNS0_3URIEPKcb+0x640) [0x7f9adb858bf0]
[bt] (1) /usr/local/lib/python2.7/dist-packages/mxnet-0.10.0-py2.7.egg/mxnet/libmxnet.so(_ZN4dmlc6Stream6CreateEPKcS2_b+0x1c6) [0x7f9adb84a446]
[bt] (2) /usr/local/lib/python2.7/dist-packages/mxnet-0.10.0-py2.7.egg/mxnet/libmxnet.so(MXNDArrayLoad+0x4a5) [0x7f9adb7ddc25]
[bt] (3) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call_unix64+0x4c) [0x7f9af9b0de40]
[bt] (4) /usr/lib/x86_64-linux-gnu/libffi.so.6(ffi_call+0x2eb) [0x7f9af9b0d8ab]
[bt] (5) /usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so(_ctypes_callproc+0x48f) [0x7f9af9d1d3df]
[bt] (6) /usr/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so(+0x11d82) [0x7f9af9d21d82]
[bt] (7) python(PyEval_EvalFrameEx+0x578f) [0x4c15bf]
[bt] (8) python(PyEval_EvalCodeEx+0x306) [0x4b9ab6]
[bt] (9) python(PyEval_EvalFrameEx+0x603f) [0x4c1e6f]

How can I solve this problem?

painterdrown commented 6 years ago

I found the problem!

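The author seems to have forgotten to uncomment the training call in experiments/fgfa_rfcn/fgfa_rfcn_end2end_train_test.py. With training commented out, the script goes straight to test.main(), which tries to load a checkpoint (fgfa_rfcn_vid-0002.params) that has not been produced yet: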

if __name__ == "__main__":
    # train_end2end.main()
    test.main()

The training call should be uncommented, like this:

if __name__ == "__main__":
    train_end2end.main()
    test.main()