Open yeungkong opened 2 years ago
You can still use python tools/train.py config
to train models.
I follow the tutorial to start a train.
from mmcv import Config, mkdir_or_exist
from os.path import abspath
cfg = Config.fromfile('./configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')
from mmdet.apis import set_random_seed
# Modify dataset type and path
cfg.dataset_type = 'CocoDataset'
cfg.data_root = 'data/'
cfg.data.test.type = 'CocoDataset'
cfg.data.test.data_root = 'data/'
cfg.data.test.ann_file = 'coco/annotations/instances_train2017.json'
cfg.data.test.img_prefix = 'coco/train2017'
cfg.data.train.type = 'CocoDataset'
cfg.data.train.data_root = 'data/'
cfg.data.train.ann_file = 'coco/annotations/instances_train2017.json'
cfg.data.train.img_prefix = 'coco/train2017'
cfg.data.val.type = 'CocoDataset'
cfg.data.val.data_root = 'data/'
cfg.data.val.ann_file = 'coco/annotations/instances_train2017.json'
cfg.data.val.img_prefix = 'coco/train2017'
# modify num classes of the model in box head
cfg.model.roi_head.bbox_head.num_classes = 80
# If we need to finetune a model based on a pre-trained detector, we need to
# use load_from to set the path of checkpoints.
# cfg.load_from = 'checkpoints/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth'
cfg.load_from = None
# Set up working dir to save files and logs.
cfg.work_dir = './tutorial_exps'
# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
cfg.optimizer.lr = 0.02 / 8
cfg.lr_config.warmup = None
cfg.log_config.interval = 10
# Change the evaluation metric since we use customized dataset.
cfg.evaluation.metric = 'mAP'
# We can set the evaluation interval to reduce the evaluation times
cfg.evaluation.interval = 12
# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 12
# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)
# We can also use tensorboard to log the training process
cfg.log_config.hooks = [
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')]
# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector
# Build dataset
datasets = [build_dataset(cfg.data.train)]
# Build the detector
model = build_detector(cfg.model)
# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES
# Create work_dir
mkdir_or_exist(abspath(cfg.work_dir))
train_detector(model, datasets, cfg, distributed=False, validate=True)
The following error occurs.
Traceback (most recent call last):
File "<string>", line 1, in <module>
Traceback (most recent call last):
File "D:/XXXXX/mmdetection/test_train.py", line 79, in <module>
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\spawn.py", line 105, in spawn_main
train_detector(model, datasets, cfg, distributed=False, validate=True)
File "D:\XXXXX\mmdetection\mmdet\apis\train.py", line 208, in train_detector
exitcode = _main(fd)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\spawn.py", line 114, in _main
prepare(preparation_data)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\spawn.py", line 225, in prepare
runner.run(data_loaders, cfg.workflow)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 127, in run
_fixup_main_from_path(data['init_main_from_path'])
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\spawn.py", line 277, in _fixup_main_from_path
epoch_runner(data_loaders[i], **kwargs)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 47, in train
run_name="__mp_main__")
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\runpy.py", line 263, in run_path
for i, data_batch in enumerate(self.data_loader):
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\torch\utils\data\dataloader.py", line 368, in __iter__
pkg_name=pkg_name, script_name=fname)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\runpy.py", line 85, in _run_code
return self._get_iterator()
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\torch\utils\data\dataloader.py", line 314, in _get_iterator
exec(code, run_globals)
File "D:\XXXXX\mmdetection\test_train.py", line 79, in <module>
train_detector(model, datasets, cfg, distributed=False, validate=True)
File "D:\XXXXX\mmdetection\mmdet\apis\train.py", line 208, in train_detector
return _MultiProcessingDataLoaderIter(self)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\torch\utils\data\dataloader.py", line 927, in __init__
runner.run(data_loaders, cfg.workflow)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 127, in run
epoch_runner(data_loaders[i], **kwargs)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\mmcv\runner\epoch_based_runner.py", line 47, in train
for i, data_batch in enumerate(self.data_loader):
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\torch\utils\data\dataloader.py", line 368, in __iter__
return self._get_iterator()
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\torch\utils\data\dataloader.py", line 314, in _get_iterator
w.start()
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\process.py", line 112, in start
return _MultiProcessingDataLoaderIter(self)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\site-packages\torch\utils\data\dataloader.py", line 927, in __init__
self._popen = self._Popen(self)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\popen_spawn_win32.py", line 89, in __init__
w.start()
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\process.py", line 112, in start
reduction.dump(process_obj, to_child)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\reduction.py", line 60, in dump
self._popen = self._Popen(self)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\context.py", line 223, in _Popen
ForkingPickler(file, protocol).dump(obj)
BrokenPipeError: [Errno 32] Broken pipe
return _default_context.get_context().Process._Popen(process_obj)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\popen_spawn_win32.py", line 46, in __init__
prep_data = spawn.get_preparation_data(process_obj._name)
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\spawn.py", line 143, in get_preparation_data
_check_not_importing_main()
File "D:\Program_Files\Anaconda3\envs\XXXXX\lib\multiprocessing\spawn.py", line 136, in _check_not_importing_main
is not going to be frozen to produce an executable.''')
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
Could you tell me what is wrong? I am using version 2.23.0 and pytorch 1.11
pytorch: 1.8.1+cu111 mmDet: 2.25.3 cuda 11.1
I had the same exact problem and other posts Suggestions didn't work for me.
Dear mmdetection users, I am used to use mmdetection v1.0.0 and now I am struggling to use mmdetection v2.x.x. In the old version, I can use the command python tools/train.py configs/xxxx.py to train my model. In newer version, I find the implementation codes have become more complicated. Can I use the old command to train a model? Thank you very much.
Have you solved it?
I have fix the issue,reflecting on my lack of familiarity with torch. solution: cfg.data.workers_per_gpu = 0
if occur further error: RuntimeError: Index put requires the source and destination dtypes match, got Long for the destination and int for the source. solution: please change “np.long” to “np.nt64” at the class “KittiTinyDataset”.
Dear mmdetection users, I am used to use mmdetection v1.0.0 and now I am struggling to use mmdetection v2.x.x. In the old version, I can use the command python tools/train.py configs/xxxx.py to train my model. In newer version, I find the implementation codes have become more complicated. Can I use the old command to train a model? Thank you very much.