Open nhatduy19599 opened 3 years ago
Please install the GPU version of MXNet.
I tried installing each of mxnet-cu80, mxnet-cu100/cu101, and also plain mxnet, but running the executable still gives me the above error.
Traceback (most recent call last): File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\symbol\symbol.py", line 1623, in simple_bind ctypes.byref(exe_handle))) File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\base.py", line 253, in check_call raise MXNetError(py_str(_LIB.MXGetLastError())) mxnet.base.MXNetError: [15:24:05] c:\jenkins\workspace\mxnet-tag\mxnet\src\common../common/cuda_utils.h:310: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: CUDA: invalid device ordinal
During handling of the above exception, another exception occurred:
How should I deal with this error? Thank you.
What is the problem I am having here? Thanks. My CUDA version is 10.1, and I installed mxnet 1.7.0.post2 and mxnet-cu101.
Please install the GPU version of MXNet. How do I specify the pretrainmodel param in trainsoftmax.py?
When I run the command CUDA_VISIBLE_DEVICES='0,1,2,3' python -u train.py --network r100 --loss arcface --dataset emore, I get the following error:
python -u train.py --network r100 --loss arcface --dataset emore gpu num: 4 prefix ./models\r100-arcface-emore\model image_size [112, 112] num_classes 85742 Called with argument: Namespace(batch_size=512, ckpt=3, ctx_num=4, dataset='emore', frequent=20, image_channel=3, kvstore='device', loss='arcface', lr=0.1, lr_steps='100000,160000,220000', models_root='./models', mom=0.9, network='r100', per_batch_size=128, pretrained='', pretrained_epoch=1, rescale_threshold=0, verbose=2000, wd=0.0005) {'bn_mom': 0.9, 'workspace': 256, 'emb_size': 512, 'ckpt_embedding': True, 'net_se': 0, 'net_act': 'prelu', 'net_unit': 3, 'net_input': 1, 'net_blocks': [1, 4, 6, 2], 'net_output': 'E', 'net_multiplier': 1.0, 'val_targets': ['lfw', 'cfp_fp', 'agedb_30'], 'ce_loss': True, 'fc7_lr_mult': 1.0, 'fc7_wd_mult': 1.0, 'fc7_no_bias': False, 'max_steps': 0, 'data_rand_mirror': True, 'data_cutoff': False, 'data_color': 0, 'data_images_filter': 0, 'count_flops': True, 'memonger': False, 'is_shuffled_rec': False, 'fp16': False, 'loss_name': 'margin_softmax', 'loss_s': 64.0, 'loss_m1': 1.0, 'loss_m2': 0.5, 'loss_m3': 0.0, 'net_name': 'fresnet', 'num_layers': 100, 'dataset': 'emore', 'dataset_path': '../datasets/faces_emore', 'num_classes': 85742, 'image_shape': [112, 112, 3], 'loss': 'arcface', 'network': 'r100', 'num_workers': 1, 'batch_size': 512, 'per_batch_size': 128} 0 1 E 3 prelu False Network FLOPs: 24.2G loading: ../datasets/faces_emore\train.rec False INFO:root:loading recordio ../datasets/faces_emore\train.rec... header0 label [5822654. 5908396.] 
id2range 85742 5822653 rand_mirror True call reset() loading bin 0 loading bin 1000 loading bin 2000 loading bin 3000 loading bin 4000 loading bin 5000 loading bin 6000 loading bin 7000 loading bin 8000 loading bin 9000 loading bin 10000 loading bin 11000 (12000, 3, 112, 112) ver lfw loading bin 0 loading bin 1000 loading bin 2000 loading bin 3000 loading bin 4000 loading bin 5000 loading bin 6000 loading bin 7000 loading bin 8000 loading bin 9000 loading bin 10000 loading bin 11000 loading bin 12000 loading bin 13000 (14000, 3, 112, 112) ver cfp_fp loading bin 0 loading bin 1000 loading bin 2000 loading bin 3000 loading bin 4000 loading bin 5000 loading bin 6000 loading bin 7000 loading bin 8000 loading bin 9000 loading bin 10000 loading bin 11000 (12000, 3, 112, 112) ver agedb_30 lr_steps [100000, 160000, 220000] Traceback (most recent call last): File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\symbol\symbol.py", line 1832, in simple_bind ctypes.byref(exe_handle))) File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\base.py", line 246, in check_call raise get_last_ffi_error() mxnet.base.MXNetError: Traceback (most recent call last): File "C:\Jenkins\workspace\mxnet-tag\mxnet\src\storage\storage.cc", line 119 MXNetError: Compile with USE_CUDA=1 to enable GPU usage
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "train.py", line 485, in <module>
main()
File "train.py", line 481, in main
train_net(args)
File "train.py", line 475, in train_net
epoch_end_callback=epoch_cb)
File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\module\base_module.py", line 498, in fit
for_training=True, force_rebind=force_rebind)
File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\module\module.py", line 429, in bind
state_names=self._state_names)
File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\module\executor_group.py", line 280, in __init__
self.bind_exec(data_shapes, label_shapes, shared_group)
File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\module\executor_group.py", line 384, in bind_exec
shared_group))
File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\module\executor_group.py", line 678, in _bind_ith_exec
shared_buffer=shared_data_arrays, **input_shapes)
File "C:\Users\nhatd\AppData\Local\Programs\Python\Python36\lib\site-packages\mxnet\symbol\symbol.py", line 1838, in simple_bind
raise RuntimeError(error_msg)
RuntimeError: simple_bind error. Arguments:
data: (128, 3, 112, 112)
softmax_label: (128,)
Traceback (most recent call last):
File "C:\Jenkins\workspace\mxnet-tag\mxnet\src\storage\storage.cc", line 119
MXNetError: Compile with USE_CUDA=1 to enable GPU usage
Can you help me see what's wrong with it? Thank you.