khanrc / pt.darts

PyTorch Implementation of DARTS: Differentiable Architecture Search
MIT License
439 stars 108 forks source link

AssertionError: can only join a child process #8

Open twangnh opened 5 years ago

twangnh commented 5 years ago

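Hi @khanrc, could you please take a look at the error below? It appears when I interrupt `search.py` with Ctrl+C during validation while running under the PyCharm debugger. Thank you!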

####### ALPHA #######
# Alpha - normal
tensor([[0.1218, 0.0975, 0.1138, 0.1351, 0.1548, 0.1235, 0.1320, 0.1215],
        [0.1139, 0.1003, 0.1164, 0.1334, 0.1465, 0.1337, 0.1122, 0.1436]],
       device='cuda:3', grad_fn=<SoftmaxBackward>)
tensor([[0.1214, 0.0977, 0.1129, 0.1615, 0.1369, 0.1158, 0.1270, 0.1268],
        [0.1156, 0.1016, 0.1173, 0.1353, 0.1403, 0.1248, 0.1239, 0.1412],
        [0.1093, 0.0945, 0.1168, 0.1412, 0.1366, 0.1320, 0.1244, 0.1452]],
       device='cuda:3', grad_fn=<SoftmaxBackward>)
tensor([[0.1237, 0.0983, 0.1121, 0.1561, 0.1244, 0.1320, 0.1224, 0.1310],
        [0.1165, 0.1013, 0.1166, 0.1399, 0.1313, 0.1222, 0.1181, 0.1541],
        [0.1115, 0.0964, 0.1188, 0.1337, 0.1248, 0.1320, 0.1303, 0.1525],
        [0.1046, 0.0929, 0.1095, 0.1312, 0.1279, 0.1366, 0.1376, 0.1597]],
       device='cuda:3', grad_fn=<SoftmaxBackward>)
tensor([[0.1203, 0.0975, 0.1106, 0.1398, 0.1291, 0.1238, 0.1283, 0.1506],
        [0.1149, 0.1007, 0.1136, 0.1293, 0.1449, 0.1309, 0.1268, 0.1389],
        [0.1023, 0.0916, 0.1123, 0.1368, 0.1317, 0.1337, 0.1295, 0.1621],
        [0.0976, 0.0884, 0.1021, 0.1404, 0.1391, 0.1330, 0.1297, 0.1698],
        [0.0925, 0.0855, 0.0926, 0.1443, 0.1380, 0.1312, 0.1340, 0.1819]],
       device='cuda:3', grad_fn=<SoftmaxBackward>)

# Alpha - reduce
tensor([[0.1393, 0.1220, 0.1175, 0.1296, 0.1360, 0.1224, 0.1201, 0.1131],
        [0.1237, 0.1151, 0.1221, 0.1237, 0.1376, 0.1152, 0.1246, 0.1379]],
       device='cuda:3', grad_fn=<SoftmaxBackward>)
tensor([[0.1412, 0.1234, 0.1178, 0.1322, 0.1343, 0.1160, 0.1214, 0.1138],
        [0.1200, 0.1119, 0.1241, 0.1250, 0.1303, 0.1242, 0.1288, 0.1358],
        [0.1238, 0.1044, 0.1233, 0.1229, 0.1382, 0.1205, 0.1391, 0.1277]],
       device='cuda:3', grad_fn=<SoftmaxBackward>)
tensor([[0.1366, 0.1207, 0.1217, 0.1282, 0.1400, 0.1193, 0.1187, 0.1148],
        [0.1253, 0.1168, 0.1290, 0.1238, 0.1278, 0.1233, 0.1187, 0.1354],
        [0.1167, 0.1024, 0.1208, 0.1316, 0.1365, 0.1304, 0.1352, 0.1264],
        [0.1172, 0.1006, 0.1191, 0.1271, 0.1343, 0.1302, 0.1401, 0.1314]],
       device='cuda:3', grad_fn=<SoftmaxBackward>)
tensor([[0.1443, 0.1285, 0.1273, 0.1274, 0.1273, 0.1195, 0.1124, 0.1133],
        [0.1248, 0.1172, 0.1232, 0.1251, 0.1308, 0.1228, 0.1199, 0.1360],
        [0.1244, 0.1066, 0.1240, 0.1254, 0.1320, 0.1290, 0.1335, 0.1251],
        [0.1219, 0.1057, 0.1230, 0.1340, 0.1291, 0.1253, 0.1327, 0.1284],
        [0.1173, 0.1051, 0.1216, 0.1347, 0.1382, 0.1252, 0.1289, 0.1289]],
       device='cuda:3', grad_fn=<SoftmaxBackward>)
#####################
04/05 09:25:34 AM | Train: [10/50] Step 000/390 Loss 0.460 Prec@(1,5) (87.5%, 98.4%)
04/05 09:30:35 AM | Train: [10/50] Step 050/390 Loss 0.436 Prec@(1,5) (85.2%, 99.3%)
04/05 09:35:18 AM | Train: [10/50] Step 100/390 Loss 0.445 Prec@(1,5) (84.7%, 99.1%)
04/05 09:40:06 AM | Train: [10/50] Step 150/390 Loss 0.449 Prec@(1,5) (84.4%, 99.2%)
04/05 09:44:48 AM | Train: [10/50] Step 200/390 Loss 0.460 Prec@(1,5) (84.1%, 99.2%)
04/05 09:49:28 AM | Train: [10/50] Step 250/390 Loss 0.464 Prec@(1,5) (83.8%, 99.2%)
04/05 09:54:09 AM | Train: [10/50] Step 300/390 Loss 0.463 Prec@(1,5) (84.0%, 99.3%)
04/05 09:58:48 AM | Train: [10/50] Step 350/390 Loss 0.465 Prec@(1,5) (83.9%, 99.3%)
04/05 10:02:31 AM | Train: [10/50] Step 390/390 Loss 0.469 Prec@(1,5) (83.8%, 99.3%)
04/05 10:02:31 AM | Train: [10/50] Final Prec@1 83.8000%
04/05 10:02:32 AM | Valid: [10/50] Step 000/390 Loss 0.688 Prec@(1,5) (75.0%, 96.9%)
^CTraceback (most recent call last):
  File "/home/wangtao/.pycharm_helpers/pydev/pydevd.py", line 1664, in <module>
    main()
  File "/home/wangtao/.pycharm_helpers/pydev/pydevd.py", line 1658, in main
Traceback (most recent call last):
  File "/home/wangtao/.pycharm_helpers/pydev/_pydevd_bundle/pydevd_comm.py", line 365, in _on_run
    r = self.sock.recv(1024)
    globals = debugger.run(setup['file'], None, None, is_module)
KeyboardInterrupt
  File "/home/wangtao/.pycharm_helpers/pydev/pydevd.py", line 1068, in run
    pydev_imports.execfile(file, globals, locals)  # execute the script
  File "/home/wangtao/.pycharm_helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "/home/wangtao/prj/pt.darts/search.py", line 201, in <module>
    main()
  File "/home/wangtao/prj/pt.darts/search.py", line 88, in main
    top1 = validate(valid_loader, model, epoch, cur_step)
  File "/home/wangtao/prj/pt.darts/search.py", line 172, in validate
    for step, (X, y) in enumerate(valid_loader):
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 819, in __iter__
    return _DataLoaderIter(self)
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 560, in __init__
    w.start()
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/multiprocessing/process.py", line 105, in start
    self._popen = self._Popen(self)
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/multiprocessing/context.py", line 212, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/multiprocessing/context.py", line 267, in _Popen
    return Popen(process_obj)
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/multiprocessing/popen_fork.py", line 20, in __init__
    self._launch(process_obj)
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/multiprocessing/popen_fork.py", line 67, in _launch
    self.pid = os.fork()
  File "/home/wangtao/.pycharm_helpers/pydev/_pydev_bundle/pydev_monkey.py", line 464, in new_fork
    _on_forked_process()
  File "/home/wangtao/.pycharm_helpers/pydev/_pydev_bundle/pydev_monkey.py", line 50, in _on_forked_process
    pydevd.settrace_forked()
  File "/home/wangtao/.pycharm_helpers/pydev/pydevd.py", line 1445, in settrace_forked
    patch_multiprocessing=True,
  File "/home/wangtao/.pycharm_helpers/pydev/pydevd.py", line 1210, in settrace
    patch_multiprocessing,
  File "/home/wangtao/.pycharm_helpers/pydev/pydevd.py", line 1254, in _locked_settrace
    debugger.connect(host, port)  # Note: connect can raise error.
  File "/home/wangtao/.pycharm_helpers/pydev/pydevd.py", line 328, in connect
    self.initialize_network(s)
  File "/home/wangtao/.pycharm_helpers/pydev/pydevd.py", line 320, in initialize_network
    time.sleep(0.1)  # give threads time to start
KeyboardInterrupt
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/multiprocessing/util.py", line 319, in _exit_function
    p.join()
  File "/home/wangtao/anaconda2/envs/pytorch1.0py3.5/lib/python3.5/multiprocessing/process.py", line 122, in join
    assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process