I keep getting a "list index out of range" error and I can't seem to solve it. Can anyone help?
I'm using:
Pytorch 1.10.0
Cudatoolkit 11.3.1
Traceback (most recent call last):
File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 26, in mp_run_session
metrics = session.run()
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 118, in run
self.run_rl()
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 106, in run_rl
self.agent.update(state, action, reward, next_state, done)
File "/home/harman/SLM-Lab/slm_lab/agent/__init__.py", line 53, in update
loss = self.algorithm.train()
File "/home/harman/SLM-Lab/slm_lab/agent/algorithm/dqn.py", line 138, in train
loss = self.calc_q_loss(batch)
File "/home/harman/SLM-Lab/slm_lab/agent/algorithm/dqn.py", line 194, in calc_q_loss
q_preds = self.net(states)
File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/harman/SLM-Lab/slm_lab/agent/net/conv.py", line 176, in forward
x = self.conv_model(x)
File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/container.py", line 141, in forward
input = module(input)
File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 446, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 443, in _conv_forward
self.padding, self.dilation, self.groups)
RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED
Process Process-4:
Traceback (most recent call last):
File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 26, in mp_run_session
metrics = session.run()
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 118, in run
self.run_rl()
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 106, in run_rl
self.agent.update(state, action, reward, next_state, done)
File "/home/harman/SLM-Lab/slm_lab/agent/__init__.py", line 53, in update
loss = self.algorithm.train()
File "/home/harman/SLM-Lab/slm_lab/agent/algorithm/dqn.py", line 140, in train
total_loss += loss
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
Process Process-5:
Traceback (most recent call last):
File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 26, in mp_run_session
metrics = session.run()
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 118, in run
self.run_rl()
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 106, in run_rl
self.agent.update(state, action, reward, next_state, done)
File "/home/harman/SLM-Lab/slm_lab/agent/__init__.py", line 53, in update
loss = self.algorithm.train()
File "/home/harman/SLM-Lab/slm_lab/agent/algorithm/dqn.py", line 140, in train
total_loss += loss
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
Traceback (most recent call last):
File "run_lab.py", line 99, in <module>
main()
File "run_lab.py", line 91, in main
get_spec_and_run(*args)
File "run_lab.py", line 75, in get_spec_and_run
run_spec(spec, lab_mode)
File "run_lab.py", line 58, in run_spec
Trial(spec).run()
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 182, in run
metrics = analysis.analyze_trial(self.spec, session_metrics_list)
File "/home/harman/SLM-Lab/slm_lab/experiment/analysis.py", line 266, in analyze_trial
trial_metrics = calc_trial_metrics(session_metrics_list, info_prepath)
File "/home/harman/SLM-Lab/slm_lab/experiment/analysis.py", line 186, in calc_trial_metrics
frames = session_metrics_list[0]['local']['frames']
IndexError: list index out of range
I keep getting a "list index out of range" error and I can't seem to solve it. Can anyone help?
I'm using: Pytorch 1.10.0 Cudatoolkit 11.3.1
Traceback (most recent call last): File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap self.run() File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 99, in run self._target(*self._args, **self._kwargs) File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 26, in mp_run_session metrics = session.run() File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 118, in run self.run_rl() File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 106, in run_rl self.agent.update(state, action, reward, next_state, done) File "/home/harman/SLM-Lab/slm_lab/agent/__init__.py", line 53, in update loss = self.algorithm.train() File "/home/harman/SLM-Lab/slm_lab/agent/algorithm/dqn.py", line 138, in train loss = self.calc_q_loss(batch) File "/home/harman/SLM-Lab/slm_lab/agent/algorithm/dqn.py", line 194, in calc_q_loss q_preds = self.net(states) File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl return forward_call(*input, **kwargs) File "/home/harman/SLM-Lab/slm_lab/agent/net/conv.py", line 176, in forward x = self.conv_model(x) File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl return forward_call(*input, **kwargs) File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/container.py", line 141, in forward input = module(input) File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl return forward_call(*input, **kwargs) File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 446, in forward return self._conv_forward(input, self.weight, self.bias) File "/home/harman/anaconda3/envs/lab/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 443, in _conv_forward self.padding, self.dilation, self.groups) 
RuntimeError: cuDNN error: CUDNN_STATUS_NOT_INITIALIZED Process Process-4: Traceback (most recent call last): File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap self.run() File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 99, in run self._target(*self._args, **self._kwargs) File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 26, in mp_run_session metrics = session.run() File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 118, in run self.run_rl() File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 106, in run_rl self.agent.update(state, action, reward, next_state, done) File "/home/harman/SLM-Lab/slm_lab/agent/__init__.py", line 53, in update loss = self.algorithm.train() File "/home/harman/SLM-Lab/slm_lab/agent/algorithm/dqn.py", line 140, in train total_loss += loss RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! Process Process-5: Traceback (most recent call last): File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap self.run() File "/home/harman/anaconda3/envs/lab/lib/python3.7/multiprocessing/process.py", line 99, in run self._target(*self._args, **self._kwargs) File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 26, in mp_run_session metrics = session.run() File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 118, in run self.run_rl() File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 106, in run_rl self.agent.update(state, action, reward, next_state, done) File "/home/harman/SLM-Lab/slm_lab/agent/__init__.py", line 53, in update loss = self.algorithm.train() File "/home/harman/SLM-Lab/slm_lab/agent/algorithm/dqn.py", line 140, in train total_loss += loss RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! 
Traceback (most recent call last): File "run_lab.py", line 99, in <module>
main()
File "run_lab.py", line 91, in main
get_spec_and_run(*args)
File "run_lab.py", line 75, in get_spec_and_run
run_spec(spec, lab_mode)
File "run_lab.py", line 58, in run_spec
Trial(spec).run()
File "/home/harman/SLM-Lab/slm_lab/experiment/control.py", line 182, in run
metrics = analysis.analyze_trial(self.spec, session_metrics_list)
File "/home/harman/SLM-Lab/slm_lab/experiment/analysis.py", line 266, in analyze_trial
trial_metrics = calc_trial_metrics(session_metrics_list, info_prepath)
File "/home/harman/SLM-Lab/slm_lab/experiment/analysis.py", line 186, in calc_trial_metrics
frames = session_metrics_list[0]['local']['frames']
IndexError: list index out of range
+-----------------------------------------------------------------------------+ | NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.4 | |-------------------------------+----------------------+----------------------+ | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. | | | | MIG M. | |===============================+======================+======================| | 0 NVIDIA GeForce ... Off | 00000000:01:00.0 On | N/A | | 30% 33C P8 23W / 220W | 715MiB / 7973MiB | 26% Default | | | | N/A | +-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=============================================================================| | 0 N/A N/A 952 G /usr/lib/xorg/Xorg 120MiB | | 0 N/A N/A 1472 G /usr/lib/xorg/Xorg 327MiB | | 0 N/A N/A 1599 G /usr/bin/gnome-shell 71MiB | | 0 N/A N/A 1898 G /usr/lib/firefox/firefox 150MiB | +-----------------------------------------------------------------------------+