Closed AlejandroCN7 closed 3 years ago
I am experiencing this error.
Based on the following test script using a DQN model trained for 20 episodes:
#!/usr/bin/python3
import gym
import energym
import argparse
import uuid
import mlflow
import numpy as np
from energym.utils.callbacks import LoggerCallback, LoggerEvalCallback
from energym.utils.wrappers import NormalizeObservation
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback, BaseCallback, CallbackList
from stable_baselines3.common.vec_env import DummyVecEnv
# Command-line interface: the target environment, the number of episodes and
# the DQN hyperparameters that are recorded with the run.
parser = argparse.ArgumentParser()
# The environment id is concatenated into the run name and handed to
# gym.make(), so it is mandatory: omitting it now produces an argparse usage
# error instead of a TypeError when the run name is built.
parser.add_argument('--environment', '-env', type=str, required=True)
parser.add_argument('--episodes', '-ep', type=int, default=1)
parser.add_argument('--learning_rate', '-lr', type=float, default=0.0001)
parser.add_argument('--buffer_size', '-bf', type=int, default=1000000)
parser.add_argument('--learning_starts', '-ls', type=int, default=50000)
parser.add_argument('--batch_size', '-bs', type=int, default=32)
parser.add_argument('--tau', '-t', type=float, default=1.0)
parser.add_argument('--gamma', '-g', type=float, default=.99)
parser.add_argument('--train_freq', '-tf', type=int, default=4)
parser.add_argument('--gradient_steps', '-gs', type=int, default=1)
parser.add_argument('--target_update_interval', '-tu', type=int, default=10000)
parser.add_argument('--exploration_fraction', '-e', type=float, default=.1)
parser.add_argument('--exploration_initial_eps', '-ei', type=float, default=1.0)
parser.add_argument('--exploration_final_eps', '-ef', type=float, default=.05)
parser.add_argument('--max_grad_norm', '-m', type=float, default=10)
args = parser.parse_args()
# experiment ID
environment = args.environment
n_episodes = args.episodes
# The same name identifies the MLflow run and the directory the model is
# loaded from.
name = 'DQN-' + environment + '-' + str(n_episodes) + '-episodes'

with mlflow.start_run(run_name=name):
    # Record the run configuration.
    mlflow.log_param('env', environment)
    mlflow.log_param('episodes', n_episodes)
    mlflow.log_param('learning_rate', args.learning_rate)
    mlflow.log_param('buffer_size', args.buffer_size)
    mlflow.log_param('learning_starts', args.learning_starts)
    mlflow.log_param('batch_size', args.batch_size)
    mlflow.log_param('tau', args.tau)
    mlflow.log_param('gamma', args.gamma)
    mlflow.log_param('train_freq', args.train_freq)
    mlflow.log_param('gradient_steps', args.gradient_steps)
    mlflow.log_param('target_update_interval', args.target_update_interval)
    mlflow.log_param('exploration_fraction', args.exploration_fraction)
    mlflow.log_param('exploration_initial_eps', args.exploration_initial_eps)
    mlflow.log_param('exploration_final_eps', args.exploration_final_eps)
    mlflow.log_param('max_grad_norm', args.max_grad_norm)

    env = gym.make(environment)
    env = NormalizeObservation(env)

    #### LOAD MODEL ####
    model = DQN.load('best_models/' + name + '/best_model.zip')

    # Bound before the loop so the metric logging below cannot hit an unbound
    # name when the loop runs zero times (e.g. with the default --episodes 1,
    # since the loop iterates n_episodes - 1 times).
    rewards = []
    try:
        for i in range(n_episodes - 1):
            obs = env.reset()
            rewards = []
            done = False
            current_month = 0
            while not done:
                # `a` is passed to the environment exactly as predicted; it
                # may be a numpy scalar or a numpy.ndarray.
                a, _ = model.predict(obs)
                obs, reward, done, info = env.step(a)
                rewards.append(reward)
                # Print the running total each time the reported month changes.
                if info['month'] != current_month:
                    current_month = info['month']
                    print(info['month'], sum(rewards))
            print('Episode ', i, 'Mean reward: ', np.mean(rewards), 'Cumulative reward: ', sum(rewards))
    finally:
        # Release the environment even if a step raises.
        env.close()

    # `rewards` holds the last episode only; skip logging if no episode ran.
    if rewards:
        mlflow.log_metric('mean_reward', np.mean(rewards))
        mlflow.log_metric('cumulative_reward', sum(rewards))

# Kept from the original script. NOTE(review): the `with` block above should
# already end the run on exit, so this looks redundant - confirm.
mlflow.end_run()
The error that occurs is as follows:
ERROR: <class 'numpy.ndarray'>
Traceback (most recent call last):
File "./DQN.py", line 115, in <module>
obs, reward, done, info = env.step(a)
File "/workspaces/energym/energym/utils/wrappers.py", line 24, in step
observation, reward, done, info = self.env.step(action)
File "/workspaces/energym/energym/envs/eplus_env.py", line 169, in step
action_ = list(setpoints)
UnboundLocalError: local variable 'setpoints' referenced before assignment
This happens because the 'if' blocks only handle integers, tuples and lists; they do not consider the possibility that the action can be an object of the numpy.ndarray class.
If I print the actions, the output is as follows:
...
****
ACTION:
6
<class 'numpy.int64'>
****
****
ACTION:
6
<class 'numpy.int64'>
****
****
ACTION:
9
<class 'numpy.ndarray'>
****
ERROR...
Therefore, I think it could be solved by simply converting the value to an integer, since what is returned is a single number.
P.S. I attach the model used for replication: best_model.zip
Nice! I'm going to replicate and update repo applying what you say :). Thank you so much!
You're right! The CSVLogger class works with the date and observation variables separately, but this information shouldn't be resized in the original observation. The following should solve it. Thank you so much again @manjavacas
The problem is in the action-type "if" blocks (environment step). The test usually passes but sometimes fails randomly. This bug needs to be fixed.