Status: Closed (siddahant closed this issue 2 years ago)
I pushed a bugfix, let me know if that solves the issue.
No, I am still getting the same error. Here is my sample Colab code:
`
!pip install highway-env
import gym
import highway_env
!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents
from rl_agents.agents.common.factory import agent_factory
import sys
from tqdm.notebook import trange
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg
!git clone https://github.com/eleurent/highway-env.git
sys.path.insert(0, './highway-env/scripts/')
from utils import record_videos, show_videos
env = gym.make("highway-fast-v0")
env = record_videos(env)
obs, done = env.reset(), False
agent_config = {
"__class__": "<class 'rl_agents.agents.deep_q_network.pytorch.DQNAgent'>",
"model": {
"type": "MultiLayerPerceptron",
"layers": [256, 256]
},
"double": False,
"loss_function": "l2",
"optimizer": {
"lr": 5e-4
},
"gamma": 0.8,
"n_steps": 1,
"batch_size": 32,
"memory_capacity": 15000,
"target_update": 50,
"exploration": {
"method": "EpsilonGreedy",
"tau": 6000,
"temperature": 1.0,
"final_temperature": 0.05
}
}
agent = agent_factory(env, agent_config)
for step in trange(env.unwrapped.config["duration"], desc="Running..."):
action = agent.act(obs)
obs, reward, done, info = env.step(action)
env.close()
show_videos()
`
TypeError Traceback (most recent call last)
[<ipython-input-5-521d5e2a82ab>](https://localhost:8080/#) in <module>
53 # Run episode
54 for step in trange(env.unwrapped.config["duration"], desc="Running..."):
---> 55 action = agent.act(obs)
56 obs, reward, done, info = env.step(action)
57
[/usr/local/lib/python3.7/dist-packages/rl_agents/agents/deep_q_network/abstract.py](https://localhost:8080/#) in act(self, state, step_exploration_time)
76 # TODO: it would be more efficient to forward a batch of states
77 if isinstance(state, tuple):
---> 78 return tuple(self.act(agent_state, step_exploration_time=False) for agent_state in state)
79
80 # Single-agent setting
[/usr/local/lib/python3.7/dist-packages/rl_agents/agents/deep_q_network/abstract.py](https://localhost:8080/#) in <genexpr>(.0)
76 # TODO: it would be more efficient to forward a batch of states
77 if isinstance(state, tuple):
---> 78 return tuple(self.act(agent_state, step_exploration_time=False) for agent_state in state)
79
80 # Single-agent setting
[/usr/local/lib/python3.7/dist-packages/rl_agents/agents/deep_q_network/abstract.py](https://localhost:8080/#) in act(self, state, step_exploration_time)
79
80 # Single-agent setting
---> 81 values = self.get_state_action_values(state)
82 self.exploration_policy.update(values)
83 return self.exploration_policy.sample()
[/usr/local/lib/python3.7/dist-packages/rl_agents/agents/deep_q_network/abstract.py](https://localhost:8080/#) in get_state_action_values(self, state)
138 :return: [Q(a1,s), ..., Q(an,s)] the array of its action-values for each actions
139 """
--> 140 return self.get_batch_state_action_values([state])[0]
141
142 def step_optimizer(self, loss):
[/usr/local/lib/python3.7/dist-packages/rl_agents/agents/deep_q_network/pytorch.py](https://localhost:8080/#) in get_batch_state_action_values(self, states)
78
79 def get_batch_state_action_values(self, states):
---> 80 return self.value_net(torch.tensor(states, dtype=torch.float).to(self.device)).data.cpu().numpy()
81
82 def save(self, filename):
TypeError: must be real number, not dict
This is what I get when I print `states`:
tensor([[ 1.0000, 0.9185, 0.0000, 0.3125, 0.0000, 1.0000, 0.1066, 0.0000,
-0.0432, 0.0000, 1.0000, 0.2050, 0.2500, -0.0254, 0.0000, 1.0000,
0.2994, 0.7500, -0.0447, 0.0000, 1.0000, 0.4037, 0.7500, -0.0229,
0.0000]])
[{'speed': 25, 'crashed': False, 'action': 4, 'rewards': {'collision_reward': 0.0, 'right_lane_reward': 0.0, 'high_speed_reward': 0.5, 'on_road_reward': 1.0}}]
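Ah, I see. Sorry, I forgot to update the Colab.
In recent versions of gym (0.26+), `env.reset()` returns an `(obs, info)` tuple and `env.step()` returns five values instead of four. With the old unpacking, `obs` was the whole `(obs, info)` tuple, so the agent treated it as a multi-agent state and tried to convert the `info` dict to a tensor, hence the `TypeError: must be real number, not dict`.
You have to update two lines as follows:
`(obs, info), done = env.reset(), False`
and
`obs, reward, done, truncated, info = env.step(action)`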
Thank you! Now it works on Colab.