Hi everyone,
I modified the DQN algorithm in this repository into a multi-agent DQN approach for a wireless network environment (my code is adapted from a DQN implementation on GitHub). The original code works well, but when I change the environment, the following error occurs:
```
Traceback (most recent call last):
  File "D:/main -DQN.py", line 452, in <module>
    main()
  File "D:/main -DQN.py", line 432, in main
    algo = DQN( args) # n_clusters is the action dimension in DQN
  File "D:/main -DQN.py", line 158, in __init__
    self.agent = Agent( args, self.tau)
  File "D:/main -DQN.py", line 246, in __init__
    self.model = self.network()
  File "D:/main -DQN.py", line 254, in network
    inp = Input((self.state_dim))
  File "C:\Users\AppData\Roaming\Python\Python37\site-packages\keras\engine\topology.py", line 1451, in Input
    batch_shape = (None,) + tuple(shape)
TypeError: 'int' object is not iterable
```
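For reference, here is a minimal sketch of what seems to trigger this `TypeError` with the standalone Keras package shown in the traceback: `Input` expects an iterable shape (e.g. a tuple), so a plain integer fails at `tuple(shape)`. The numbers below are placeholders for illustration, not my actual configuration.

```python
from keras.layers import Input

state_dim_as_int = 10       # e.g. what Agent stores via self.state_dim = self.args.A
state_dim_as_tuple = (10,)  # e.g. what Environ stores via self.state_dim = (self.args.A, )

# Input(state_dim_as_int)         # raises: TypeError: 'int' object is not iterable
inp = Input(state_dim_as_tuple)   # builds an input placeholder of shape (None, 10)
```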
The complete code is as follows:
```python
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
import pandas as pd
import numpy as np
import sys
import os
import copy, json, argparse
from numpy import pi
from random import random, uniform, choices, randint, sample, randrange
import random
import math
from tqdm import tqdm
import keras.backend as K
from keras.optimizers import Adam
from keras.models import Model
from keras.layers import Dense, Flatten, Input
from collections import deque
class Environ:
    def __init__(self, args):
        self.args = args
        self.state_dim = (self.args.A, )
        self.action_dim = args.C
        self.bs = complex((500 / 2), (500 / 2))
        self.S = (np.zeros(self.args.A)).reshape(-1)

    def Location(self):
        rx = uniform(0, 500)
        ry = uniform(0, 500)
        Loc = complex(rx, ry)
        return Loc

    def PathGain(self, Loc):
        d = abs(Loc - self.bs)
        d = d ** (-3)
        u = np.random.rand(1, 1)
        sigma = 1
        x = sigma * np.sqrt(-2 * np.log(u))
        h = d * x
        return h

    def reset(self):  # Reset the states
        s = np.zeros(self.args.A)
        return s.reshape(-1)

    def RecievePower(self, UsersLoc):
        H = self.PathGain(UsersLoc)
        UsersRecievePower = self.args.P * H
        return UsersRecievePower

    def TotalRate(self, actionRB_i, actionRB):
        interference = self.args.Noise
        Loc_i = self.Location()
        for j in range(self.args.A):
            if actionRB_i == actionRB[j]:
                Loc_j = self.Location()
                RecievePower_j = self.RecievePower(Loc_j)
                interference = interference + RecievePower_j
            else:
                interference = interference
        RecievePower_i = self.RecievePower(Loc_i)
        SINR = interference / (interference - RecievePower_i)
        Rate = self.args.BW * (np.log2(SINR))
        return Rate

    def computeQoS(self, actionRB, actionRB_i):
        TotalRate = self.TotalRate(actionRB, actionRB_i)
        if TotalRate >= self.args.Rmin:
            QoS = 1.0
        else:
            QoS = 0.0
        return QoS

    def ComputeState(self, actionRB):
        QoS = np.zeros(self.args.A)
        for i in range(self.args.A):
            actionRB_i = actionRB[i]
            QoS[i] = self.computeQoS(actionRB, actionRB_i)
        S = np.zeros(self.args.A)
        for i in range(self.args.A):
            S[i] = QoS[i]
        self.S = S
        return self.S.reshape(-1)

    def Reward(self, actionRB, actionRB_i):
        Rate = np.zeros(self.args.A)
        Satisfied_Users = 0
        for i in range(self.args.A):
            Rate[i] = self.TotalRate(actionRB, actionRB_i)
            Satisfied_Users = Satisfied_Users + self.computeQoS(actionRB)
        TotalRate = 0.0
        TotalPower = self.args.circuitPower
        for i in range(self.args.A):
            TotalRate = TotalRate + Rate[i]
            TotalPower = TotalPower + self.args.P
        if Satisfied_Users == self.args.A:
            reward = TotalRate / TotalPower
        else:
            reward = self.args.negative_cost
        return reward

    def step(self, actionRB):
        next_s = self.ComputeState(actionRB)
        r = self.Reward(actionRB)
        done = False
        info = None
        return next_s, r, done, info

class Environment(object):
    def __init__(self, gym_env, action_repeat):
        self.env = gym_env
        self.timespan = action_repeat
        self.gym_actions = 2  # range(gym_env.action_space.n)
        self.state_buffer = deque()

    def get_action_size(self):
        return self.env.action_dim

    def get_state_size(self):
        return self.env.state_dim

    def reset(self):
        # Clear the state buffer
        self.state_buffer = deque()
        x_t = self.env.reset()
        s_t = np.stack([x_t for i in range(self.timespan)], axis=0)
        for i in range(self.timespan - 1):
            self.state_buffer.append(x_t)
        return s_t

    def step(self, action):
        x_t1, r_t, terminal, info = self.env.step(action)
        previous_states = np.array(self.state_buffer)
        s_t1 = np.empty((self.timespan, *self.env.state_dim))
        s_t1[:self.timespan - 1, :] = previous_states
        s_t1[self.timespan - 1] = x_t1
        # Pop the oldest frame, add the current frame to the queue
        self.state_buffer.popleft()
        self.state_buffer.append(x_t1)
        return s_t1, r_t, terminal, info

    def render(self):
        return self.env.render()

class DQN:
    def __init__(self, args):
        # Environment and DQN parameters
        self.args = args
        self.action_dim = self.args.C
        self.state_dim = self.args.A
        self.buffer_size = self.args.capacity
        # Memory Buffer for Experience Replay
        self.buffer = MemoryBuffer(self.buffer_size)
        self.epsilon = self.args.eps
        self.tau = 1.0
        self.agent = Agent(args, self.tau)

    def policy_action(self, s):
        if random() <= self.epsilon:
            return randrange(self.action_dim)
        else:
            return np.argmax(self.agent.predict(s)[0])

    def train_agent(self):
        # Sample experience from memory buffer
        s, a, r, d, new_s, idx = self.buffer.sample_batch(self.batch_size)
        # Apply Bellman Equation on batch samples to train our DQN
        q = self.agent.predict(s)
        next_q = self.agent.predict(new_s)
        q_targ = self.agent.target_predict(new_s)
        for i in range(s.shape[0]):
            if d[i]:
                q[i, a[i]] = r[i]
            else:
                next_best_action = np.argmax(next_q[i, :])
                q[i, a[i]] = r[i] + self.args.gamma * q_targ[i, next_best_action]
        # Train on batch
        self.agent.fit(s, q)
        # Decay epsilon
        self.epsilon *= self.args.eps_decay

    def train(self, env, args, summary_writer):
        results = []
        tqdm_e = tqdm(range(self.args.nepisodes), desc='Score', leave=True, unit=" episodes")
        for e in tqdm_e:
            # Reset episode
            time, cumul_reward, done = 0, 0, False
            old_state = env.reset()
            while not done:
                # if args.render:
                #     env.render()
                # Actor picks an action (following the policy)
                a = []
                for i in range(self.args.A):
                    a[i] = self.policy_action(old_state)
                # Retrieve new state, reward, and whether the state is terminal
                new_state, r, done, _ = env.step(a)
                # Memorize for experience replay
                self.memorize(old_state, a, r, done, new_state)
                # Update current state
                old_state = new_state
                cumul_reward += r
                time += 1
                # Train DDQN and transfer weights to target network
                if (self.buffer.size() > args.batch_size):
                    self.train_agent(self.args.batch_size)
                    self.agent.transfer_weights()
            # Gather stats every episode for plotting
            if (args.gather_stats):
                mean, stdev = gather_stats(self, env)
                results.append([e, mean, stdev])
            # Export results for Tensorboard
            score = tfSummary('score', cumul_reward)
            summary_writer.add_summary(score, global_step=e)
            summary_writer.flush()
            # Display score
            tqdm_e.set_description("Score: " + str(cumul_reward))
            tqdm_e.refresh()
        return results

    def memorize(self, state, action, reward, done, new_state):
        self.buffer.memorize(state, action, reward, done, new_state)

    def save_weights(self, path):
        path += '_LR_{}'.format(self.args.learningrate)
        self.agent.save(path)

    def load_weights(self, path):
        self.agent.load_weights(path)

class Agent:
    def __init__(self, args, tau):
        self.args = args
        self.state_dim = self.args.A
        self.action_dim = self.args.C
        self.tau = tau
        self.lr = self.args.learningrate
        # Initialize Deep Q-Network
        self.model = self.network()
        self.model.compile(Adam(self.lr), 'mse')
        # Build target Q-Network
        self.target_model = self.network()
        self.target_model.compile(Adam(self.lr), 'mse')
        self.target_model.set_weights(self.model.get_weights())

    def network(self):
        inp = Input((self.state_dim))
        if (len(self.state_dim) > 2):
            inp = Input((self.state_dim[1:]))
            x = conv_block(inp, 32, (2, 2), 8)
            x = conv_block(x, 64, (2, 2), 4)
            x = conv_block(x, 64, (2, 2), 3)
            x = Flatten()(x)
            x = Dense(256, activation='relu')(x)
        else:
            x = Flatten()(inp)
            x = Dense(64, activation='relu')(x)
            x = Dense(64, activation='relu')(x)
        x = Dense(self.action_dim, activation='linear')(x)
        return Model(inp, x)

    def transfer_weights(self):
        W = self.model.get_weights()
        tgt_W = self.target_model.get_weights()
        for i in range(len(W)):
            # Updated based on Polyak averaging method
            tgt_W[i] = self.tau * W[i] + (1 - self.tau) * tgt_W[i]
        self.target_model.set_weights(tgt_W)

    def fit(self, inp, targ):
        self.model.fit(self.reshape(inp), targ, epochs=1, verbose=0)

    def predict(self, inp):
        return self.model.predict(self.reshape(inp))

    def target_predict(self, inp):
        return self.target_model.predict(self.reshape(inp))

    def reshape(self, x):
        if len(x.shape) < 4 and len(self.state_dim) > 2:
            return np.expand_dims(x, axis=-1)
        elif len(x.shape) < 3:
            return np.expand_dims(x, axis=-1)
        else:
            return x

    def save(self, path):
        self.model.save_weights(path + '.h5')

    def load_weights(self, path):
        self.model.load_weights(path)

class MemoryBuffer(object):
    def __init__(self, buffer_size):
        # Standard buffer
        self.buffer = deque()
        self.count = 0
        self.buffer_size = buffer_size

    def memorize(self, state, action, reward, done, new_state):
        experience = (state, action, reward, done, new_state)
        # Check if buffer is already full
        if self.count < self.buffer_size:
            self.buffer.append(experience)
            self.count += 1
        else:
            self.buffer.popleft()
            self.buffer.append(experience)

    def size(self):
        return self.count

    def sample_batch(self, batch_size):
        batch = []
        if self.count < batch_size:
            idx = None
            batch = random.sample(self.buffer, self.count)
        else:
            idx = None
            batch = random.sample(self.buffer, batch_size)
        # Return a batch of experience
        s_batch = np.array([i[0] for i in batch])
        a_batch = np.array([i[1] for i in batch])
        r_batch = np.array([i[2] for i in batch])
        d_batch = np.array([i[3] for i in batch])
        new_s_batch = np.array([i[4] for i in batch])
        return s_batch, a_batch, r_batch, d_batch, new_s_batch, idx

    def update(self, idx):
        self.buffer.update(idx)

    def clear(self):
        self.buffer = deque()
        self.count = 0

def get_session():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)

def tfSummary(tag, val):
    return tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=val)])

def gather_stats(agent, env):
    score = []
    for k in range(10):
        old_state = env.reset()
        cumul_r, done = 0, False
        while not done:
            a = agent.policy_action(old_state)
            old_state, r, done, _ = env.step(a)
            cumul_r += r
        score.append(cumul_r)
    return np.mean(np.array(score)), np.std(np.array(score))

def conv_block(inp, d=3, pool_size=(2, 2), k=3):
    conv = conv_layer(d, k)(inp)
    return MaxPooling2D(pool_size=pool_size)(conv)

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

def parse_args(args):
    parser = argparse.ArgumentParser(description='Training parameters')
    # parser.add_argument('--out_dir', type=str, default='experiments', help="Name of the output directory")
    parser.add_argument('--consecutive_frames', type=int, default=2, help="Number of consecutive frames (action repeat)")
    parser.add_argument('--gather_stats', dest='gather_stats', action='store_true', help="Compute Average reward per episode (slower)")
    parser.add_argument('--A', type=int, default='10', help="The number of agents")
    parser.add_argument('--C', type=int, default='30', help="The number of Resources")
    parser.add_argument('--Noise', type=float, default='0.00000000000001', help="The background noise")
    parser.add_argument('--BW', type=int, default='180000', help="The bandwidth")
    parser.add_argument('--Rmin', type=int, default='1000000', help="Agents' QoS")
    parser.add_argument('--P', type=float, default='0.01', help="Agents' transmit power")
    parser.add_argument('--circuitPower', type=float, default='0.05', help="The circuit Power")
    parser.add_argument('--negative_cost', type=float, default='-1.0', help="The negative cost")
    parser.add_argument('--capacity', type=int, default='500', help="Capacity of Replay Buffer")
    parser.add_argument('--learningrate', type=float, default='0.01', help="The learning rate")
    parser.add_argument('--eps', type=float, default='0.8', help="The epsilon")
    parser.add_argument('--eps_decay', type=float, default='0.99', help="The epsilon decay")
    parser.add_argument('--eps_increment', type=float, default='0.003', help="The epsilon increment")
    parser.add_argument('--batch_size', type=int, default='8', help="The batch size")
    parser.add_argument('--gamma', type=float, default='0.99', help="The discount factor")
    parser.add_argument('--nepisodes', type=int, default='500', help="The number of episodes")
    parser.add_argument('--nsteps', type=int, default='500', help="The number of steps")
    parser.add_argument('--env', type=str, default='Environ', help="Wireless environment")
    parser.add_argument('--gpu', type=str, default="", help='GPU ID')

def main(args=None):
    # Parse arguments

if __name__ == "__main__":
    main()
```

Thanks in advance for your help.