Open Sen1553 opened 1 year ago
`compute_return` 函数无法计算状态序列带环的情况,例如状态序列 s3-s4-s5-s3-s6。修改后的代码如下: def compute_return(start_index, chain, gamma): G = 0 for i in reversed(range( len(chain))): print("G = %f* %f + %d" %(gamma, G, rewards[chain[i] - 1] )) G = gamma * G + rewards[chain[i] - 1] print(i) return G
def compute_return(start_index, chain, gamma):
    """Return the discounted return of ``chain`` from ``start_index`` onward.

    The return is accumulated backwards with ``G = gamma * G + reward``, so
    the reward at position ``start_index + k`` ends up weighted by
    ``gamma ** k``. Every position contributes its own reward, so a chain
    that revisits a state (e.g. s3-s4-s5-s3-s6) is handled like any other.

    Args:
        start_index: 0-based position in ``chain`` (not a state number) at
            which accumulation starts; pass 0 to cover the whole chain.
        chain: Sequence of 1-based state numbers; entry ``s`` is looked up
            as ``rewards[s - 1]``.
        gamma: Discount factor applied once per step.

    Returns:
        The accumulated return, or 0 when there is no position at or after
        ``start_index``.

    Note:
        Reads ``rewards``, which must be defined outside this function.
    """
    G = 0
    # Walk from the last position back to start_index so each reward is
    # discounted once per step that separates it from the start.
    for i in reversed(range(start_index, len(chain))):
        reward = rewards[chain[i] - 1]
        # Debug trace of the update about to be applied; %d shows a
        # non-integer reward truncated to an integer.
        print("G = %f* %f + %d" % (gamma, G, reward))
        G = gamma * G + reward
        print(i)
    return G
`compute_return` 函数无法计算状态序列带环的情况,例如状态序列 s3-s4-s5-s3-s6。修改后的代码如下:
def compute_return(start_index, chain, gamma):
    """Return the discounted return of ``chain`` from ``start_index`` onward.

    The return is accumulated backwards with ``G = gamma * G + reward``, so
    the reward at position ``start_index + k`` ends up weighted by
    ``gamma ** k``. Every position contributes its own reward, so a chain
    that revisits a state (e.g. s3-s4-s5-s3-s6) is handled like any other.

    Args:
        start_index: 0-based position in ``chain`` (not a state number) at
            which accumulation starts; pass 0 to cover the whole chain.
        chain: Sequence of 1-based state numbers; entry ``s`` is looked up
            as ``rewards[s - 1]``.
        gamma: Discount factor applied once per step.

    Returns:
        The accumulated return, or 0 when there is no position at or after
        ``start_index``.

    Note:
        Reads ``rewards``, which must be defined outside this function.
    """
    G = 0
    # Walk from the last position back to start_index so each reward is
    # discounted once per step that separates it from the start.
    for i in reversed(range(start_index, len(chain))):
        reward = rewards[chain[i] - 1]
        # Debug trace of the update about to be applied; %d shows a
        # non-integer reward truncated to an integer.
        print("G = %f* %f + %d" % (gamma, G, reward))
        G = gamma * G + reward
        print(i)
    return G