Open Wu-Jiayang opened 1 year ago
class Model(paddle.nn.Layer):
    """Recurrent network meant to map an observation to its negation.

    Given ``obs`` as input, the expected output is ``-obs``. The network is
    two linear layers, a single-layer LSTM, and two further linear layers
    that reduce the features to one value.

    Args:
        obs_dim: Size of the last axis of ``obs``. Defaults to 1.
        dim: Width of the hidden linear layers and of the LSTM.
            Defaults to 128.
    """

    def __init__(self, obs_dim=1, dim=128):
        super().__init__()
        # Input encoder.
        self.fc1 = paddle.nn.Linear(obs_dim, dim)
        self.fc2 = paddle.nn.Linear(dim, dim)
        # Recurrent core.
        self.lstm = paddle.nn.LSTM(dim, dim, num_layers=1)
        # Output head producing a single value.
        self.output_fc1 = paddle.nn.Linear(dim, dim)
        self.output_fc2 = paddle.nn.Linear(dim, 1)

    def forward(self, obs, state=None):
        """Run the network on ``obs`` and return the output and LSTM state.

        Args:
            obs: Input tensor whose last axis has size ``obs_dim``.
                Assumes a [batch_size, time_steps, obs_dim] layout, matching
                the shape noted on the LSTM output below - TODO confirm
                against the caller.
            state: Optional initial LSTM state, forwarded unchanged as
                ``initial_states``. Defaults to ``None``.

        Returns:
            A tuple ``(act_logits, state)``: the output of the final linear
            layer and the state returned by the LSTM.
        """
        output = paddle.nn.functional.gelu(self.fc1(obs))
        output = self.fc2(output)
        output, state = self.lstm(output, initial_states=state)  # [batch_size, time_steps, dim]
        output = paddle.nn.functional.gelu(self.output_fc1(output))
        act_logits = self.output_fc2(output)
        return act_logits, state
def episode_generate() -> list:
    """Return a random run of consecutive integers counting downward.

    The first element is drawn from [1, 20] and a target length from
    [2, 20]. The run is cut off once it reaches 0, so every element lies in
    [0, 20] and the resulting list has between 2 and 20 elements.
    """
    start = random.randint(1, 20)
    wanted_len = random.randint(2, 20)
    # Exclusive stop of the range, clamped at -1 so the run never goes below 0.
    stop = start - wanted_len
    if stop < -1:
        stop = -1
    return [*range(start, stop, -1)]
我无法使用 LSTM 实现 TD 算法。
例如:随机输入一个递减数列(元素取值区间为[0, 20],长度区间为[2, 20]),要求输出该数列各元素的相反数。
我期望得到一个类似[-3, -2, -1, 0]的递增数列,但是大部分情况下我只能得到[-4.5486226 -4.5486226 -4.5486226 -4.5486226],我无法找到原因。
以下是我的主要代码: