Open RenHyz opened 2 years ago
您好,我们已经收到了您的问题,会安排技术人员尽快解答您的问题,请耐心等待。请您再次检查是否提供了清晰的问题描述、复现代码、环境&版本、报错信息等。同时,您也可以通过查看官网API文档、常见问题、历史Issue、AI社区来寻求解答。祝您生活愉快~
Hi! We've received your issue — please be patient while we arrange for technicians to answer your questions as soon as possible. Please make sure that you have posted enough information to demonstrate your request. You may also check out the API docs, FAQ, GitHub Issues and the AI community to find an answer. Have a nice day!
@RenHyz 麻烦附一下这段错误代码的链接
@zhouwei25 这里面粘贴的就是源代码,下面是输出的结果
@RenHyz 这个图片是我们的文档代码吗
@RenHyz 可以打印出 fluid.layers.fc对应的 参数看看
@zhouwei25 这个图片是之前提问老师给出的解决方法。
@zhouwei25 打印出的fluid.layers.fc
@RenHyz 用PrintOP打印Tensor值,看参数变化了没,你这里看到的只是一个占位符信息,帮助不了调试 https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/static/Print_cn.html#print
@zhouwei25 调试的时候发现训练的时候FC的参数除了输入的data值变了其他的都没变
@RenHyz 这里能调一下代码格式对齐吗,这样看不清楚
test_oneEXE.zip @zhouwei25
bug描述 Describe the Bug
paddle版本2.0 使用股票环境中的收盘价作为输入,利用LSTM网络产生交易动作(0,1),在执行交易动作之后产生买卖 的收益(reward),再由reward来计算两次交易之间的微分夏普率。将微分夏普率当作目标函数去优化 LSTM网络。
在loss函数中加入predict_act之后还是一样不能优化,是什么问题呢?
import gym import gym_anytrading from gym_anytrading.envs import TradingEnv, ForexEnv,StocksEnv,Actions,Positions from gym_anytrading.datasets import FOREX_EURUSD_1H_ASK,STOCKS_GOOGL import matplotlib.pyplot as plt import numpy as np from copy import deepcopy import warnings with warnings.catch_warnings(): warnings.filterwarnings("ignore",category=DeprecationWarning) import paddle.fluid as fluid import paddle import math import os os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" try: paddle.enable_static() except: pass WINDOWS_SIZE = 10
获取交易收盘价数据
env = gym.make('stocks-v1',window_size = WINDOWS_SIZE) env.reset() get_close = np.array(env.df.loc[:,'Close']).astype('float32').reshape(-1,1) close = [] for i in range(len(get_close)-WINDOWS_SIZE): close.append(get_close[i:i+WINDOWS_SIZE]) close = np.array(close).astype('float32') def my_train_reader(): def reader(): for temp in close: yield temp[:] , return reader def my_dsr_reader(): def reader(x): for temp in close: yield temp ,x return reader
def Diff_SR(reward):#传入的参数是reward的list n = len(reward) _reward = reward[n-1]#reward列表中的最后一个值 if n<=1: Dt = 0 return Dt elif np.mean(reward[:n-1]): Dt = 0 return Dt else: At_1 = np.mean(reward[:n-1]) Bt_1 = np.var(reward[:n-1]) _At = _reward - At_1 _Bt = _reward * _reward - Bt_1 Dt_fz = Bt_1 * _At - At_1 * _Bt / 2 Dt_fm = math.pow(math.fabs(Bt_1 - At_1 * At_1), 3 / 2) Dt = Dt_fz / Dt_fm return Dt
At = At_1+ETA*(reward-At_1)
定义神经网络及输入输出
def doubleNet(ipt): hidden = fluid.layers.fc(input=ipt, size=100 * 4) lstm_h, c= fluid.layers.dynamic_lstm(input=hidden,size=100 * 4,is_reverse=False) lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') lstm_max_tanh = fluid.layers.tanh(lstm_max) predict_act = fluid.layers.fc(input=lstm_max_tanh, size=2, act='softmax') return predict_act
obs = fluid.data(name='obs', shape=[None,1], dtype='float32', lod_level=1) dsr = fluid.data(name='dsr', shape=[None,1], dtype='float32', lod_level=1) actionNet = doubleNet(obs) infer_act_program = fluid.default_main_program().clone(for_test = False)
构建损失函数
tensor_act = paddle.argmax(actionNet[0]) add_1 = paddle.ones_like(tensor_act) tensor_act_1 = paddle.add(tensor_act,add_1).astype('float32') cost = paddle.multiply(tensor_act_1,dsr).astype('float32') print("Tensor's stop_gradient: ", cost.stop_gradient)#输出False,说明cost会计算并传播梯度
dsr = np.array(Dt_list)
fluid.layers.mean(传入变量)
avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-3, epsilon=1e-3) opt = optimizer.minimize(avg_cost)
创建CPU执行器
place = fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) feed_list = [obs] feeder = fluid.DataFeeder(place=place,feed_list=feed_list) feed_list2 = [obs,dsr] feeder2 = fluid.DataFeeder(place=place, feed_list=feed_list2)
输入收盘价输出所有的预测动作
def predict(): obs = my_train_reader()() act_prob_list = [] prob_list = [] for index, obs in enumerate(obs): obs = np.array(obs) data = [] data.append(obs) act_prob = exe.run(program=infer_act_program, feed=feeder.feed(data), fetch_list=[actionNet])[0] act = np.squeeze(act_prob) prob_list.append(act) action = np.argmax(act) act_prob_list.append(action) print(prob_list[0:5]) return act_prob_list def run_episode(): reward_list = [] dsr_list = []
获取预测动作
for i in range(5): env.reset() reward_list, dsr_list = run_episode() print('episode',i,'total reward=%.5f'%np.sum(reward_list),'total dsr=%.5f'%np.sum(dsr_list))
输出结果: E:\RhyAnaconda\envs\paddle2.2\python.exe E:/LearnPad/learn_pad_pycharm/test_trading/test_oneEXE.py Tensor's stop_gradient: False [array([0.6579416 , 0.34205842], dtype=float32), array([0.6580735 , 0.34192654], dtype=float32), array([0.65913564, 0.34086433], dtype=float32), array([0.6601127, 0.3398873], dtype=float32), array([0.6596957, 0.3403043], dtype=float32)] episode 0 total reward=0.00000 total dsr=0.00000 [array([0.6579416 , 0.34205842], dtype=float32), array([0.6580735 , 0.34192654], dtype=float32), array([0.65913564, 0.34086433], dtype=float32), array([0.6601127, 0.3398873], dtype=float32), array([0.6596957, 0.3403043], dtype=float32)] episode 1 total reward=0.00000 total dsr=0.00000 [array([0.6579416 , 0.34205842], dtype=float32), array([0.6580735 , 0.34192654], dtype=float32), array([0.65913564, 0.34086433], dtype=float32), array([0.6601127, 0.3398873], dtype=float32), array([0.6596957, 0.3403043], dtype=float32)] episode 2 total reward=0.00000 total dsr=0.00000 [array([0.6579416 , 0.34205842], dtype=float32), array([0.6580735 , 0.34192654], dtype=float32), array([0.65913564, 0.34086433], dtype=float32), array([0.6601127, 0.3398873], dtype=float32), array([0.6596957, 0.3403043], dtype=float32)] episode 3 total reward=0.00000 total dsr=0.00000 [array([0.6579416 , 0.34205842], dtype=float32), array([0.6580735 , 0.34192654], dtype=float32), array([0.65913564, 0.34086433], dtype=float32), array([0.6601127, 0.3398873], dtype=float32), array([0.6596957, 0.3403043], dtype=float32)] episode 4 total reward=0.00000 total dsr=0.00000
进程已结束,退出代码为 0
其他补充信息 Additional Supplementary Information
No response