DeepX-inc / machina

Control section: Deep Reinforcement Learning framework
MIT License
279 stars 43 forks source link

Executed run_ppo.py,RuntimeError: size mismatch occurred. #258

Closed hosokawa-taiji closed 5 years ago

hosokawa-taiji commented 5 years ago

I executed run_ppo.py, but a `RuntimeError: size mismatch` occurred. A few weeks ago it worked fine. On Nov 13, something was changed in THTensorMath.cpp in PyTorch, and I think this is the cause. Can you handle this?

hosokawa-taiji commented 5 years ago

It occurs only with my own custom gym environment. The problem may be in my environment.

mmisono commented 5 years ago

Could you describe the details? That is,

hosokawa-taiji commented 5 years ago

I'm using Google Colaboratory. According to my investigation,

hosokawa-taiji commented 5 years ago
hosokawa-taiji commented 5 years ago

class FxEnv_v0(gym.Env):
  """Custom gym environment built from a rolling window over a price series.

  The series is read from a CSV file, smoothed, converted to percentage
  changes and scaled to [-10, 10]. Each observation is a slice of
  ``WINDOW`` consecutive rows; the action is compared against the rounded
  value of the row that immediately follows the window.
  """

  # Constants
  WINDOW = 600       # rows per observation; also the rolling-mean window
  PCT_CHANGE = 600   # period passed to pct_change()
  HP = 100           # initial "hit points"; reduced by 1 on every mismatch
  THRESHOLD = 3      # not referenced anywhere in this class

  def __init__(self):
    super().__init__()
    # Load and transform the data
    self.data = self.loadAndTransformData()
    # Set action_space
    self.action_space = gym.spaces.Box(
      low = -10,
      high = 10,
      shape = (1,),
      dtype = np.int64
    )
    # Set observation_space
    self.observation_space = gym.spaces.Box(
      low = -10,
      high = 10,
      shape = ((self.WINDOW,)),
      dtype = np.float64
    )
    # Set reward_range
    self.reward_range = [-2., 10.]
    # Initial state
    self.position = 0
    self.hp = self.HP
    self.reset()

  def step(self, action):
    """Advance one step.

    Returns the tuple ``(observation, reward, done, info)`` where ``info``
    is always an empty dict.
    """
    # Compute reward and hp
    reward = 0
    # Target is the rounded value of the row right after the current window.
    nextData = self.data[self.position + self.WINDOW].round()
    if(action == nextData):
      reward = abs(action)
    else:
      # Penalty proportional to the distance from the target; costs one HP.
      reward = -0.1 * (abs(action - nextData)[0])
      self.hp -= 1
    # A correct prediction of 0 would otherwise earn abs(0) == 0.
    if(action == 0 and nextData == 0):
      reward = 1
    # Update position
    self.position += 1
    return self.getObservation(), reward, self.isDone(), {}

  def reset(self):
    """Restore HP and return the observation at the current position.

    ``self.position`` is not changed here; it is only rewound by
    ``isDone`` when the end of the data is reached.
    """
    # Set HP
    self.hp = self.HP
    # Return the observation
    return self.getObservation()

  def render(self, mode='human', close=False):
    """No-op."""
    pass

  def close(self):
    """No-op."""
    pass

  def seed(self, seed=None):
    """No-op; the given seed is ignored."""
    pass

  def loadAndTransformData(self):
    """Read the 'Bid' column from the CSV and return it scaled to [-10, 10].

    Pipeline: rolling mean over ``WINDOW`` rows, percentage change over
    ``PCT_CHANGE`` rows, drop NaN rows, then min-max scale.
    """
    data = pd.read_csv('drive/My Drive/Colab Notebooks/GBPJPY1.csv', names=(['Bid']))
    data = data.rolling(self.WINDOW).mean()
    data = data.pct_change(self.PCT_CHANGE).dropna()
    scaler = preprocessing.MinMaxScaler(feature_range=(-10, 10))
    return scaler.fit_transform(data)

  def getObservation(self):
    """Return the ``WINDOW`` rows of ``self.data`` starting at ``self.position``."""
    # NOTE(review): self.data is the scaler output for a one-column frame, so
    # this slice is presumably 2-D with shape (WINDOW, 1), not the (WINDOW,)
    # declared in observation_space; verify against the consumer of the
    # observation.
    return self.data[self.position : self.position + self.WINDOW]

  def isDone(self):
    """Return True when HP is exhausted or the end of the data is reached.

    Side effect: rewinds ``self.position`` to 0 when the end of the data
    is reached.
    """
    if(self.hp <= 0):
      return True
    elif(self.position == len(self.data) - self.WINDOW):
      self.position = 0
      return True
    else:
      return False

hosokawa-taiji commented 5 years ago

Then I ran the command `python machina/example/run_ppo.py --cuda 0 --env_name 'Fx-v0' --rnn` and got this error message.

{'batch_size': 256,
 'c2d': False,
 'clip_param': 0.2,
 'cuda': 0,
 'env_name': 'Fx-v0',
 'epoch_per_iter': 10,
 'gamma': 0.995,
 'init_kl_beta': 1,
 'kl_targ': 0.01,
 'lam': 1,
 'log': 'garbage',
 'max_epis': 1000000,
 'max_grad_norm': 10,
 'max_steps_per_iter': 10000,
 'num_parallel': 4,
 'pol_lr': 0.0003,
 'ppo_type': 'clip',
 'record': False,
 'rnn': True,
 'rnn_batch_size': 8,
 'seed': 256,
 'vf_lr': 0.0003}
2019-11-15 09:12:11.192617 UTC | observation space: Box(600,)
2019-11-15 09:12:11.192765 UTC | action space: Box(1,)
Process Process-4:
Process Process-3:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.6/dist-packages/machina/samplers/epi_sampler.py", line 126, in mp_sample
    l, epi = one_epi(env, pol, deterministic_flag, prepro)
  File "/usr/local/lib/python3.6/dist-packages/machina/samplers/epi_sampler.py", line 51, in one_epi
    ac_real, ac, a_i = pol(torch.tensor(o, dtype=torch.float))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/machina/pols/gaussian_pol.py", line 50, in forward
    mean, log_std, hs = self.net(obs, hs, h_masks)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/content/machina/example/simple_net.py", line 169, in forward
    xs = torch.relu(self.input_layer(xs))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py", line 87, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1372, in linear
    output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [600 x 1], m2: [600 x 256] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:197
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.6/dist-packages/machina/samplers/epi_sampler.py", line 126, in mp_sample
    l, epi = one_epi(env, pol, deterministic_flag, prepro)
  File "/usr/local/lib/python3.6/dist-packages/machina/samplers/epi_sampler.py", line 51, in one_epi
    ac_real, ac, a_i = pol(torch.tensor(o, dtype=torch.float))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/machina/pols/gaussian_pol.py", line 50, in forward
    mean, log_std, hs = self.net(obs, hs, h_masks)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/content/machina/example/simple_net.py", line 169, in forward
    xs = torch.relu(self.input_layer(xs))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py", line 87, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1372, in linear
    output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [600 x 1], m2: [600 x 256] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:197
Process Process-2:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.6/dist-packages/machina/samplers/epi_sampler.py", line 126, in mp_sample
    l, epi = one_epi(env, pol, deterministic_flag, prepro)
  File "/usr/local/lib/python3.6/dist-packages/machina/samplers/epi_sampler.py", line 51, in one_epi
    ac_real, ac, a_i = pol(torch.tensor(o, dtype=torch.float))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/machina/pols/gaussian_pol.py", line 50, in forward
    mean, log_std, hs = self.net(obs, hs, h_masks)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/content/machina/example/simple_net.py", line 169, in forward
    xs = torch.relu(self.input_layer(xs))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py", line 87, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1372, in linear
    output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [600 x 1], m2: [600 x 256] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:197
Process Process-5:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.6/dist-packages/machina/samplers/epi_sampler.py", line 126, in mp_sample
    l, epi = one_epi(env, pol, deterministic_flag, prepro)
  File "/usr/local/lib/python3.6/dist-packages/machina/samplers/epi_sampler.py", line 51, in one_epi
    ac_real, ac, a_i = pol(torch.tensor(o, dtype=torch.float))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/machina/pols/gaussian_pol.py", line 50, in forward
    mean, log_std, hs = self.net(obs, hs, h_masks)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/content/machina/example/simple_net.py", line 169, in forward
    xs = torch.relu(self.input_layer(xs))
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 541, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py", line 87, in forward
    return F.linear(input, self.weight, self.bias)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1372, in linear
    output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [600 x 1], m2: [600 x 256] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:197
mmisono commented 5 years ago

`self.observation_space.shape` is `(WINDOW,)`, but the observation actually returned by the environment has shape `(WINDOW, 1)`. Please give the following a try.

  def getObservation(self):
    """Return the current window of ``self.data`` flattened to one dimension."""
    start = self.position
    stop = start + self.WINDOW
    window = self.data[start:stop]
    # flatten() collapses the sliced rows into a single 1-D array.
    return window.flatten()
hosokawa-taiji commented 5 years ago

It works! A lot of thanks!!!