Running an LSTM Policy A2C Trained Model

Hello,

I'm trying to run a trained model for the LSTM and LNLSTM policies in A2C. I was able to get the basic convolutional neural net policy to work just fine, but when trying to step the model for LSTM, I get an error because of the state and mask parameters. The error seems to have something to do with putting placeholders in the feed_dict when running the TF session from the policy. Here is my full code and error:

import time
import tensorflow as tf
from baselines import logger
from baselines.a2c.a2c import Model
from baselines.common.cmd_util import atari_arg_parser
from baselines.common.vec_env.vec_frame_stack import VecFrameStack
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
from baselines.a2c.policies import CnnPolicy, LstmPolicy, LnLstmPolicy

def enjoy(seed, policy, fps=100):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy

    env = wrap_deepmind(make_atari("MsPacmanNoFrameskip-v4"), clip_rewards=False, frame_stack=True)
    env.seed(seed)

    tf.reset_default_graph()
    ob_space = env.observation_space
    ac_space = env.action_space
    nsteps = 5  # default value, change if needed

    model = Model(policy=policy_fn, ob_space=ob_space, ac_space=ac_space, nenvs=1, nsteps=nsteps)
    model.load("lnlstm_policy_a2c_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            time.sleep(1.0 / fps)
            action, _, _, _ = model.step_model.step([obs.__array__()], state = model.step_model.S, mask= model.step_model.M)
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
        print('Episode reward:', episode_rew)

    env.close()

def main():
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture', choices=['cnn', 'lstm', 'lnlstm'], default='lnlstm')
    args = parser.parse_args()
    logger.configure()
    enjoy(args.seed, args.policy)

if __name__ == '__main__':
    main()

in enjoy 
action, _, _, _= model.step_model.step([obs.__array__()], state= model.step_model.S, mask = model.step_model.M)
in step
return sess.run([a0, v0, snew, neglogp0], {X:ob, S:state, M:mask})

Type Error: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, etc. For reference, the tensor object was Tensor("Placeholder_6:0", shape=(1,512), dtype=float32)

Thank you for any help you might be able to provide.

openai / baselines

Running an LSTM Policy A2C Trained Model #398