Closed Zed-Wu closed 1 year ago
Yes, you can use TStepTransition with some example configs like below.
Though, we removed recurrent architectures from the repo due to maintenance reasons.
# Shared hyperparameters for the recurrent (TransformerXL) setup.
recurrent_horizon = 32  # number of consecutive timesteps fed to the transformer
num_heads = 6  # attention heads per self-attention block

# Behavior-cloning agent whose actor is a Visuomotor network:
# PointNet visual encoder -> TransformerXL temporal module -> MLP action head.
agent_cfg = {
    "type": "BC",
    "batch_size": 32,
    "recurrent_horizon": recurrent_horizon,
    "actor_cfg": {
        "type": "ContinuousActor",
        # Tanh head with near-zero noise, i.e. effectively deterministic actions.
        "head_cfg": {
            "type": "TanhHead",
            "noise_std": 1e-5,
        },
        "nn_cfg": {
            "type": "Visuomotor",
            "visual_nn_cfg": {
                "type": "PointNet",
                "point_indicator": False,
                "feat_dim": "pcd_all_channel",
                "mlp_spec": [64, 128, 512],
                "feature_transform": [],
            },
            "rnn_cfg": {
                "type": "RNN",
                "rnn_cfg": {
                    "type": "TransformerXL",
                    "block_cfg": {
                        "attention_cfg": {
                            "type": "MultiHeadSelfAttentionXL",
                            "embed_dim": 192,
                            "num_heads": num_heads,
                            "latent_dim": 32,
                            "dropout": 0.1,
                        },
                        "mlp_cfg": {
                            "type": "LinearMLP",
                            "norm_cfg": None,
                            "mlp_spec": [192, 768, 192],
                            "bias": "auto",
                            "inactivated_output": True,
                            "linear_init_cfg": {"type": "xavier_init", "gain": 1, "bias": 0},
                        },
                        "dropout": 0.1,
                    },
                    "mlp_cfg": None,
                    "num_blocks": 4,
                    "history_len": recurrent_horizon,
                    "decode": True,
                    # Presumably projects PointNet features (512) concatenated with the
                    # agent state down to the transformer embed_dim (192) — confirm
                    # against the Visuomotor implementation.
                    "latent_proj_cfg": {
                        "type": "LinearMLP",
                        "norm_cfg": None,
                        "mlp_spec": ["512 + agent_shape", 256, 192],
                        "bias": "auto",
                        "inactivated_output": True,
                        "linear_init_cfg": {"type": "xavier_init", "gain": 1, "bias": 0},
                    },
                },
            },
            # Final MLP mapping transformer output + agent state to the action.
            "mlp_cfg": {
                "type": "LinearMLP",
                "norm_cfg": None,
                "mlp_spec": ["192 + agent_shape", 192, 192, "action_shape"],
                "inactivated_output": True,
                "zero_init_output": True,
            },
        },
        "optim_cfg": {"type": "Adam", "lr": 1e-4},
    },
}
# Source environment: standard Gym HalfCheetah, kept wrapped.
env_cfg = {
    "type": "gym",
    "env_name": "HalfCheetah-v3",
    "unwrapped": False,
}
# Replay buffer preloaded from an offline SAC trajectory file.
# NOTE(review): capacity / num_samples of -1 presumably mean "take everything
# in the file" — confirm against the ReplayMemory implementation.
replay_cfg = {
    "type": "ReplayMemory",
    "capacity": -1,
    "num_samples": -1,
    "keys": ["obs", "actions", "dones", "episode_dones"],
    "buffer_filenames": [
        "./work_dirs/Halfcheetah-v3/SAC/eval_3000000/trajectory.h5",
    ],
    # TStepTransition draws windows of consecutive transitions, which is what
    # the transformer needs; horizon=-1 presumably means full episodes.
    "sampling_cfg": {
        "type": "TStepTransition",
        "horizon": -1,
        "with_replacement": False,
    },
}
# Offline training schedule: no environment interaction (warm_steps = n_steps = 0),
# 500 gradient updates per outer step, periodic eval/checkpoint every 10k steps.
train_cfg = {
    "on_policy": False,
    "total_steps": 50000,
    "warm_steps": 0,
    "n_steps": 0,
    "n_updates": 500,
    "n_eval": 10000,
    "n_checkpoint": 10000,
}
# Evaluation: 10 episodes in a single process, saving trajectories but not videos.
eval_cfg = {
    "type": "Evaluation",
    "num": 10,
    "num_procs": 1,
    "use_hidden_state": False,
    "save_traj": True,
    "save_video": False,
    "use_log": False,
}
Excellent, I really appreciate your help
I want to change the IMPALA to the transformer architecture in BC. Now the input size should be
[batch size, length, embed_dim]
instead of [batch size, embed_dim].
Should I use the TStepTransition
in maniskill_learn/env/sampling_strategy.py
to get multiple transitions at one time? I did not find any example using the TStepTransition, and I am not sure if it can work well (e.g., is it possible to cause some bugs when dynamic_loading==True
and the trajectories are truncated?).