import alf
import alf.examples.muzero_conf
from alf.utils import dist_utils
from alf.utils.normalizers import ScalarAdaptiveNormalizer
from alf.algorithms.mcts_models import SimpleMCTSModel
from alf.algorithms.mcts_algorithm import MCTSAlgorithm, VisitSoftmaxTemperatureByProgress
from alf.optimizers import AdamTF
from alf.networks import StableNormalProjectionNetwork
Hi all,
I'm trying to run a custom gym environment, but I'm getting this error:
E1118 09:54:31.725832 4371203456 nest.py:52] map_structure() fails for (array(0, dtype=object),). Error message: 'module 'torch' has no attribute 'object'
I'm testing this gym env: https://github.com/MatePocs/gym-basic. Here is my config:
```python
import torch
import alf
import alf.examples.muzero_conf
from alf.utils import dist_utils
from alf.utils.normalizers import ScalarAdaptiveNormalizer
from alf.algorithms.mcts_models import SimpleMCTSModel
from alf.algorithms.mcts_algorithm import MCTSAlgorithm, VisitSoftmaxTemperatureByProgress
from alf.optimizers import AdamTF
from alf.networks import StableNormalProjectionNetwork
alf.config( "create_environment", env_name="gym_basic:basic-v0", num_parallel_environments=1)
alf.config( "StableNormalProjectionNetwork", max_std=1000.0, state_dependent_std=True, scale_distribution=True, dist_squashing_transform=dist_utils.Softsign())
alf.config( "SimplePredictionNet", continuous_projection_net_ctor=StableNormalProjectionNetwork)
alf.config( "SimpleMCTSModel", num_sampled_actions=20)
alf.config( "MCTSAlgorithm", discount=0.99, num_simulations=10, root_dirichlet_alpha=0.5, root_exploration_fraction=0., pb_c_init=0.5, pb_c_base=19652, is_two_player_game=False, visit_softmax_temperature_fn=VisitSoftmaxTemperatureByProgress(), act_with_exploration_policy=True, learn_with_exploration_policy=True, search_with_exploration_policy=True, unexpanded_value_score='mean', expand_all_children=False, expand_all_root_children=True)
alf.config( "MuzeroAlgorithm", mcts_algorithm_ctor=MCTSAlgorithm, model_ctor=SimpleMCTSModel, num_unroll_steps=5, td_steps=10, reward_normalizer=ScalarAdaptiveNormalizer(auto_update=False), reanalyze_ratio=1.0, target_update_period=1, target_update_tau=0.01)
alf.config("Agent", optimizer=AdamTF(lr=5e-4))
# training config
alf.config( "TrainerConfig", unroll_length=10, mini_batch_size=256, num_updates_per_train_iter=10, num_iterations=10000, num_checkpoints=5, evaluate=False, summary_interval=0, num_summaries=100, replay_buffer_length=100000, initial_collect_steps=1000)
```
How can I fix this?