anyscale / academy

Ray tutorials from Anyscale
https://anyscale.com
Apache License 2.0
574 stars 195 forks source link

IndexError on calling ppo.PPOTrainer(config, env = SELECT_ENV) 01-Application-Cart-Pole.ipynb #54

Open Wormh0-le opened 3 years ago

Wormh0-le commented 3 years ago

I followed the suggestion: `config["framework"] = "torch"`, `config["num_gpus"] = 0.001` (doesn't work), `config["num_gpus_per_worker"] = (1 - 0.001) / 4` — but I get this error:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-33-c1627876193e> in <module>
----> 1 agent = ppo.PPOTrainer(config, env=SELECT_ENV)
      2 
      3 results = []
      4 episode_data = []
      5 episode_json = []

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/agents/trainer_template.py in __init__(self, config, env, logger_creator)
    121 
    122         def __init__(self, config=None, env=None, logger_creator=None):
--> 123             Trainer.__init__(self, config, env, logger_creator)
    124 
    125         def _init(self, config: TrainerConfigDict,

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in __init__(self, config, env, logger_creator)
    546             logger_creator = default_logger_creator
    547 
--> 548         super().__init__(config, logger_creator)
    549 
    550     @classmethod

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/tune/trainable.py in __init__(self, config, logger_creator)
     96 
     97         start_time = time.time()
---> 98         self.setup(copy.deepcopy(self.config))
     99         setup_time = time.time() - start_time
    100         if setup_time > SETUP_TIME_THRESHOLD:

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in setup(self, config)
    707 
    708         with get_scope():
--> 709             self._init(self.config, self.env_creator)
    710 
    711             # Evaluation setup.

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/agents/trainer_template.py in _init(self, config, env_creator)
    153                 policy_class=self._policy_class,
    154                 config=config,
--> 155                 num_workers=self.config["num_workers"])
    156             self.execution_plan = execution_plan
    157             self.train_exec_impl = execution_plan(self.workers, config)

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in _make_workers(self, env_creator, validate_env, policy_class, config, num_workers)
    795             trainer_config=config,
    796             num_workers=num_workers,
--> 797             logdir=self.logdir)
    798 
    799     @DeveloperAPI

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py in __init__(self, env_creator, validate_env, policy_class, trainer_config, num_workers, logdir, _setup)
     98                 num_workers=num_workers,
     99                 config=self._local_config,
--> 100                 spaces=spaces,
    101             )
    102 

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py in _make_worker(self, cls, env_creator, validate_env, policy_cls, worker_index, num_workers, config, spaces)
    405             fake_sampler=config["fake_sampler"],
    406             extra_python_environs=extra_python_environs,
--> 407             spaces=spaces,
    408         )
    409 

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py in __init__(self, env_creator, validate_env, policy_spec, policy_mapping_fn, policies_to_train, tf_session_creator, rollout_fragment_length, count_steps_by, batch_mode, episode_horizon, preprocessor_pref, sample_async, compress_observations, num_envs, observation_fn, observation_filter, clip_rewards, clip_actions, env_config, model_config, policy_config, worker_index, num_workers, record_env, log_dir, log_level, callbacks, input_creator, input_evaluation, output_creator, remote_worker_envs, remote_env_batch_wait_ms, soft_horizon, no_done_at_end, seed, extra_python_environs, fake_sampler, spaces, policy, monitor_path)
    535         else:
    536             self.policy_map, self.preprocessors = self._build_policy_map(
--> 537                 policy_dict, policy_config)
    538 
    539         # Update Policy's view requirements from Model, only if Policy directly

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py in _build_policy_map(self, policy_dict, policy_config)
   1194             # non-tf.
   1195             else:
-> 1196                 policy_map[name] = cls(obs_space, act_space, merged_conf)
   1197 
   1198         if self.worker_index == 0:

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/policy/policy_template.py in __init__(self, obs_space, action_space, config)
    265                 action_distribution_fn=action_distribution_fn,
    266                 max_seq_len=config["model"]["max_seq_len"],
--> 267                 get_batch_divisibility_req=get_batch_divisibility_req,
    268             )
    269 

~/anaconda3/envs/anyscale-academy/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py in __init__(self, observation_space, action_space, config, model, loss, action_distribution_class, action_sampler_fn, action_distribution_fn, max_seq_len, get_batch_divisibility_req)
    153                 for i, id_ in enumerate(gpu_ids) if i < config["num_gpus"]
    154             ]
--> 155             self.device = self.devices[0]
    156             ids = [
    157                 id_ for i, id_ in enumerate(gpu_ids) if i < config["num_gpus"]

IndexError: list index out of range

After debugging, I found that at ray/rllib/policy/torch_policy.py#L150, `ray.get_gpu_ids()` returns an empty list. How can I use the GPU for training?

Wormh0-le commented 3 years ago

I was using num_gpus_per_worker incorrectly. With the "tf" and "tfe" frameworks it works well, but with "torch" it raises "list index out of range". After downgrading Ray from 1.4 to 0.8.7 it now works well. Is this a bug in Ray 1.4?