rlworkgroup / garage

A toolkit for reproducible reinforcement learning research.
MIT License
1.84k stars 309 forks source link

torch.cuda.is_available() is False, but it is not! #2320

Closed bara-bba closed 2 years ago

bara-bba commented 2 years ago

Hi everyone!

I'm trying to use the RaySampler with the SAC algorithm, but I get a RuntimeError saying that no CUDA device is detected, even though one is detected in the main script. The code is attached below:

#!/usr/bin/env python3
"""This is an example to train a task with SAC algorithm written in PyTorch."""
import os
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from garage import wrap_experiment
from garage.envs import GymEnv, normalize
from garage.experiment import deterministic
from garage.replay_buffer import PathBuffer
from garage.sampler import RaySampler, VecWorker
from garage.torch import set_gpu_mode
from garage.torch.algos import SAC
from garage.torch.policies import TanhGaussianMLPPolicy
from garage.torch.q_functions import ContinuousMLPQFunction
from garage.trainer import TFTrainer, Trainer

from panda_env import PandaEnv

"""Snapshotter snapshots training data.

When training, it saves data to binary files. When resuming,
it loads from saved data.

Args:
    snapshot_dir (str): Path to save the log and iteration snapshot.
    snapshot_mode (str): Mode to save the snapshot. Can be either "all"
        (all iterations will be saved), "last" (only the last iteration
        will be saved), "gap" (every snapshot_gap iterations are saved),
        "gap_and_last" (save the last iteration as 'params.pkl' and save
        every snapshot_gap iteration separately), "gap_overwrite" (same as
        gap but overwrites the last saved snapshot), or "none" (do not
        save snapshots).
    snapshot_gap (int): Gap between snapshot iterations. Wait this number
        of iterations before taking another snapshot.

"""

@wrap_experiment(snapshot_mode='last')
def garage_sac_panda_position(ctxt=None, seed=1):
    """Set up the Panda environment and PyTorch SAC, then run training.

    Episodes are collected by a RaySampler using vectorized workers.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by Trainer to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    deterministic.set_seed(seed)

    # SAC below is the PyTorch implementation, so use the plain Trainer
    # instead of the TensorFlow-specific TFTrainer.
    trainer = Trainer(snapshot_config=ctxt)

    env = normalize(GymEnv(PandaEnv(), max_episode_length=1000))

    policy = TanhGaussianMLPPolicy(
        env_spec=env.spec,
        hidden_sizes=[256, 256],
        hidden_nonlinearity=nn.ReLU,
        output_nonlinearity=None,
        min_std=np.exp(-20.),
        max_std=np.exp(2.),
    )

    # Two independent Q-functions with identical architecture.
    qf1 = ContinuousMLPQFunction(env_spec=env.spec,
                                 hidden_sizes=[256, 256],
                                 hidden_nonlinearity=F.relu)

    qf2 = ContinuousMLPQFunction(env_spec=env.spec,
                                 hidden_sizes=[256, 256],
                                 hidden_nonlinearity=F.relu)

    replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))

    # is_tf_worker is left at its default: the workers run a PyTorch
    # policy, not a TensorFlow one.
    sampler = RaySampler(agents=policy,
                         envs=env,
                         max_episode_length=env.spec.max_episode_length,
                         n_workers=2,
                         worker_class=VecWorker,
                         worker_args=dict(n_envs=6))

    sac = SAC(env_spec=env.spec,
              policy=policy,
              qf1=qf1,
              qf2=qf2,
              sampler=sampler,
              gradient_steps_per_itr=1000,
              max_episode_length_eval=5000,
              replay_buffer=replay_buffer,
              min_buffer_size=1e4,
              target_update_tau=5e-3,
              discount=0.99,
              buffer_batch_size=256,
              reward_scale=1.,
              steps_per_epoch=1)

    # Enable GPU mode only when CUDA is visible to this (main) process.
    set_gpu_mode(torch.cuda.is_available())
    # NOTE(review): sac.to() presumably moves the networks to the device
    # selected above. Sampler worker processes that cannot see a CUDA device
    # would then fail to deserialize the policy ("Attempting to deserialize
    # object on a CUDA device ..."). This block does not work around that;
    # confirm against the upstream sampler fix.
    sac.to()
    trainer.setup(algo=sac, env=env)
    trainer.train(n_epochs=3000, batch_size=1000)

# NOTE(review): `s` is drawn at random but never passed on; the experiment
# below always runs with the hard-coded seed 521.
s = np.random.randint(0, 1000)
garage_sac_panda_position(seed=521)

And the resulting Error:

Traceback (most recent call last):
  File "scripts/garage_sac.py", line 117, in <module>
    garage_sac_panda_position(seed=521)
  File "/home/bara/.local/lib/python3.8/site-packages/garage/experiment/experiment.py", line 369, in __call__
    result = self.function(ctxt, **kwargs)
  File "scripts/garage_sac.py", line 113, in garage_sac_panda_position
    trainer.train(n_epochs=3000, batch_size=1000)
  File "/home/bara/.local/lib/python3.8/site-packages/garage/trainer.py", line 402, in train
    average_return = self._algo.train(self)
  File "/home/bara/.local/lib/python3.8/site-packages/garage/torch/algos/sac.py", line 195, in train
    trainer.step_episode = trainer.obtain_samples(
  File "/home/bara/.local/lib/python3.8/site-packages/garage/trainer.py", line 260, in obtain_samples
    eps = self.obtain_episodes(itr, batch_size, agent_update, env_update)
  File "/home/bara/.local/lib/python3.8/site-packages/garage/trainer.py", line 224, in obtain_episodes
    episodes = self._sampler.obtain_samples(
  File "/home/bara/.local/lib/python3.8/site-packages/garage/sampler/ray_sampler.py", line 188, in obtain_samples
    upd = [ray.get(up) for up in updated]
  File "/home/bara/.local/lib/python3.8/site-packages/garage/sampler/ray_sampler.py", line 188, in <listcomp>
    upd = [ray.get(up) for up in updated]
  File "/home/bara/.local/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/bara/.local/lib/python3.8/site-packages/ray/worker.py", line 1713, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError: ray::SamplerWorker.update() (pid=8853, ip=192.168.1.217, repr=<garage.sampler.ray_sampler.SamplerWorker object at 0x7faf01bafdf0>)
  At least one of the input arguments for this task could not be computed:
ray.exceptions.RaySystemError: System error: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
traceback: Traceback (most recent call last):
  File "/home/bara/.local/lib/python3.8/site-packages/ray/serialization.py", line 281, in deserialize_objects
    obj = self._deserialize_object(data, metadata, object_ref)
  File "/home/bara/.local/lib/python3.8/site-packages/ray/serialization.py", line 194, in _deserialize_object
    return self._deserialize_msgpack_data(data, metadata_fields)
  File "/home/bara/.local/lib/python3.8/site-packages/ray/serialization.py", line 172, in _deserialize_msgpack_data
    python_objects = self._deserialize_pickle5_data(pickle5_data)
  File "/home/bara/.local/lib/python3.8/site-packages/ray/serialization.py", line 162, in _deserialize_pickle5_data
    obj = pickle.loads(in_band)
  File "/home/bara/.local/lib/python3.8/site-packages/torch/storage.py", line 141, in _load_from_bytes
    print(torch.load(io.BytesIO(b)))
  File "/home/bara/.local/lib/python3.8/site-packages/torch/serialization.py", line 595, in load
    return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)
  File "/home/bara/.local/lib/python3.8/site-packages/torch/serialization.py", line 774, in _legacy_load
    result = unpickler.load()
  File "/home/bara/.local/lib/python3.8/site-packages/torch/serialization.py", line 730, in persistent_load
    deserialized_objects[root_key] = restore_location(obj, location)
  File "/home/bara/.local/lib/python3.8/site-packages/torch/serialization.py", line 175, in default_restore_location
    result = fn(storage, location)
  File "/home/bara/.local/lib/python3.8/site-packages/torch/serialization.py", line 151, in _cuda_deserialize
    device = validate_cuda_device(location)
  File "/home/bara/.local/lib/python3.8/site-packages/torch/serialization.py", line 135, in validate_cuda_device
    raise RuntimeError('Attempting to deserialize object on a CUDA '
RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU

I'm using Ubuntu 20.04, an AMD CPU (I don't know whether that is relevant), and an RTX 2060 Super. Thanks in advance!

krzentner commented 2 years ago

This issue appears to be a duplicate of #2230. One solution is available in this commit, which I should probably merge. You can also work around it by using MultiprocessingSampler instead of RaySampler.

bara-bba commented 2 years ago

I tried MultiprocessingSampler, but it gives me the same error.