AlignmentResearch / vlmrm

MIT License
44 stars 12 forks source link

Problems about installations #5

Open LemonZhong opened 3 months ago

LemonZhong commented 3 months ago

This is quite good work. I installed it with pip install dev.., but I ran into quite a lot of problems.

(vlmrm) root@autodl-container-d33848b29e-3752a142:~/vlmrm/vlmrm# vlmrm train "$(cat config.yaml)" /root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/pydantic/_internal/_fields.py:127: UserWarning: Field "modelcheckpoint" has conflict with protected namespace "model".

You may be able to resolve this warning by setting model_config['protected_namespaces'] = (). warnings.warn( /root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/pydantic/_internal/_fields.py:127: UserWarning: Field "model_basepath" has conflict with protected namespace "model".

You may be able to resolve this warning by setting model_config['protected_namespaces'] = (). warnings.warn( 2024-08-02 19:13:27.076 | INFO | vlmrm.trainer.train:train:146 - Command called: /root/miniconda3/envs/vlmrm/bin/vlmrm train env_name: Humanoid-v4 # RL environment name base_path: /data/runs/training # Base path to save logs and checkpoints seed: 42 # Seed for reproducibility description: Humanoid training using CLIP reward tags: # Wandb tags

You may be able to resolve this warning by setting model_config['protected_namespaces'] = (). warnings.warn( /root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/pydantic/_internal/_fields.py:127: UserWarning: Field "model_basepath" has conflict with protected namespace "model".

You may be able to resolve this warning by setting model_config['protected_namespaces'] = (). warnings.warn( 2024-08-02 19:13:30.281 | INFO | vlmrm.trainer.train:primary_worker:69 - Creating environment instance /root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/pydantic/_internal/_fields.py:127: UserWarning: Field "modelcheckpoint" has conflict with protected namespace "model".

You may be able to resolve this warning by setting model_config['protected_namespaces'] = (). warnings.warn( /root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/pydantic/_internal/_fields.py:127: UserWarning: Field "model_basepath" has conflict with protected namespace "model".

You may be able to resolve this warning by setting model_config['protected_namespaces'] = (). warnings.warn( Process ForkServerProcess-1:2: Traceback (most recent call last): File "/root/miniconda3/envs/vlmrm/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap self.run() File "/root/miniconda3/envs/vlmrm/lib/python3.9/multiprocessing/process.py", line 108, in run self._target(self._args, self._kwargs) File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 29, in _worker env = _patch_env(env_fn_wrapper.var()) File "/root/vlmrm/vlmrm/src/vlmrm/contrib/sb3/make_vec_env.py", line 66, in _init env = make_env_fn(env_kwargs) File "/root/vlmrm/vlmrm/src/vlmrm/envs/base.py", line 25, in make_env_wrapper env = gymnasium.make( File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/gymnasium/envs/registration.py", line 755, in make env_creator = load_env_creator(env_spec.entry_point) File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/gymnasium/envs/registration.py", line 553, in load_env_creator mod = importlib.import_module(mod_name) File "/root/miniconda3/envs/vlmrm/lib/python3.9/importlib/init.py", line 127, in import_module return _bootstrap._gcd_import(name[level:], package, level) File "", line 1030, in _gcd_import File "", line 1007, in _find_and_load File "", line 986, in _find_and_load_unlocked File "", line 680, in _load_unlocked File "", line 790, in exec_module File "", line 228, in _call_with_frames_removed File "/root/vlmrm/vlmrm/src/vlmrm/envs/mujoco/clip_rewarded_humanoid.py", line 6, in from gymnasium.envs.mujoco import MujocoEnv File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/gymnasium/envs/mujoco/init.py", line 1, in from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv # isort:skip File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/gymnasium/envs/mujoco/mujoco_env.py", line 19, in import mujoco File 
"/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/mujoco/init.py", line 48, in from mujoco.gl_context import File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/mujoco/gl_context.py", line 41, in from mujoco.egl import GLContext as _GLContext File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/mujoco/egl/init.py", line 31, in from mujoco.egl import egl_ext as EGL File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/mujoco/egl/egl_ext.py", line 27, in from OpenGL import EGL File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/OpenGL/EGL/init.py", line 2, in from OpenGL.raw.EGL._types import * File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/OpenGL/raw/EGL/_types.py", line 87, in raw_eglQueryString = _p.PLATFORM.EGL.eglQueryString AttributeError: 'NoneType' object has no attribute 'eglQueryString' /root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/pydantic/_internal/_fields.py:127: UserWarning: Field "modelcheckpoint" has conflict with protected namespace "model".

You may be able to resolve this warning by setting model_config['protected_namespaces'] = (). warnings.warn( /root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/pydantic/_internal/_fields.py:127: UserWarning: Field "model_basepath" has conflict with protected namespace "model".

You may be able to resolve this warning by setting model_config['protected_namespaces'] = (). warnings.warn( Process ForkServerProcess-1:1: Traceback (most recent call last): File "/root/miniconda3/envs/vlmrm/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap self.run() File "/root/miniconda3/envs/vlmrm/lib/python3.9/multiprocessing/process.py", line 108, in run self._target(self._args, self._kwargs) File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 29, in _worker env = _patch_env(env_fn_wrapper.var()) File "/root/vlmrm/vlmrm/src/vlmrm/contrib/sb3/make_vec_env.py", line 66, in _init env = make_env_fn(env_kwargs) File "/root/vlmrm/vlmrm/src/vlmrm/envs/base.py", line 25, in make_env_wrapper env = gymnasium.make( File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/gymnasium/envs/registration.py", line 755, in make env_creator = load_env_creator(env_spec.entry_point) File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/gymnasium/envs/registration.py", line 553, in load_env_creator mod = importlib.import_module(mod_name) File "/root/miniconda3/envs/vlmrm/lib/python3.9/importlib/init.py", line 127, in import_module return _bootstrap._gcd_import(name[level:], package, level) File "", line 1030, in _gcd_import File "", line 1007, in _find_and_load File "", line 986, in _find_and_load_unlocked File "", line 680, in _load_unlocked File "", line 790, in exec_module File "", line 228, in _call_with_frames_removed File "/root/vlmrm/vlmrm/src/vlmrm/envs/mujoco/clip_rewarded_humanoid.py", line 6, in from gymnasium.envs.mujoco import MujocoEnv File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/gymnasium/envs/mujoco/init.py", line 1, in from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv # isort:skip File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/gymnasium/envs/mujoco/mujoco_env.py", line 19, in import mujoco File 
"/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/mujoco/init.py", line 48, in from mujoco.gl_context import File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/mujoco/gl_context.py", line 41, in from mujoco.egl import GLContext as _GLContext File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/mujoco/egl/init.py", line 31, in from mujoco.egl import egl_ext as EGL File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/mujoco/egl/egl_ext.py", line 27, in from OpenGL import EGL File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/OpenGL/EGL/init.py", line 2, in from OpenGL.raw.EGL._types import * File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/OpenGL/raw/EGL/_types.py", line 87, in raw_eglQueryString = _p.PLATFORM.EGL.eglQueryString AttributeError: 'NoneType' object has no attribute 'eglQueryString' 2024-08-02 19:13:34.172 | ERROR | vlmrm.trainer.train:train:184 - An error has been caught in function 'train', process 'MainProcess' (21862), thread 'MainThread' (140350658221888): Traceback (most recent call last):

File "/root/miniconda3/envs/vlmrm/bin/vlmrm", line 8, in sys.exit(main()) │ │ └ <function main at 0x7fa5eef223a0> │ └ └ <module 'sys' (built-in)>

File "/root/vlmrm/vlmrm/src/vlmrm/cli/main.py", line 12, in main app() └ <typer.main.Typer object at 0x7fa5eef13c10>

File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/typer/main.py", line 311, in call return get_command(self)(*args, kwargs) │ │ │ └ {} │ │ └ () │ └ <typer.main.Typer object at 0x7fa5eef13c10> └ <function get_command at 0x7fa5ee57df70> File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/click/core.py", line 1157, in call return self.main(args, kwargs) │ │ │ └ {} │ │ └ () │ └ <function TyperGroup.main at 0x7fa5ee572c10> └ File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/typer/core.py", line 778, in main return _main( └ <function _main at 0x7fa5ee56eee0> File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/typer/core.py", line 216, in _main rv = self.invoke(ctx) │ │ └ <click.core.Context object at 0x7fa5e3320bb0> │ └ <function MultiCommand.invoke at 0x7fa5eec660d0> └ File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/click/core.py", line 1688, in invoke return _process_result(sub_ctx.command.invoke(sub_ctx)) │ │ │ │ └ <click.core.Context object at 0x7fa5e32297c0> │ │ │ └ <function Command.invoke at 0x7fa5eec61b80> │ │ └ │ └ <click.core.Context object at 0x7fa5e32297c0> └ <function MultiCommand.invoke.._process_result at 0x7fa5e3228310> File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/click/core.py", line 1434, in invoke return ctx.invoke(self.callback, ctx.params) │ │ │ │ │ └ {'config': 'env_name: Humanoid-v4 # RL environment name\nbase_path: /data/runs/training # Base path to save logs and checkpoi... │ │ │ │ └ <click.core.Context object at 0x7fa5e32297c0> │ │ │ └ <function train at 0x7fa5e3230670> │ │ └ │ └ <function Context.invoke at 0x7fa5eec5d940> └ <click.core.Context object at 0x7fa5e32297c0> File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/click/core.py", line 783, in invoke return __callback(args, kwargs) │ └ {'config': 'env_name: Humanoid-v4 # RL environment name\nbase_path: /data/runs/training # Base path to save logs and checkpoi... 
└ () File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/typer/main.py", line 683, in wrapper return callback(**use_params) # type: ignore │ └ {'config': 'env_name: Humanoid-v4 # RL environment name\nbase_path: /data/runs/training # Base path to save logs and checkpoi... └ <function train at 0x7fa5e32303a0>

File "/root/vlmrm/vlmrm/src/vlmrm/trainer/train.py", line 184, in train _train() └ <function train.._train at 0x7fa5e342f940>

File "/root/vlmrm/vlmrm/src/vlmrm/trainer/train.py", line 172, in _train multiprocessing.spawn( │ └ <function spawn at 0x7fa5e3315670> └ <module 'vlmrm.multiprocessing' from '/root/vlmrm/vlmrm/src/vlmrm/multiprocessing.py'>

File "/root/vlmrm/vlmrm/src/vlmrm/multiprocessing.py", line 103, in spawn return start_processes(fn, args, nprocs, join, daemon, start_method="spawn") │ │ │ │ │ └ False │ │ │ │ └ True │ │ │ └ 1 │ │ └ ('nccl', Config(env_name='Humanoid-v4', base_path=PosixPath('/data/runs/training'), seed=42, description='Humanoid training u... │ └ <function init_process at 0x7fa5e3230430> └ <function start_processes at 0x7fa5e33155e0>

File "/root/vlmrm/vlmrm/src/vlmrm/multiprocessing.py", line 59, in start_processes while not context.join(): │ └ <function ProcessContext.join at 0x7fa55372ce50> └ <torch.multiprocessing.spawn.ProcessContext object at 0x7fa5e3421100>

File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 160, in join raise ProcessRaisedException(msg, error_index, failed_process.pid) │ │ │ │ └ <property object at 0x7fa55376ea90> │ │ │ └ │ │ └ 0 │ └ '\n\n-- Process 0 terminated with the following error:\nTraceback (most recent call last):\n File "/root/vlmrm/vlmrm/src/vlm... └ <class 'torch.multiprocessing.spawn.ProcessRaisedException'>

torch.multiprocessing.spawn.ProcessRaisedException:

-- Process 0 terminated with the following error: Traceback (most recent call last): File "/root/vlmrm/vlmrm/src/vlmrm/multiprocessing.py", line 17, in _wrap fn(i, stop_event, *args) File "/root/vlmrm/vlmrm/src/vlmrm/trainer/train.py", line 202, in init_process primary_worker(config, config_dump, stop_event) File "/root/vlmrm/vlmrm/src/vlmrm/trainer/train.py", line 70, in primary_worker vec_env = make_vec_env( File "/root/vlmrm/vlmrm/src/vlmrm/contrib/sb3/make_vec_env.py", line 100, in make_vec_env vec_env = vec_env_cls(make_env_fns, **vec_env_kwargs) File "/root/vlmrm/vlmrm/src/vlmrm/contrib/sb3/subproc_vec_env.py", line 11, in __init__ super().__init__(*args, **kwargs) File "/root/miniconda3/envs/vlmrm/lib/python3.9/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 120, in __init__ observation_space, action_space = self.remotes[0].recv() File "/root/miniconda3/envs/vlmrm/lib/python3.9/multiprocessing/connection.py", line 255, in recv buf = self._recv_bytes() File "/root/miniconda3/envs/vlmrm/lib/python3.9/multiprocessing/connection.py", line 419, in _recv_bytes buf = self._recv(4) File "/root/miniconda3/envs/vlmrm/lib/python3.9/multiprocessing/connection.py", line 384, in _recv chunk = read(handle, remaining) ConnectionResetError: [Errno 104] Connection reset by peer

LemonZhong commented 3 months ago

How can I solve them?