`BaseEnv.render()` Returns the Last Frame of the Previous Episode After `BaseEnv.reset()`

hesic73 commented 4 days ago

In my training script, I have two environments:

One environment for collecting online data.
An evaluation environment (eval_env) with num_envs > 1, which I use to periodically evaluate the policy.

However, from the second evaluation onwards, the first frame in the recorded video is always the last frame from the previous episode. This happens even though I explicitly call eval_env.reset() before starting the evaluation.

Although I consistently observe this issue in my training script, I cannot reproduce it with the minimal script below. Also, if I set num_envs=1 in my training script (while still running it on the GPU), the issue does not occur.

Do you have any insights into what might be causing this issue, especially given that it only occurs with num_envs > 1?

import numpy as np
from typing import List, Tuple, Optional, Callable, Dict
import imageio.v3 as iio
from gymnasium import Wrapper, Env
import gymnasium as gym

from mani_skill.envs import BaseEnv
from mani_skill.utils.structs import Actor, Pose
from mani_skill.utils import sapien_utils
from mani_skill.sensors.camera import CameraConfig

import torch

from typing import Any, Optional, Tuple, Sequence, Dict, List, Union, Callable

from mani_skill.utils.registration import register_env
import time

@register_env("MyEnv-v0", max_episode_steps=50)
class MyEnv(BaseEnv):
    """Bare-bones task used for the repro: one red cube plus the robot.

    Every episode places the cube at (0, 0, 0.5) and moves the robot to
    (-100, -100, 0). ``evaluate`` never reports a failure.
    """

    def __init__(self, *args, **kwargs):
        # Assigned before the base-class constructor runs; nothing in this
        # class reads the flag afterwards.
        self._first_initialized = False
        super().__init__(*args, **kwargs)

    @property
    def _default_human_render_camera_configs(self):
        """Return a one-element list holding the "render_camera" config."""
        eye = [0.5, 0.5, 1.2]
        target = [0.0, 0.0, 0.5]
        camera_pose = sapien_utils.look_at(eye, target)
        # Positional arguments after the pose are presumably
        # width, height, fov, near, far -- TODO confirm against CameraConfig.
        render_camera = CameraConfig(
            "render_camera", camera_pose, 640, 480, 1, 0.01, 100)
        return [render_camera]

    def _load_scene(self, options: dict):
        """Build the cube actor and store it on ``self.cube``."""
        edge = 0.05
        cube_builder = self.scene.create_actor_builder()
        cube_builder.add_box_collision(half_size=[edge, edge, edge])
        cube_builder.add_box_visual(
            half_size=[edge, edge, edge],
            material=[1, 0, 0],
        )
        self.cube = cube_builder.build(name='cube')

    def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
        """Set the cube and robot poses; ``env_idx`` and ``options`` are unused here."""
        cube_position = torch.tensor([0, 0, 0.5], device=self.device)
        self.cube.set_pose(Pose.create_from_pq(p=cube_position))

        robot_position = torch.tensor([-100, -100, 0.0], device=self.device)
        self.agent.robot.set_pose(Pose.create_from_pq(p=robot_position))

    def evaluate(self) -> dict:
        """Return ``{"fail": <all-False bool tensor of length num_envs>}``."""
        no_failures = torch.zeros(
            self.num_envs, dtype=torch.bool, device=self.device)
        return {"fail": no_failures}

class _PreStepOrAnyDoneWrapper(Wrapper):
    """Wrapper that invokes a callback around ``step``.

    The callback is called with the wrapped env before every step, and a
    second time after the step whenever any of the watched sub-environments
    reports ``terminated | truncated``.

    Args:
        env: environment to wrap.
        func: callback taking the wrapped env.
        done_indices: indices of the sub-environments whose done flags are
            watched; ``None`` watches all of them.
    """

    def __init__(self, env, func: Callable[[Env,], None], done_indices: Optional[List[int]] = None):
        super().__init__(env)
        self._done_indices = done_indices
        self._func = func

    def step(self, action):
        """Step the wrapped env, firing the callback as described on the class."""
        callback = self._func
        callback(self.env)

        obs, reward, terminated, truncated, info = self.env.step(action)

        finished = terminated | truncated
        # Restrict the check to the watched indices when a subset was given.
        watched = (
            finished
            if self._done_indices is None
            else finished[self._done_indices]
        )
        if watched.any():
            callback(self.env)

        return obs, reward, terminated, truncated, info

class VideoRecorder:
    """In-memory frame buffer that can be written out as a video file."""

    def __init__(self):
        self._frames: List[np.ndarray] = list()

    def record_frame(self, frame: np.ndarray):
        """Append ``frame`` to the end of the buffer (the array is not copied)."""
        buffered = self._frames
        buffered.append(frame)

    def reset(self):
        """Drop all buffered frames by rebinding the buffer to a new empty list."""
        self._frames = list()

    def save(self, path: str):
        """Write the buffered frames to ``path`` at 30 fps using libx264.

        Raises:
            ValueError: if no frame has been recorded since the last reset.
        """
        buffered = self._frames
        if len(buffered) == 0:
            raise ValueError("No frames to save")
        iio.imwrite(path, buffered, codec='libx264', fps=30)

def rollout_until_first_done(env: BaseEnv):
    """Reset ``env`` and take random actions until sub-environment 0 is done.

    "Done" means ``terminated | truncated`` at index 0; the other
    sub-environments are not inspected.
    """
    env.reset()
    first_env_done = False
    while not first_env_done:
        sampled_action = env.action_space.sample()
        _, _, terminated, truncated, _ = env.step(action=sampled_action)
        first_env_done = (terminated | truncated)[0].item()

def main():
    """Run two rollouts of MyEnv-v0 and save the second one to ``demo.mp4``.

    Frames are captured through ``_PreStepOrAnyDoneWrapper`` watching
    sub-environment 0. The recorder is cleared between the two rollouts so
    the saved video contains only frames captured during the second one.
    """
    make_kwargs = dict(
        robot_uids='panda',
        render_mode='rgb_array',
        sim_backend='gpu',
        reward_mode='none',
        num_envs=128,
    )
    env = gym.make(id="MyEnv-v0", **make_kwargs)

    video_recorder = VideoRecorder()

    def record_frame_func(env: BaseEnv):
        # Render, keep only the first image when a 4-D batch is returned,
        # and hand the result to the recorder as a numpy array.
        rendered: torch.Tensor = env.render()
        if rendered.ndim > 3:
            assert rendered.ndim == 4
            rendered = rendered[0]
        assert rendered.ndim == 3  # (H, W, C)
        video_recorder.record_frame(rendered.cpu().numpy())

    env = _PreStepOrAnyDoneWrapper(env, record_frame_func, done_indices=[0])

    # First rollout: its frames are discarded by the reset below.
    rollout_until_first_done(env)
    video_recorder.reset()

    # Second rollout: these are the frames that end up in the video.
    rollout_until_first_done(env)
    video_recorder.save('demo.mp4')

# Run the repro only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

StoneT2000 commented 2 days ago

Are you saying the issue is in your own video recording code or the RecordEpisode wrapper we provide?

hesic73 commented 2 days ago

In my own code.

StoneT2000 commented 2 days ago

I see. You mention this minimal script can't reproduce the issue? I'm not sure how I can help debug here.

My only guess is that you may be using the vector env wrapper, which converts the ManiSkill env to the Gymnasium vector env API. By default, that wrapper auto-resets, which calls env.reset. I suspect it is not calling your video recorder's reset function in the way you expect.

haosulab / ManiSkill

`BaseEnv.render()` Returns the Last Frame of the Previous Episode After `BaseEnv.reset()` #621