different observation with/without `stack=True`

When I set stack = True/False for the same environment and get the first observation&reward&action: for stack case, the first observation is dataset_s['observations'][0][0,:]) for unstack case, the first observation is dataset['observations'][0,:]).

The question is, in both cases, the reward list and the action list is same. But the observation list in stack/unstack cases are different. I attached the first observation in stack/unstack case. I wonder what the reason is? Could you please explain it? Thanks in advance.

stacked case

unstacked case

here's the code:

import gym
import d4rl_atari
import pickle
import numpy as np
import matplotlib.pyplot as plt

def test_stack():
    env_s = gym.make('ms-pacman-expert-v0', stack=True) # -v{0, 1, 2, 3, 4} for datasets with the other random seeds
    env_s.reset()
    dataset_s = env_s.get_dataset()
    ob_s = dataset_s['observations'][0]
    # print(len(ob_s)) 1m
    # print(ob_s[0].shape) (4,84,84)
    re_s = dataset_s['rewards']
    # print(re_s.shape) (1m,)

    env = gym.make('ms-pacman-expert-v0', stack=False)
    env.reset()
    dataset = env.get_dataset()
    ob = dataset['observations'][0,:]
    re = dataset['rewards']
    print(np.sum(re != re_s))  # 0, so reward sequence is same
    a_s = dataset_s['actions']
    a = dataset['actions']
    print(np.sum(a_s != a)) #0, so action sequence is same
    o_s = ob_s[0,:]
    plt.imshow(o_s)
    plt.show()
    o = ob[0,:]
    plt.imshow(o)
    plt.show()
    # print(np.sum(o_s != o))

if __name__ == '__main__':
    test_stack()

takuseno / d4rl-atari

different observation with/without `stack=True` #6