DLR-RM / stable-baselines3

PyTorch version of Stable Baselines, reliable implementations of reinforcement learning algorithms.
https://stable-baselines3.readthedocs.io
MIT License
8.92k stars 1.68k forks source link

[Question] Terminated due to signal: ILLEGAL INSTRUCTION (4) #1265

Closed austinmw closed 1 year ago

austinmw commented 1 year ago

❓ Question

Hi, I'm attempting to train PPO on the Chrome Dino game using Selenium and running into the following error:

Terminated due to signal: ILLEGAL INSTRUCTION (4)

I'm running on an M1 MacBook Pro with Python 3.10, PyTorch 1.13, numpy 1.24.1, and the master branch of stable-baselines3.

My script is:

import base64
import os
import time
from collections import deque
from io import BytesIO
import imageio
from tqdm import tqdm

import cv2
import gym
import numpy as np
from PIL import Image
from gym import spaces
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.ppo.policies import CnnPolicy
from stable_baselines3.common.vec_env import SubprocVecEnv

class EnvironmentChromeTRex(gym.Env):
    """Gym environment that drives Chrome's built-in T-Rex game via Selenium.

    Each observation is the last ``FRAME_STACK`` grayscale frames of the game
    canvas, cropped, resized to ``(screen_height, screen_width)`` and stacked
    along the last axis. Actions are discrete: 0 = do nothing (right arrow),
    1 = jump (up arrow), 2 = duck (down arrow).
    """

    # Number of consecutive frames stacked into a single observation.
    FRAME_STACK = 4

    def __init__(self,
        screen_width,  # width of the compressed image
        screen_height,  # height of the compressed image
        chromedriver_path: str = 'chromedriver'
    ):
        """Open a Chrome session and declare the action/observation spaces.

        :param screen_width: width in pixels of each resized frame.
        :param screen_height: height in pixels of each resized frame.
        :param chromedriver_path: stored on the instance; the driver binary
            actually used is the one resolved by ``ChromeDriverManager``.
        """
        self.screen_width = screen_width
        self.screen_height = screen_height
        self.chromedriver_path = chromedriver_path
        self.num_observation = 0
        # Must exist before render()/close() are called; created lazily in render().
        self.viewer = None

        self.action_space = spaces.Discrete(3)  # set of actions: do nothing, jump, down
        # cv2.resize(img, (width, height)) returns arrays shaped (height, width),
        # so the declared space is height-first to match the real observations.
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(self.screen_height, self.screen_width, self.FRAME_STACK),
            dtype=np.uint8
        )
        # connection to chrome
        _chrome_options = webdriver.ChromeOptions()
        _chrome_options.add_argument("--mute-audio")
        _chrome_options.add_argument("disable-infobars")
        # _chrome_options.add_argument("--disable-gpu") # if running on Windows

        self._driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=_chrome_options)

        self.current_key = None
        # current state represented by the last FRAME_STACK images
        self.state_queue = deque(maxlen=self.FRAME_STACK)

        self.actions_map = [
            Keys.ARROW_RIGHT,  # do nothing
            Keys.ARROW_UP,  # jump
            Keys.ARROW_DOWN  # down
        ]
        # One independent chain per key: ActionChains methods return the chain
        # itself, so reusing a single instance would make every list entry the
        # same object with all key presses queued on it.
        self.keydown_actions = [
            ActionChains(self._driver).key_down(key) for key in self.actions_map
        ]
        self.keyup_actions = [
            ActionChains(self._driver).key_up(key) for key in self.actions_map
        ]

    def reset(self):
        """Reload the game page, start a new run and return the first observation."""
        try:
            self._driver.get('chrome://dino')
        except WebDriverException as e:
            # Best effort: the navigation error is printed and the method goes
            # on to wait for the game canvas below.
            print(e)

        WebDriverWait(self._driver, 10).until(
            EC.presence_of_element_located((
                By.CLASS_NAME,
                "runner-canvas"
            ))
        )

        # Drop frames from the previous episode so they cannot leak into the
        # first stacked observation of the new one.
        self.state_queue.clear()

        # trigger game start
        self._driver.find_element(By.ID, "t").send_keys(Keys.SPACE)
        return self._next_observation()

    def _get_image(self):
        """Return the current game canvas as an ``(H, W, 3)`` RGB uint8 array."""
        data_url = self._driver.execute_script(
            "return document.querySelector('canvas.runner-canvas').toDataURL()"
        )
        # A data URL looks like "data:image/png;base64,<payload>".
        payload = data_url.partition(",")[2]
        picture = Image.open(BytesIO(base64.b64decode(payload)))
        # Normalise to 3-channel RGB so callers never have to handle alpha.
        return np.array(picture.convert("RGB"))

    def _next_observation(self):
        """Grab a frame, push it on the frame queue and return the stacked observation."""
        # _get_image() yields RGB order, hence the RGB (not BGR) conversion code.
        frame = cv2.cvtColor(self._get_image(), cv2.COLOR_RGB2GRAY)
        frame = frame[:500, :480]  # cropping
        frame = cv2.resize(frame, (self.screen_width, self.screen_height))

        self.num_observation += 1
        self.state_queue.append(frame)

        if len(self.state_queue) < self.FRAME_STACK:
            # Not enough history yet: repeat the newest frame to fill the stack.
            return np.stack([frame] * self.FRAME_STACK, axis=-1)
        return np.stack(self.state_queue, axis=-1)

    def _get_score(self):
        """Return the score shown by the game's distance meter, or 0 if unreadable."""
        try:
            digits = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
            return int(''.join(digits))
        except (WebDriverException, TypeError, ValueError):
            # Script failure, a null result, or an empty/non-numeric digit list.
            return 0

    def _get_done(self):
        """Return True once the game reports that the dinosaur has crashed."""
        return bool(self._driver.execute_script("return Runner.instance_.crashed"))

    def step(self, action: int):
        """Send the key for ``action`` and return ``(obs, reward, done, info)``.

        The reward is 0.1 for every step survived and -1 on the crashing step;
        ``info["score"]`` carries the in-game score.
        """
        # Selenium 4 removed find_element_by_tag_name; use the By locator API.
        self._driver.find_element(By.TAG_NAME, "body") \
            .send_keys(self.actions_map[action])

        obs = self._next_observation()

        done = self._get_done()
        reward = .1 if not done else -1

        # Short pause between consecutive steps.
        time.sleep(.015)

        return obs, reward, done, {"score": self._get_score()}

    def render(self, mode: str = 'human'):
        """Render the current frame.

        ``'rgb_array'`` returns the RGB frame; ``'human'`` shows it in a
        ``SimpleImageViewer`` window and returns whether the window is open.
        Any other mode returns ``None``.
        """
        img = self._get_image()  # already RGB
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
            return self.viewer.isopen

    def close(self):
        """Close the viewer window (if any) and shut down the Chrome session."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
        # getattr guards against __init__ having failed before the driver existed.
        driver = getattr(self, "_driver", None)
        if driver is not None:
            driver.quit()
            self._driver = None

if __name__ == '__main__':
    # The guard is required: SubprocVecEnv starts worker processes, which may
    # re-import this module.

    def make_env():
        """Build one game environment (called once per SubprocVecEnv worker)."""
        return EnvironmentChromeTRex(
            screen_width=96,
            screen_height=96,
            chromedriver_path=os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                "chromedriver"
            )
        )

    do_train = True
    num_cpu = 1
    # Final model file written by model.save() and read back by PPO.load().
    save_path = "chrome_dino_ppo_cnn_1980000_steps.zip"
    # Prefix for intermediate checkpoints; the callback appends
    # "_<num_timesteps>_steps.zip" itself, so it must not carry an extension.
    checkpoint_prefix = "chrome_dino_ppo_cnn"
    env = SubprocVecEnv([make_env for _ in range(num_cpu)])

    try:
        if do_train:
            checkpoint_callback = CheckpointCallback(
                save_freq=200000,
                save_path='./.checkpoints/',
                name_prefix=checkpoint_prefix,
            )

            model = PPO(
                CnnPolicy,
                env,
                verbose=1,
                tensorboard_log="./.tb_chromedino_env/",
            )

            model.learn(
                total_timesteps=2000000, callback=[checkpoint_callback]
            )

            model.save(save_path)

        # Load the file model.save() wrote above. The original looked under
        # './.checkpoints/', where no file with this name is ever created.
        model = PPO.load(save_path, env=env)

        # Roll out the trained policy and record one frame per step.
        frames = []
        obs = env.reset()
        frame = env.render(mode='rgb_array')

        for _ in tqdm(range(500)):
            frames.append(frame)
            action, _states = model.predict(obs, deterministic=True)
            obs, rewards, dones, info = env.step(action)

            # env.render(mode='human')

            frame = env.render(mode='rgb_array')

        imageio.mimsave('dino.gif', [np.array(f) for f in frames], fps=15)
    finally:
        # Always stop the worker processes (and their browsers), even when
        # training or the rollout fails.
        env.close()

And the full output is:

Using cpu device
Wrapping the env in a VecTransposeImage.
[W NNPACK.cpp:53] Could not initialize NNPACK! Reason: Unsupported hardware.
Message: unknown error: net::ERR_INTERNET_DISCONNECTED
  (Session info: chrome=108.0.5359.124)
Stacktrace:
0   chromedriver                        0x000000010450ef38 chromedriver + 4910904
1   chromedriver                        0x000000010448ea03 chromedriver + 4385283
2   chromedriver                        0x00000001040d3747 chromedriver + 472903
3   chromedriver                        0x00000001040cb242 chromedriver + 438850
4   chromedriver                        0x00000001040bd17d chromedriver + 381309
5   chromedriver                        0x00000001040be8d5 chromedriver + 387285
6   chromedriver                        0x00000001040bd4ad chromedriver + 382125
7   chromedriver                        0x00000001040bc22f chromedriver + 377391
8   chromedriver                        0x00000001040bc0a0 chromedriver + 376992
9   chromedriver                        0x00000001040ba7f2 chromedriver + 370674
10  chromedriver                        0x00000001040babb4 chromedriver + 371636
11  chromedriver                        0x00000001040d5b68 chromedriver + 482152
12  chromedriver                        0x00000001041599e1 chromedriver + 1022433
13  chromedriver                        0x000000010413e112 chromedriver + 909586
14  chromedriver                        0x000000010415928e chromedriver + 1020558
15  chromedriver                        0x000000010413dee3 chromedriver + 909027
16  chromedriver                        0x000000010410830c chromedriver + 688908
17  chromedriver                        0x000000010410988e chromedriver + 694414
18  chromedriver                        0x00000001044dc1de chromedriver + 4702686
19  chromedriver                        0x00000001044e0b19 chromedriver + 4721433
20  chromedriver                        0x00000001044e828e chromedriver + 4752014
21  chromedriver                        0x00000001044e191a chromedriver + 4725018
22  chromedriver                        0x00000001044b5b02 chromedriver + 4545282
23  chromedriver                        0x0000000104500888 chromedriver + 4851848
24  chromedriver                        0x0000000104500a05 chromedriver + 4852229
25  chromedriver                        0x0000000104516e5f chromedriver + 4943455
26  libsystem_pthread.dylib             0x00007ff8037354e1 _pthread_start + 125
27  libsystem_pthread.dylib             0x00007ff803730f6b thread_start + 15

Terminated due to signal: ILLEGAL INSTRUCTION (4)

I'm new to stable-baselines and not sure how to debug this error. Google didn't turn up much in the way of similar issues. Any help is greatly appreciated!

Checklist

qgallouedec commented 1 year ago

Your question is actually about a custom environment. I am not able to tell you what the problem is, but here is a very useful tool to help you debug your env:

from stable_baselines3.common.env_checker import check_env

# Instantiate your own environment (CustomEnv and its arguments are placeholders)
env = CustomEnv(arg1, ...)
# It will check your custom environment and output additional warnings if needed
check_env(env)

I am closing this issue because it was not filed under the right template. If the problem persists, I invite you to open a new issue using the right template: Issues > Issue: 🤖 Custom Gym Environment Issue, which will help you explain your problem better so that we can more easily spot the error.

qgallouedec commented 1 year ago

Also check this: https://discuss.pytorch.org/t/bug-w-nnpack-cpp-80-could-not-initialize-nnpack-reason-unsupported-hardware/107518/6