[Bug] "The kernel has died..." during Ray tune.run

Search before asking

[x] I searched the issues and found no similar issues.

Ray Component

Ray Core, Ray Tune

What happened + What you expected to happen

I'm trying to start notebook from this article locally. I slightly modified this notebook and added loop in which I will train a few agents. But after ~15 minutes after loop start I get this error:

The kernel has died, and the automatic restart has failed. It is possible the kernel cannot be restarted. If you are not able to restart the kernel, you will still be able to save the notebook, but running code will no longer work until the notebook is reopened.

The last time I also saw this output in addition to "The kernel has died...":

WARNING tune.py:582 -- SIGINT received (e.g. via Ctrl+C), ending Ray Tune run. This will try to checkpoint the experiment state one last time. Press CTRL+C one more time (or send SIGINT/SIGKILL/SIGTERM) to skip.

But I was AFK, and couldn't use Ctrl+C What is the problem and how can I fix it?

Versions / Dependencies

Ray: 1.9.2 Python: 3.9.7 FinRL: 0.3.3

Reproduction script

#Importing the libraries
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime
import optuna
%matplotlib inline
from finrl.apps import config
from finrl.finrl_meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.finrl_meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.finrl_meta.env_stock_trading.env_stocktrading_np import StockTradingEnv as StockTradingEnv_numpy 
from finrl.drl_agents.rllib.models import DRLAgent as DRLAgent_rllib
from stable_baselines3.common.vec_env import DummyVecEnv
from finrl.finrl_meta.data_processor import DataProcessor
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
import ray
from pprint import pprint
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.ddpg import DDPGTrainer
from ray.rllib.agents.a3c import A2CTrainer
from ray.rllib.agents.a3c import a2c
from ray.rllib.agents.ddpg import ddpg, td3
from ray.rllib.agents.ppo import ppo
from ray.rllib.agents.sac import sac
import sys
sys.path.append("../FinRL-Library")
import os
import itertools
from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.optuna import OptunaSearch

from ray.tune.registry import register_env

import time
from typing import Dict, Optional, Any

def sample_a2c_params():
    return{
        "lambda": 0.9,
        "entropy_coeff": 0.0000066,
        "lr": 0.0003
    }
MODELS = {"a2c": a2c, "ddpg": ddpg, "td3": td3, "sac": sac, "ppo": ppo}

def get_train_env(start_date, end_date, ticker_list, data_source, time_interval, 
          technical_indicator_list, env, model_name, if_vix = True,
          **kwargs):

    #fetch data
    DP = DataProcessor(data_source, **kwargs)
    data = DP.download_data(ticker_list, start_date, end_date, time_interval)
    data = DP.clean_data(data)
    data = DP.add_technical_indicator(data, technical_indicator_list)
    if if_vix:
        data = DP.add_vix(data)
    price_array, tech_array, turbulence_array = DP.df_to_array(data, if_vix)
    train_env_config = {'price_array':price_array,
              'tech_array':tech_array,
              'turbulence_array':turbulence_array,
              'if_train':True}

    return train_env_config

TRAIN_START_DATE = '2014-01-01'
TRAIN_END_DATE = '2019-07-30'

VAL_START_DATE = '2019-08-01'
VAL_END_DATE = '2020-07-30'

TEST_START_DATE = '2020-08-01'
TEST_END_DATE = '2021-10-01'

technical_indicator_list =config.TECHNICAL_INDICATORS_LIST

model_name = 'a2c'
env = StockTradingEnv_numpy
ticker_list = ['TSLA']
data_source = 'yahoofinance'
time_interval = '1D'

train_env_config = get_train_env(TRAIN_START_DATE, VAL_END_DATE, 
                     ticker_list, data_source, time_interval, 
                        technical_indicator_list, env, model_name)

from ray.tune.registry import register_env

env_name = 'StockTrading_train_env'
register_env(env_name, lambda config: env(train_env_config))

MODEL_TRAINER = {'a2c':A2CTrainer,'ppo':PPOTrainer,'ddpg':DDPGTrainer}
if model_name == "ddpg":
    sample_hyperparameters = sample_ddpg_params()
elif model_name == "ppo":
    sample_hyperparameters = sample_ppo_params()
elif model_name == "a2c":
    sample_hyperparameters = sample_a2c_params()

def run_optuna_tune():

    algo = OptunaSearch()
    algo = ConcurrencyLimiter(algo,max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    num_samples = 5
    training_iterations = 10

    analysis = tune.run(
      MODEL_TRAINER[model_name],
      metric="episode_reward_mean", #The metric to optimize for tuning
      mode="max", #Maximize the metric
#       search_alg = algo,#OptunaSearch method which uses Tree Parzen estimator to sample hyperparameters
      scheduler=scheduler, #To prune bad trials
      config = {**sample_hyperparameters,
                'env':'StockTrading_train_env','num_workers':1,
                'num_gpus':0,'framework':'torch'},
      num_samples = num_samples, #Number of hyperparameters to test out
      stop = {'training_iteration':training_iterations},#Time attribute to validate the results
      verbose=1,local_dir="./tuned_models",#Saving tensorboard plots
      # resources_per_trial={'gpu':1,'cpu':1},
      max_failures = 1,#Extra Trying for the failed trials
      raise_on_failed_trial=False,#Don't return error even if you have errored trials
      keep_checkpoints_num = num_samples-5, 
      checkpoint_score_attr ='episode_reward_mean',#Only store keep_checkpoints_num trials based on this score
      checkpoint_freq=training_iterations#Checpointing all the trials
    )
    print("Best hyperparameter: ", analysis.best_config)
    return analysis

for i in range(30):
    analysis = run_optuna_tune()

Anything else

No response

Are you willing to submit a PR?

[ ] Yes I am willing to submit a PR!

ray-project / ray

[Bug] "The kernel has died..." during Ray tune.run #21917