microsoft / qlib

Qlib is an AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing them in production. Qlib supports diverse machine-learning modeling paradigms, including supervised learning, market dynamics modeling, and reinforcement learning (RL).
https://qlib.readthedocs.io/en/latest/
MIT License

kernels get killed OOM when running 1min data REG_CN #1826

Open · DanielKui opened 2 months ago

DanielKui commented 2 months ago

I'm running on 1-min data with the script below (start_time: 2020-09-14, end_time: 2021-06-21). Swap memory is 128 G, yet the process gets killed by OOM. Why?

all.txt (attached at the end) contains 4067 stocks. I downloaded the 1-min data from Yahoo with: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min

I used 8 kernels.

How should I configure this?

```python
#!/usr/bin/env python
import multiprocessing
import os

import pandas as pd

import qlib
from qlib.constant import REG_CN
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.model.pytorch_alstm_ts import ALSTM
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D
from qlib.data.dataset import TSDatasetH
from qlib.tests.data import GetData
from qlib.utils import exists_qlib_data, flatten_dict, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import PortAnaRecord, SignalRecord

if __name__ == "__main__":
    multiprocessing.freeze_support()

    provider_uri = "~/.qlib/qlib_data/cn_data_1min"  # target_dir

    qlib.init(provider_uri=provider_uri)

    GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
    # re-initialize with 8 kernels and caches disabled
    qlib.init(provider_uri=provider_uri, region=REG_CN, kernels=8,
              expression_cache=None, dataset_cache=None)

    D.calendar(start_time="2020-09-14", end_time="2021-06-21", freq="1min")
    benchmark = "SH000300"

    instruments = D.instruments(market="all")
    # print(type(instruments))
    # print(instruments)

    stock_list = D.list_instruments(
        instruments=instruments,
        start_time="2020-09-14",
        end_time="2021-06-21",
        freq="1min",
        as_list=True,
    )

    # D.calendar()

    # set dates, stock pool, and other parameters
    data_handler_config = {
        "start_time": "2020-09-14",
        "end_time": "2021-06-21",
        "fit_start_time": "2020-09-20",
        "fit_end_time": "2021-06-18",
        "freq": "1min",
        "instruments": stock_list,
    }

    h = Alpha158(**data_handler_config)

    # get the column names (factor names)
    # print(h.get_cols())

    Alpha158_df_feature = h.fetch(col_set="feature")
    # print(Alpha158_df_feature)

    task = {
        "model": {
            "class": "LGBModel",
            "module_path": "qlib.contrib.model.gbdt",
            "kwargs": {
                "loss": "mse",
                "colsample_bytree": 0.8879,
                "learning_rate": 0.0421,
                "subsample": 0.8789,
                "lambda_l1": 205.6999,
                "lambda_l2": 580.9768,
                "max_depth": 8,
                "num_leaves": 210,
                "num_threads": 20,
            },
        },
        "dataset": {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "Alpha158",
                    "module_path": "qlib.contrib.data.handler",
                    "kwargs": data_handler_config,
                },
                "segments": {
                    "train": ("2020-09-14", "2020-11-30"),
                    "valid": ("2020-12-01", "2021-02-28"),
                    "test": ("2021-03-01", "2021-06-20"),
                },
            },
        },
    }

    # model initiation
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    # start an experiment to train the model
    with R.start(experiment_name="train_model"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)  # fit the model
        R.save_objects(trained_model=model)
        rid = R.get_recorder().id

    ###################################
    # prediction, backtest & analysis
    ###################################
    port_analysis_config = {
        "executor": {
            "class": "SimulatorExecutor",
            "module_path": "qlib.backtest.executor",
            "kwargs": {
                "time_per_step": "1min",
                "generate_portfolio_metrics": True,
            },
        },
        "strategy": {
            "class": "TopkDropoutStrategy",
            "module_path": "qlib.contrib.strategy.signal_strategy",
            "kwargs": {
                "model": model,
                "dataset": dataset,
                "topk": 50,
                "n_drop": 5,
            },
        },
        "backtest": {
            "start_time": "2020-09-14",
            "end_time": "2021-06-15",
            "account": 1000,
            "benchmark": benchmark,
            "exchange_kwargs": {
                "freq": "1min",
                "limit_threshold": 0.095,
                "deal_price": "close",
                "open_cost": 0.0005,
                "close_cost": 0.0015,
                "min_cost": 5,
            },
        },
    }

    # backtest and analysis
    with R.start(experiment_name="backtest_analysis"):
        recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
        model = recorder.load_object("trained_model")

        # prediction
        recorder = R.get_recorder()
        ba_rid = recorder.id
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

        # backtest & analysis
        par = PortAnaRecord(recorder, port_analysis_config, "1min")
        par.generate()

    """
    recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")

    label_df = dataset.prepare("test", col_set="label")
    label_df.columns = ["label"]

    report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1min.pkl")
    positions = recorder.load_object("portfolio_analysis/positions_normal_1min.pkl")
    analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1min.pkl")

    analysis_position.report_graph(report_normal_df)
    analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

    pred_df = recorder.load_object("pred.pkl")
    pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)

    print(f"label_df size: {len(label_df)}")
    print(f"pred_label size: {len(pred_label)}")
    # os._exit(0)
    # analysis_position.score_ic_graph(pred_label)
    # analysis_model.model_performance_graph(pred_label)
    """
```


Attachment: all.txt
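For scale, here is a rough back-of-envelope sizing of the feature matrix this script asks Alpha158 to build. All numbers are assumptions, not measurements: roughly 240 trading minutes per day on the CN market, roughly 190 trading days between 2020-09-14 and 2021-06-21, and about 158 float64 feature columns from Alpha158.

```python
# Rough sizing sketch (all numbers are assumptions, not measured values):
stocks = 4067            # instruments in all.txt
minutes_per_day = 240    # CN session: 09:30-11:30 + 13:00-15:00
trading_days = 190       # approximate count for 2020-09-14 .. 2021-06-21
features = 158           # Alpha158 factor count
bytes_per_value = 8      # float64

rows = stocks * minutes_per_day * trading_days
gib = rows * features * bytes_per_value / 2**30
print(f"~{rows:,} rows -> ~{gib:,.0f} GiB for the feature matrix alone")
# ~185,455,200 rows -> ~218 GiB, before labels, caches, or per-kernel copies
```

At that scale, even 128 G of swap falls far short, which is consistent with the OOM kill.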

SunsetWolf commented 3 weeks ago

We think it may be caused by the machine having too little memory; we tried your code but could not reproduce the problem. You could try shortening the training data, or try a different machine with more memory.
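A minimal way to act on that suggestion, assuming the goal is just to verify the pipeline fits in memory before scaling up: shrink the window to about a month, lower `kernels`, and start from a smaller universe. The `csi300` market name below is an assumption; substitute a short stock list if the 1-min dataset does not ship that universe.

```python
import qlib
from qlib.constant import REG_CN
from qlib.contrib.data.handler import Alpha158

# Reduced setup for a memory smoke test (values are illustrative):
qlib.init(
    provider_uri="~/.qlib/qlib_data/cn_data_1min",
    region=REG_CN,
    kernels=2,               # fewer worker processes -> fewer in-flight copies
    expression_cache=None,
    dataset_cache=None,
)

data_handler_config = {
    "start_time": "2020-09-14",
    "end_time": "2020-10-14",   # ~1 month instead of ~9
    "fit_start_time": "2020-09-14",
    "fit_end_time": "2020-10-14",
    "freq": "1min",
    "instruments": "csi300",    # ~300 names instead of 4067 (assumes this universe exists in the 1-min data)
}

h = Alpha158(**data_handler_config)
features = h.fetch(col_set="feature")
print(features.shape)
```

If this completes within memory, grow the window and universe stepwise to find what the machine can actually hold.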