microsoft / qlib

Qlib is an AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing them in production. Qlib supports diverse machine-learning modeling paradigms, including supervised learning, market dynamics modeling, and reinforcement learning (RL).
https://qlib.readthedocs.io/en/latest/
MIT License

kernels get killed OOM when running 1min data REG_CN #1826

Open · DanielKui opened 2 months ago

DanielKui commented 2 months ago

I'm running on 1-min data with the script below (start_time: 2020-09-14, end_time: 2021-06-21). Swap memory is 128 G, yet the process gets killed by OOM. Why?

all.txt (attached at the end) contains 4067 stocks. I downloaded the 1-min data from Yahoo with: python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data_1min --region cn --interval 1min

I used 8 kernels.

How should I configure this?

```python
#!/usr/bin/env python
import multiprocessing
import os

import pandas as pd

import qlib
from qlib.constant import REG_CN
from qlib.contrib.data.handler import Alpha158
from qlib.contrib.model.pytorch_alstm_ts import ALSTM
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D
from qlib.data.dataset import TSDatasetH
from qlib.tests.data import GetData
from qlib.utils import exists_qlib_data, flatten_dict, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import PortAnaRecord, SignalRecord

if __name__ == "__main__":
    multiprocessing.freeze_support()

    provider_uri = "~/.qlib/qlib_data/cn_data_1min"  # target_dir

    qlib.init(provider_uri=provider_uri)

    GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
    # re-initialize with 8 kernels and caches disabled
    qlib.init(provider_uri=provider_uri, region=REG_CN, kernels=8,
              expression_cache=None, dataset_cache=None)

    D.calendar(start_time="2020-09-14", end_time="2021-06-21", freq="1min")
    benchmark = "SH000300"

    instruments = D.instruments(market="all")
    # print(type(instruments))
    # print(instruments)

    stock_list = D.list_instruments(
        instruments=instruments,
        start_time="2020-09-14",
        end_time="2021-06-21",
        freq="1min",
        as_list=True,
    )

    # D.calendar()

    # set dates, stock pool, and other parameters
    data_handler_config = {
        "start_time": "2020-09-14",
        "end_time": "2021-06-21",
        "fit_start_time": "2020-09-20",
        "fit_end_time": "2021-06-18",
        "freq": "1min",
        "instruments": stock_list,
    }

    h = Alpha158(**data_handler_config)

    # get the column names (factor names)
    # print(h.get_cols())

    Alpha158_df_feature = h.fetch(col_set="feature")
    # print(Alpha158_df_feature)

    task = {
        "model": {
            "class": "LGBModel",
            "module_path": "qlib.contrib.model.gbdt",
            "kwargs": {
                "loss": "mse",
                "colsample_bytree": 0.8879,
                "learning_rate": 0.0421,
                "subsample": 0.8789,
                "lambda_l1": 205.6999,
                "lambda_l2": 580.9768,
                "max_depth": 8,
                "num_leaves": 210,
                "num_threads": 20,
            },
        },
        "dataset": {
            "class": "DatasetH",
            "module_path": "qlib.data.dataset",
            "kwargs": {
                "handler": {
                    "class": "Alpha158",
                    "module_path": "qlib.contrib.data.handler",
                    "kwargs": data_handler_config,
                },
                "segments": {
                    "train": ("2020-09-14", "2020-11-30"),
                    "valid": ("2020-12-01", "2021-02-28"),
                    "test": ("2021-03-01", "2021-06-20"),
                },
            },
        },
    }

    # model initiation
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    # start an experiment to train the model
    with R.start(experiment_name="train_model"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)  # fit the model
        R.save_objects(trained_model=model)
        rid = R.get_recorder().id

    ###################################
    # prediction, backtest & analysis
    ###################################
    port_analysis_config = {
        "executor": {
            "class": "SimulatorExecutor",
            "module_path": "qlib.backtest.executor",
            "kwargs": {
                "time_per_step": "1min",
                "generate_portfolio_metrics": True,
            },
        },
        "strategy": {
            "class": "TopkDropoutStrategy",
            "module_path": "qlib.contrib.strategy.signal_strategy",
            "kwargs": {
                "model": model,
                "dataset": dataset,
                "topk": 50,
                "n_drop": 5,
            },
        },
        "backtest": {
            "start_time": "2020-09-14",
            "end_time": "2021-06-15",
            "account": 1000,
            "benchmark": benchmark,
            "exchange_kwargs": {
                "freq": "1min",
                "limit_threshold": 0.095,
                "deal_price": "close",
                "open_cost": 0.0005,
                "close_cost": 0.0015,
                "min_cost": 5,
            },
        },
    }

    # backtest and analysis
    with R.start(experiment_name="backtest_analysis"):
        recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
        model = recorder.load_object("trained_model")

        # prediction
        recorder = R.get_recorder()
        ba_rid = recorder.id
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

        # backtest & analysis
        par = PortAnaRecord(recorder, port_analysis_config, "1min")
        par.generate()

    """
    recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")

    label_df = dataset.prepare("test", col_set="label")
    label_df.columns = ["label"]

    report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1min.pkl")
    positions = recorder.load_object("portfolio_analysis/positions_normal_1min.pkl")
    analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1min.pkl")

    analysis_position.report_graph(report_normal_df)
    analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

    pred_df = recorder.load_object("pred.pkl")
    pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)

    print(f"label_df size: {len(label_df)}")
    print(f"pred_label size: {len(pred_label)}")
    # os._exit(0)
    # analysis_position.score_ic_graph(pred_label)
    # analysis_model.model_performance_graph(pred_label)
    """
```


Attachment: all.txt
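For scale, here is a rough back-of-envelope sizing of the feature matrix this script asks Alpha158 to build. All numbers are assumptions, not measurements: roughly 240 trading minutes per day on the CN market, roughly 190 trading days between 2020-09-14 and 2021-06-21, and about 158 float64 feature columns from Alpha158.

```python
# Rough sizing sketch (all numbers are assumptions, not measured values):
stocks = 4067            # instruments in all.txt
minutes_per_day = 240    # CN session: 09:30-11:30 + 13:00-15:00
trading_days = 190       # approximate count for 2020-09-14 .. 2021-06-21
features = 158           # Alpha158 factor count
bytes_per_value = 8      # float64

rows = stocks * minutes_per_day * trading_days
gib = rows * features * bytes_per_value / 2**30
print(f"~{rows:,} rows -> ~{gib:,.0f} GiB for the feature matrix alone")
# ~185,455,200 rows -> ~218 GiB, before labels, caches, or per-kernel copies
```

At that scale, even 128 G of swap falls far short, which is consistent with the OOM kill.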

SunsetWolf commented 3 weeks ago

We think it may be caused by the machine having too little memory; we tried your code but could not reproduce the problem. You could try shortening the training data, or try a different machine with more memory.
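A minimal way to act on that suggestion, assuming the goal is just to verify the pipeline fits in memory before scaling up: shrink the window to about a month, lower `kernels`, and start from a smaller universe. The `csi300` market name below is an assumption; substitute a short stock list if the 1-min dataset does not ship that universe.

```python
import qlib
from qlib.constant import REG_CN
from qlib.contrib.data.handler import Alpha158

# Reduced setup for a memory smoke test (values are illustrative):
qlib.init(
    provider_uri="~/.qlib/qlib_data/cn_data_1min",
    region=REG_CN,
    kernels=2,               # fewer worker processes -> fewer in-flight copies
    expression_cache=None,
    dataset_cache=None,
)

data_handler_config = {
    "start_time": "2020-09-14",
    "end_time": "2020-10-14",   # ~1 month instead of ~9
    "fit_start_time": "2020-09-14",
    "fit_end_time": "2020-10-14",
    "freq": "1min",
    "instruments": "csi300",    # ~300 names instead of 4067 (assumes this universe exists in the 1-min data)
}

h = Alpha158(**data_handler_config)
features = h.fetch(col_set="feature")
print(features.shape)
```

If this completes within memory, grow the window and universe stepwise to find what the machine can actually hold.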