microsoft / qlib

Qlib is an AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms, including supervised learning, market dynamics modeling, and RL.
https://qlib.readthedocs.io/en/latest/
MIT License
14.54k stars 2.53k forks source link

How to set region in backtest ? #1801

Open TompaBay opened 4 weeks ago

TompaBay commented 4 weeks ago

I'm currently using the following code to backtest on US data, and I have a question about trading costs. From what I've learned, the US market uses different trading-cost rules than the Chinese market: US trading costs are charged per share, while Chinese trading costs are based on traded value. How should I set the trading cost when backtesting on the US market? Thanks!   `from pprint import pprint

import pandas as pd
import qlib
from qlib.backtest import backtest, executor
from qlib.constant import REG_US
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import TopkDropoutStrategy
from qlib.utils import flatten_dict
from qlib.utils.time import Freq

SP500_BENCH = "SPX"
FREQ = "day"


def build_pred_score(score_df: pd.DataFrame, start: str, end: str) -> pd.Series:
    """Turn a raw score table into the signal series used by the strategy.

    The ``datetime`` column is parsed to timestamps *before* the date window
    is applied. Filtering on the raw strings would drop rows such as
    ``"2023-12-31 00:00:00"``, which compare greater than ``"2023-12-31"``
    as text even though they fall on the last day of the window.

    Args:
        score_df: Table with ``datetime``, ``instrument`` and ``score``
            columns. It is not modified.
        start: First day of the window (inclusive).
        end: Last day of the window (inclusive).

    Returns:
        The ``score`` column indexed by ``(datetime, instrument)``, sorted by
        that index and restricted to ``[start, end]``.
    """
    # ``assign`` returns a new frame, so the caller's data stays untouched and
    # no chained-assignment warning is raised further down.
    frame = score_df.assign(datetime=pd.to_datetime(score_df["datetime"]))
    in_window = frame["datetime"].between(pd.Timestamp(start), pd.Timestamp(end))
    frame = frame.loc[in_window].sort_values(by=["datetime", "instrument"])
    return frame.set_index(["datetime", "instrument"])["score"]


def main() -> None:
    """Backtest a TopkDropout strategy on US data and print the risk analysis."""
    # The provider holds US-market data, so select the US region explicitly
    # instead of relying on qlib's default region.
    qlib.init(provider_uri=r"../../benchmark/us_data/qlib_data/", region=REG_US)

    score_df = pd.read_csv("../gbrt.csv")
    score_df["instrument"] = score_df["instrument"].astype(str)
    print(score_df)

    start = "2023-01-01"
    end = "2023-12-31"
    pred_score = build_pred_score(score_df, start, end)

    strategy_config = {
        "topk": 50,
        "n_drop": 10,
        "signal": pred_score,
    }

    executor_config = {
        "time_per_step": "day",
        "generate_portfolio_metrics": True,
        "verbose": True,
    }

    backtest_config = {
        "start_time": start,
        "end_time": end,
        "account": 100000000,
        "benchmark": SP500_BENCH,
        # NOTE(review): these cost settings look like ratios of traded value
        # plus a minimum fee, not per-share fees -- confirm against the qlib
        # Exchange documentation before relying on them for US cost modelling.
        "exchange_kwargs": {
            "trade_unit": 1,
            "freq": FREQ,
            "limit_threshold": None,
            "deal_price": "close",
            "open_cost": 0.0000229,
            "close_cost": 0,
            "min_cost": 0.01,
        },
    }

    strategy_obj = TopkDropoutStrategy(**strategy_config)
    executor_obj = executor.SimulatorExecutor(**executor_config)

    portfolio_metric_dict, indicator_dict = backtest(
        executor=executor_obj, strategy=strategy_obj, **backtest_config
    )
    analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

    report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)

    excess_return = report_normal["return"] - report_normal["bench"]
    analysis = {
        "excess_return_without_cost": risk_analysis(excess_return, freq=analysis_freq),
        "excess_return_with_cost": risk_analysis(
            excess_return - report_normal["cost"], freq=analysis_freq
        ),
    }

    analysis_df: pd.DataFrame = pd.concat(analysis)
    # Flattened metrics, ready to be logged; not consumed further in this script.
    analysis_dict = flatten_dict(analysis_df["risk"].unstack().T.to_dict())

    # Print out results.
    pprint(f"The following are analysis results of benchmark return({analysis_freq}).")
    pprint(risk_analysis(report_normal["bench"], freq=analysis_freq))
    pprint(f"The following are analysis results of the excess return without cost({analysis_freq}).")
    pprint(analysis["excess_return_without_cost"])
    pprint(f"The following are analysis results of the excess return with cost({analysis_freq}).")
    pprint(analysis["excess_return_with_cost"])


if __name__ == "__main__":
    main()

`