whittlem / pycryptobot

Python Crypto Bot (PyCryptoBot)
Apache License 2.0
1.97k stars 738 forks source link

Log file benchmarker #331

Open NeitsabesD opened 3 years ago

NeitsabesD commented 3 years ago

Is your feature request related to a problem? Please describe. I wanted to benchmark the various simulations I'm performing

Describe the solution you'd like I wrote a small function that reads the log files and processes them into a pandas DataFrame

Additional context In the near future it would be great to share log files on the repo in order to share our best practices. In my dreams, the community would share configs for various currencies and periods of time, and those configs would be validated by votes


Here are the few lines of code I wrote: you specify the log folder, and it returns a pandas DataFrame with stats. I'm an absolute newbie on git, so if it is of any interest, please help yourself

import os
import pandas as pd
import numpy as np
import re

# Timestamp pattern (YYYY-MM-DD HH:MM:SS) used to find dates inside log lines.
_LOG_DATE_RE = re.compile(
    r'[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1]) (2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]'
)

# Number of leading characters dropped from every log line before parsing.
# NOTE(review): presumably the logger's own "timestamp + level" prefix -- confirm
# against the log format actually produced by the bot.
_LOG_PREFIX_LEN = 29

# Columns of the frame returned when no log file yields a result.
_EMPTY_RESULT_COLUMNS = [
    "log_file", "market", "granularity", "Margin",
    "trade_profits", "trade_dates", "trading_start", "trading_end",
    "trading_period", "margin_by_trading_days",
    "wins", "looses", "Buy Count", "Sell Count",
]


def _parse_log_lines(lines, log_file, market):
    """Parse the lines of one log file into a single result record.

    Args:
        lines: Iterable of raw log lines (an open text file works).
        log_file: File name stored under the ``log_file`` key.
        market: Market name stored under the ``market`` key.

    Returns:
        A dict holding every ``name : value`` parameter found in the log
        (``True``/``False`` values stored as ``1``/``0``), the ``Margin`` as a
        fraction (``"5.0%"`` -> ``0.05``), plus ``trade_profits``,
        ``trade_dates``, ``trading_start`` and ``trading_end``; or ``None``
        when the log has no ``Margin`` parameter (e.g. the bot was still
        running when the log was copied).

    Raises:
        ValueError: If a profit or the ``Margin`` value is not numeric.
    """
    record = dict(log_file=log_file, market=market)
    profits = []
    log_dates = []
    trade_dates = []
    stop_parsing = False  # set once a dated line carries no granularity field

    for raw_line in lines:
        line = raw_line[_LOG_PREFIX_LEN:]

        # Only the first timestamp on the line is recorded.
        found = _LOG_DATE_RE.search(line)
        if found and 'formed at' not in line and " Bot Started " not in line:
            log_dates.append(found.group())
            # The granularity is re-read from the third ' | '-separated field
            # of every dated line; a dated line without that field stops
            # parameter/trade parsing for the rest of the file (dates are
            # still collected), so the file is kept only if its Margin was
            # already seen.
            try:
                record["granularity"] = line.split(' | ')[2]
            except IndexError:
                stop_parsing = True

        if stop_parsing:
            continue

        # Bot parameters and summary values look like "name : value".
        if " : " in line:
            name, _, rest = line.replace('|', '').strip().partition(":")
            value = rest.strip()
            if value.startswith("True"):
                value = 1
            elif value.startswith("False"):
                value = 0
            record[name.strip()] = value

        # Trade lines carry the profit right after the "| DIFF |" marker.
        if "MARGIN NO FEES" in line:
            profits.append(float(line.split('| DIFF |')[-1].split('|')[0].strip()))
            trade_dates.append(log_dates[-1] if log_dates else None)

    if "Margin" not in record:
        return None

    record["Margin"] = float(str(record["Margin"]).replace('%', '')) / 100
    record['trade_profits'] = profits
    record['trade_dates'] = trade_dates
    # A log without any dated line gets empty (NaT) trading bounds.
    record['trading_start'] = log_dates[0] if log_dates else None
    record['trading_end'] = log_dates[-1] if log_dates else None
    return record


def readLogs(log_folder="../robot_logs/", market="BTCUSD"):
    """Benchmark the bot logs of one market found in a folder.

    Every regular file in ``log_folder`` whose name contains ``market`` is
    parsed; logs without a final ``Margin`` value are ignored.

    Args:
        log_folder: Folder holding the log files (a trailing slash is optional).
        market: Substring a file name must contain to be parsed; it is also
            printed and stored in the ``market`` column.

    Returns:
        A ``pandas.DataFrame`` with one row per usable log, sorted by
        decreasing ``Margin`` (a fraction, not a percentage). Besides the
        parameters read from the logs it contains ``trade_profits``,
        ``trade_dates``, ``trading_start``, ``trading_end``,
        ``trading_period``, ``margin_by_trading_days``, ``wins``, ``looses``,
        ``Buy Count`` and ``Sell Count``. The frame is empty when no log
        qualifies.

    Raises:
        FileNotFoundError: If ``log_folder`` does not exist.
        ValueError: If a log holds a non-numeric profit, margin or count.
    """
    print(market)

    records = []
    # Sorted listing keeps the output order reproducible across platforms.
    for file_name in sorted(os.listdir(log_folder)):
        if market not in file_name:
            continue
        path = os.path.join(log_folder, file_name)
        if not os.path.isfile(path):
            continue
        with open(path) as handle:
            record = _parse_log_lines(handle, file_name, market)
        if record is not None:
            records.append(record)

    if not records:
        return pd.DataFrame(columns=_EMPTY_RESULT_COLUMNS)

    # Best margin first.
    df = pd.DataFrame(records)
    df = df.sort_values('Margin', ascending=False, kind='stable').reset_index(drop=True)

    # Info about the trading period
    df['trading_start'] = pd.to_datetime(df['trading_start'])
    df['trading_end'] = pd.to_datetime(df['trading_end'])
    df['trading_period'] = df['trading_end'] - df['trading_start']

    # Average margin per day, so that different granularities can be compared
    trading_days = df['trading_period'].dt.total_seconds() / 3600 / 24
    df['margin_by_trading_days'] = df['Margin'] / trading_days
    df['wins'] = df['trade_profits'].apply(lambda p: sum(v > 0 for v in p))
    df['looses'] = df['trade_profits'].apply(lambda p: sum(v < 0 for v in p))

    # Counts are read as text; logs without them count as zero.
    for column in ('Buy Count', 'Sell Count'):
        if column in df.columns:
            df[column] = df[column].map(lambda v: 0 if pd.isna(v) else int(v))
        else:
            df[column] = 0

    return df

Usage : df_benchmark = readLogs(log_folder ="../robot_logs/", market="BTCUSD")

whittlem commented 3 years ago

This sounds interesting. I'll look at adding this officially into the code when I get a chance.