hosseinmoein / DataFrame

C++ DataFrame for statistical, Financial, and ML analysis -- in modern C++ using native types and contiguous memory storage
https://hosseinmoein.github.io/DataFrame/
BSD 3-Clause "New" or "Revised" License
2.38k stars 298 forks source link

Group by on string dataframe #293

Closed trinadh0030 closed 3 months ago

trinadh0030 commented 3 months ago

`

include <DataFrame/DataFrame.h> // Main DataFrame header

include <DataFrame/DataFrameFinancialVisitors.h> // Financial algorithms

include <DataFrame/DataFrameMLVisitors.h> // Machine-learning algorithms

include <DataFrame/DataFrameStatsVisitors.h> // Statistical algorithms

include <DataFrame/Utils/DateTime.h>

include

include

using namespace hmdf; using namespace std; using namespace chrono;

// DataFrame whose index is a std::string row id: generate_data() passes a
// std::string* as the index value and the selection lambdas in main() take the
// index as `const string &`.
using StrDataFrame = StdDataFrame<std::string>;
// NOTE(review): the template argument of this alias was lost when the snippet
// was pasted; unsigned long is assumed. The alias is not used anywhere in this
// snippet -- confirm or remove.
using MyDataFrame = StdDataFrame<unsigned long>;

// Shared random source for the synthetic data produced by generate_data().
random_device rd;
mt19937 generator(rd());
// Feeds the double columns "amount" and "entry_price"; range is 1 to 100.
uniform_real_distribution<double> dist(1, 100);
// Feeds the int column "side"; yields 0 or 1.
uniform_int_distribution<int> side_dist(0, 1);

/// Fills `df` with synthetic rows: two rows per (user, symbol) pair, one with
/// margin_type 1 and one with margin_type 0.
///
/// Each row's index is "user_<i>_symbol_<j><margin_type>". The "amount",
/// "entry_price" and "side" values are drawn from the file-scope `dist` /
/// `side_dist` distributions using the file-scope `generator`.
///
/// @param df            Frame to append to; must already contain the columns
///                      written below.
/// @param users_count   Number of distinct users (outer index i).
/// @param symbol_count  Number of symbols generated per user (inner index j).
void generate_data(StrDataFrame *df, int users_count, int symbol_count) {
    for (int i = 0; i < users_count; ++i) {
        // The user id must come from the user index i. The original used the
        // symbol index j here, which made "user_id" a copy of the symbol
        // number and contradicted the "user_<i>_symbol_<j>" row id.
        const string user_id = to_string(i);

        for (int j = 0; j < symbol_count; ++j) {
            const string symbol = "symbol_" + to_string(j);

            // Same order as the original: the margin_type 1 row first, then
            // the margin_type 0 row.
            for (const int margin_type : {1, 0}) {
                string id = "user_" + to_string(i) + "_symbol_" +
                            to_string(j) + to_string(margin_type);

                // Draw into named locals: the evaluation order of function
                // arguments is unspecified, so drawing inline would leave the
                // order of the random draws up to the compiler.
                const double amount = dist(generator);
                const double entry_price = dist(generator);
                const int side = side_dist(generator);

                df->append_row(&id,
                               make_pair("amount", amount),
                               make_pair("entry_price", entry_price),
                               make_pair("side", side),
                               make_pair("symbol", symbol),
                               make_pair("user_id", user_id),
                               make_pair("margin_type", margin_type));
            }
        }
    }
}

static std::vector calculate_pnl(StrDataFrame::IndexVecType::const_iterator idx_begin, StrDataFrame::IndexVecType::const_iterator idx_end, std::vector::const_iterator b_entry_price, std::vector::const_iterator e_entry_price, std::vector::const_iterator b_side, std::vector::const_iterator e_side, std::vector::const_iterator b_amount, std::vector::const_iterator e_amount ) {

const std::size_t           col_s = std::distance(idx_begin, idx_end);
std::vector<double>    result (col_s);

for (std::size_t i = 0; i < col_s; ++i) {
    result[i] = (*(b_side + i) )== 1 ? (550 - *(b_entry_price + i)) * (*(b_amount + i)) : -1 * ((550 -
                                                                                                      *(b_entry_price +
                                                                                                        i)) *
                                                                                                     (*(b_amount +
                                                                                                        i)));
}
return (result);

}

int main(int, char *[]) {

ThreadGranularity::set_optimum_thread_level();

StrDataFrame PMSDataframe;
PMSDataframe.create_column<double>(static_cast<const char *>("amount"));
PMSDataframe.create_column<double>(static_cast<const char *>("entry_price"));
PMSDataframe.create_column<int>(static_cast<const char *>("side"));
PMSDataframe.create_column<string>(static_cast<const char *>("symbol"));
PMSDataframe.create_column<int>(static_cast<const char *>("margin_type"));
PMSDataframe.create_column<string>(static_cast<const char *>("user_id"));
PMSDataframe.create_column<double>(static_cast<const char *>("pnl"));

generate_data(&PMSDataframe, 100000, 200);

for (auto itr :  PMSDataframe.get_columns_info<int8_t, double>()){
  cout << endl <<"Print Tuple "  << get<0>(itr) << " " << get<1>(itr) << " " << endl;

} auto start = high_resolution_clock::now();

PMSDataframe.consolidate<double, int, double, double>
        ("entry_price", "side", "amount", "pnl", calculate_pnl, false);

auto end = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(end - start);

cout << "Duration : " << duration << endl;

auto func_cross = [](const string &, const int &val1) -> bool {
    return (val1 == 1);
};
auto func_isolated = [](const string &, const int &val1) -> bool {
    return (val1 == 0);
};

StrDataFrame cross_dataFrame = PMSDataframe.get_data_by_sel<int, decltype(func_cross), int, string, double>("margin_type", func_cross);
StrDataFrame isolated_dataFrame = PMSDataframe.get_data_by_sel<int, decltype(func_isolated), int, string, double>("margin_type", func_isolated);

auto    result1 = PMSDataframe.groupby2<string , string>("user_id", "symbol",
                                                               FirstVisitor<StrDataFrame ::IndexType, StrDataFrame::IndexType>(),
                                                               make_tuple("pnl", "sum", SumVisitor<double>()));

result1.write<std::ostream, std::string, double, std::size_t, int>(std::cout, io_format::csv2);

return (0);

} `

When calling groupby2 in the code snippet above, I get the following compile error: In template: no matching function for call to object of type 'typename tuple_element<2UL, tuple<const char *, const char *, SumVisitor<double, unsigned long>>>::type' (aka 'hmdf::SumVisitor<double>')

Can someone help me find where I am making a mistake?