ClickHouse / clickhouse-cpp

C++ client library for ClickHouse
Apache License 2.0
306 stars 159 forks source link

"Connection Reset by Peer" Error When Inserting Data into Narrow Table #380

Open murphyovo opened 4 months ago

murphyovo commented 4 months ago

I encountered a problem while inserting data into a narrow table with the clickhouse-cpp client. When the number of rows is large (e.g., 1,000,000 rows), the insert fails with a "Connection reset by peer" error. Below is a simple demo that reproduces the problem; the error occurs with 1,000,000 rows but not with 500,000 rows:

#include<ctime>
#include <iostream>
#include <ctime>
#include <clickhouse/client.h>
#include <chrono>
#include <sstream>
#include <random>

int main() {
    const std::string _host = "";
    const std::string _user = "";
    const std::string _passwd = "";
    const std::string _database = "";
    const std::string _table = "";
    const std::string table_name = _database + "." + _table;

    const int push_step=1e6; // Number of rows per transfer
    const int ColumnNumber=99;

    Client client(ClientOptions().SetHost(_host).SetUser(_user).SetPassword(_passwd).SetCompressionMethod(CompressionMethod::LZ4));

    std::stringstream ss;
    ss << "CREATE TABLE IF NOT EXISTS " << table_name 
        << " (date Date32,fname FixedString(18), symbol FixedString(8), time DateTime64, value Int32"
        << ") ENGINE = ReplacingMergeTree() order by (date, fname, symbol, time) " ;
    client.Execute(ss.str());

    /******************** Prepare Data ********************/

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int> dis(1, 10000);

    std::shared_ptr<ColumnDate32> date = std::make_shared<ColumnDate32>();
    std::shared_ptr<ColumnFixedString> symbol= std::make_shared<ColumnFixedString>(8);
    std::shared_ptr<ColumnFixedString> fname = std::make_shared<ColumnFixedString>(18);
    std::shared_ptr<ColumnDateTime64> time = std::make_shared<ColumnDateTime64>(3);
    std::shared_ptr<ColumnInt32> values = std::make_shared<ColumnInt32>();

    std::shared_ptr<ColumnDate32> date_stack = std::make_shared<ColumnDate32>();
    std::shared_ptr<ColumnFixedString> symbol_stack = std::make_shared<ColumnFixedString>(8);
    std::shared_ptr<ColumnFixedString> fname_stack = std::make_shared<ColumnFixedString>(18);
    std::shared_ptr<ColumnDateTime64> time_stack = std::make_shared<ColumnDateTime64>(3);
    std::shared_ptr<ColumnInt32> values_stack = std::make_shared<ColumnInt32>();

    date->Reserve(push_step);
    fname->Reserve(push_step);
    symbol->Reserve(push_step);
    time->Reserve(push_step);
    values->Reserve(push_step);

    for(auto i = 0; i < push_step; ++i) {
        time_t cur_time;
        std::time(&cur_time);
        date->Append(cur_time);
        symbol->Append("sh" + std::to_string(i).substr(0,6));
        time->Append(dis(gen));
    }
    for(auto i = 0; i < ColumnNumber; ++i) {
        date_stack->Append(date);
        symbol_stack->Append(symbol);
        time_stack->Append(time);
        for(auto j = 0; j < push_step; ++j) {
            fname->Append("f" + std::to_string(j));
            values->Append(dis(gen));
        }
        fname_stack->Append(fname);
        values_stack->Append(values);
        fname->Clear();
        values->Clear();
    }

    /****************** Transfer Data **********************/

    std::cout << "driver release start" << "\n";

    Block block;
    block.AppendColumn("date", date_stack);
    block.AppendColumn("symbol", symbol_stack);
    block.AppendColumn("fname", fname_stack);
    block.AppendColumn("time", time_stack);
    block.AppendColumn("value", values_stack);
    client.Insert(table_name, block);

    std::cout << "driver release done" << "\n";

    return 0;
}

Upon execution, the program outputs "driver release start" and then terminates with the following error message:

terminate called after throwing an instance of 'std::system_error'
  what():  fail to send 16 bytes of data: Connection reset by peer
Aborted

I suspect this issue may be related to network transmission or server-side configuration when inserting a large amount of data. Could you advise on how to adjust ClickHouse or the client code to resolve this "Connection reset by peer" error in such a scenario? Thank you for your assistance.