[PLEASE HELP]Facing periodic throughput drop when running a asynchronous boost asio server when it is communicating with other servers present in a different linux network namespace at saturation

I have 2 servers communicating with each other and load generator to calculate the throughput. The below code is of my 1st Server[Main Server], which accepts request from the load generator and communicates with a different server and sends a response. This is minimal code to reproduce the issue I am facing for a much bigger software system.

#include <iostream>
#include <string>
#include <boost/asio.hpp>
#include <boost/asio/thread_pool.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
#include "http_server.hh"

using namespace boost::asio;
using ip::tcp;

std::string desired_IP_address = "172.16.0.2"; // For example purposes

class Session : public std::enable_shared_from_this<Session> {
public:
    Session(ip::tcp::resolver& resolver,tcp::socket socket, tcp::socket client_socket) : socket_(std::move(socket)), resolver_(resolver), client_socket_(std::move(client_socket)) {}

    void start() { 
        do_read();
    }

private:
    void do_read() {
        auto self(shared_from_this());
        socket_.async_read_some(
            boost::asio::buffer(data_),
            [this, self](boost::system::error_code ec, std::size_t length) {
                if (!ec) {
                    std::string request(data_.data(), length);
                    path_ = extract_path(request);
                    resolver_.async_resolve(
                        ip::tcp::resolver::query("172.16.0.4", "5000"),
                        [this,self](const boost::system::error_code& ec,
                            ip::tcp::resolver::iterator it) {
                            if (ec) {
                                std::cout  << "Error resolving " << "localhost" << ": "
                                        << ec.message()<< std::endl;
                                return;
                            }

                            // For simplicity, we'll assume the first endpoint will always
                            // be available.
                            //std::cout << "localhost" << ": resolved to " << it->endpoint()
                            //        << std::endl;
                            do_connect(it->endpoint());
                        });
                    //handle_request_async(path);
                }
            });
    }

    void do_connect(const ip::tcp::endpoint& dest) {
        // Remember that the Asio library will make copies of parameters passed
        // by const reference, so it's ok to let the endpoint go out of scope
        // when this method returns.
        auto self(shared_from_this());
        boost::asio::ip::tcp::endpoint endpoint(boost::asio::ip::address::from_string("172.16.0.4"), 5000);
        client_socket_.async_connect(a
            endpoint, [this, self](const boost::system::error_code& ec) {
                if (ec) {
                    std::cout << "Error connecting to " << "localhost"<< ": "
                               << ec.message()<< std::endl;
                    return;
                }

                //std::cout << "localhost" << ": connected to "
                //          << client_socket_.remote_endpoint() << std::endl;
                do_send_http_get();
            });
    }

    void do_send_http_get() {
        auto self(shared_from_this());
        // At minimum, the remote server needs to know the path being fetched
        // and the host serving that path. The latter is required because a
        // single server often hosts multiple domains.
        request_ = std::string("GET /") + " HTTP/1.1\r\nHost: " + "example.com" + "\r\n\r\n";
        async_write(
            client_socket_, buffer(request_),
            [this,self](const boost::system::error_code& ec, std::size_t size) {
                if (ec) {
                    std::cout << "Error sending GET " << ec<< std::endl;
                    return;
                }

                //std::cout << "localhost" << ": sent " << size << " bytes"<< std::endl;
                do_recv_http_get_header();
            });
    }

    void do_recv_http_get_header() {
        // Since HTTP/1.1 is a text based protocol, most of it is human readable
        // by design. Notice how the "double end of line" character sequence
        // ("\r\n\r\n") is used to delimit message sections.
        auto self(shared_from_this());
        async_read_until(
            client_socket_, response_, "\r\n\r\n",
            [this, self](const boost::system::error_code& ec, std::size_t size) {
                if (ec) {
                    std::cout << "Error receiving GET header " << ec;
                    return;
                }

                //std::cout << "localhost:5000" << ": received " << size << ", streambuf "
                //          << response_.size();

                // The asio::streambuf class can use multiple buffers
                // internally, so we need to use a special iterator to copy out
                // the header.
                std::string header(
                    buffers_begin(response_.data()),
                    buffers_begin(response_.data()) + size);
                response_.consume(size);

                //std::cout << "----------" << std::endl << "localhost:5000"
                //          << ": header length " << header.size() << std::endl
                //          << header << std::endl;

                // First we'll check for the explicit "Content-Length" length
                // field. This provides the exact body length in bytes.
                size_t pos = header.find("Content-Length: ");
                if (pos != std::string::npos) {
                    size_t len = std::strtoul(
                        header.c_str() + pos + sizeof("Content-Length: ") - 1,
                        nullptr, 10);
                    do_receive_http_get_body(len - response_.size());
                    return;
                }

                // The other alternative is a chunked transfer. There is a quick
                // way to determine the remaining length in this case.
                pos = header.find("Transfer-Encoding: chunked");
                if (pos != std::string::npos) {
                    do_receive_http_get_chunked_body();
                    return;
                }

                std::cout << "Unknown body length";
            });
    }

    void do_receive_http_get_body(size_t len) {
        // For "Content-Length" we know exactly how many bytes are left to
        // receive.
        auto self(shared_from_this());
        async_read(
            client_socket_, response_, transfer_exactly(len),
            [this,self] (const boost::system::error_code& ec, std::size_t size) {
               handle_http_get_body(ec, size);
            });
    }

    void do_receive_http_get_chunked_body() {
        // For chunked transfers the final body chunk will be terminated by
        // another "double end of line" delimiter.
        auto self(shared_from_this());
        async_read_until(
            client_socket_, response_, "\r\n\r\n",
            [this,self] (const boost::system::error_code& ec, std::size_t size) {
                handle_http_get_body(ec, size);
            });
    }

    void handle_http_get_body(const boost::system::error_code& ec,
                              std::size_t size) {
        if (ec) {
            std::cout << "Error receiving GET body " << ec;
            return;
        }

        //std::cout << "localhost:5000" << ": received " << size << ", streambuf "
        //          << response_.size();

        // We can finally consume the body and print it out if desired.
        const auto& data = response_.data();
        std::string response_body(buffers_begin(data), buffers_end(data));
        response_.consume(size);

        //std::cout << "----------" << std::endl << "localhost:5000" << ": body length "
        //          << response_body.size() << std::endl;
        //std::cout << response_body << std::endl;
        handle_request_async();
    }

    void handle_request_async() {
        auto self(shared_from_this());

        async_response([this,self](const std::string& response) {
            // std::cout << "path " <<  path_ << std::endl;
            // std::cout << response << std::endl;
            async_write(socket_, boost::asio::buffer(response),
                [this,self](boost::system::error_code ec, std::size_t /*length*/) {
                    if (!ec) {
                        //std::cout << "response" << std::endl;
                        boost::system::error_code ignored_ec;
                        socket_.shutdown(tcp::socket::shutdown_both, ignored_ec);
                    }
                });
        });
    }

    void async_response(std::function<void(const std::string&)> callback) {
        // Assuming handle_request returns HTTP_Response asynchronously
        HTTP_Response* htmlResponse = handle_request(path_);
        //std::cout << "path: " << path << std::endl;
        std::string response = htmlResponse->body;
        //std::cout << "content: " << response << std::endl;
        callback(response);

        delete htmlResponse;
    }

    // Function to extract path from the HTTP request
    std::string extract_path(const std::string& request) {
        // Logic to extract path from the request string
        // Example logic: extracting the path after the GET method
        std::string path;
        //std::cout << request << std::endl;
        // Implement your path extraction logic here

        // Find the end of the request line (the first line of the HTTP request)
        std::size_t requestLineEnd = request.find("\r\n");
        if (requestLineEnd != std::string::npos) {
            std::string requestLine = request.substr(0, requestLineEnd);

            // Split the request line into parts (method, path, protocol)
            std::vector<std::string> parts;
            boost::split(parts, requestLine, boost::is_any_of(" "));

            // The second part typically contains the path (e.g., "GET /path HTTP/1.1")
            if (parts.size() >= 2) {
                path = parts[1]; // Extract the path from the request line
            }
        }

        return path;
    }

    tcp::socket socket_;
    tcp::socket client_socket_;
    std::array<char, 8192> data_;
    std::string path_;

    std::string request_;
    boost::asio::streambuf response_;
    ip::tcp::resolver& resolver_;

};

class Server {
public:
    Server(boost::asio::io_context& io_context, short port)
        : acceptor_(io_context, tcp::endpoint(boost::asio::ip::make_address(desired_IP_address), port)),
          //acceptor_(io_context, tcp::endpoint(tcp::v4(), port)),
          socket_(io_context), resolver_(io_context), client_socket_(io_context)
    {
        do_accept();
    }

private:
    void do_accept() {
        acceptor_.async_accept(
            socket_,
            [this] (boost::system::error_code ec) {
                if (!ec) {
                    std::make_shared<Session>(resolver_, std::move(socket_), std::move(client_socket_))->start();
                }
                do_accept();
            });
    }

    tcp::acceptor acceptor_;
    tcp::socket socket_;
    tcp::socket client_socket_;
    ip::tcp::resolver resolver_;
};

int main() {
    try {
        boost::asio::io_context io_context;
        Server server(io_context, 8080);
        boost::asio::thread_pool pool(40);
        for (std::size_t i = 0; i < 40; ++i)
            boost::asio::post(pool, [&io_context]() { io_context.run(); });

        pool.join();
    } catch (std::exception& e) {
        std::cerr << "Exception: " << e.what() << std::endl;
    }

    return 0;
}

I have another server [Service Discovery Server] to which the above server communicates with

#include <cpprest/http_listener.h>
#include <cpprest/json.h>

using namespace web;
using namespace web::http;
using namespace web::http::experimental::listener;

const utility::string_t SERVER_URL = U("http://172.16.0.4:5000");

void handle_get(const http_request& request)
{

    utility::string_t path = request.request_uri().path();

    std::string response = "http://172.16.0.3:5005";

    http_response httpResponse(status_codes::OK);
    json::value jsonResponse;
    jsonResponse[U("message")] = json::value::string(U(response));

    utility::string_t responseString = jsonResponse.serialize();
    httpResponse.headers().set_content_type(U("application/json"));
    httpResponse.set_body(responseString);
    request.reply(httpResponse);
    //std::cout << "Connected" << std::endl;
}

int main()
{
    http_listener listener(SERVER_URL);

    listener.support(methods::GET, handle_get);

    try {
        listener.open().then([&listener]() {
            std::cout << "Server started listening on " << SERVER_URL << std::endl;
        }).wait();

        std::cout << "Press Enter to exit." << std::endl;
        std::cin.ignore();

        listener.close().wait();
    }
    catch (const std::exception& e) {
        std::cerr << "Server error: " << e.what() << std::endl;
    }

    return 0;
}

And the load generator is as follows, which calculates the throughput at each second:

/* run using: ./load_gen localhost <server port> <number of concurrent users>
   <think time (in s)> <test duration (in s)> */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <netdb.h>

#include <pthread.h>
#include <sys/time.h>
#include <signal.h>
#include <time.h>
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <arpa/inet.h>

#define MAX_EVENTS 1

FILE *log_file;
int token = 0;
pthread_mutex_t tokenMutex = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t fileMutex = PTHREAD_MUTEX_INITIALIZER;

int time_up;
//FILE *log_file;

// user info struct
struct user_info {
  // user id
  int id;

  // socket info
  int portno;
  char *hostname;
  float think_time;

  // user metrics
  int total_count;
  float total_rtt;
};

struct thread_args {
  struct user_info *info;
  int user_count;
};

// error handling function
void error(char *msg) {
  perror(msg);
  //exit(0);
}

// time diff in seconds
float time_diff(struct timeval *t2, struct timeval *t1) {
  return (t2->tv_sec - t1->tv_sec) + (t2->tv_usec - t1->tv_usec) / 1e6;
}

void getTimestamp(char *timestamp, int timestampSize) {
    struct timeval tv;
    struct tm *tm_info;

    gettimeofday(&tv, NULL);
    tm_info = localtime(&tv.tv_sec);

    snprintf(timestamp, timestampSize, "%02d:%02d:%02d.%06ld", 
        tm_info->tm_hour, tm_info->tm_min, tm_info->tm_sec, tv.tv_usec);
}

void *calculate_throughput(void *arg) {
  struct thread_args *args = (struct thread_args *)arg;
  struct user_info *info = args->info;
  int user_count = args->user_count;

  int timer_fd = timerfd_create(CLOCK_MONOTONIC, 0);
  if(timer_fd == -1){
    perror("timerfd_fd");
    exit(1);
  }

  struct itimerspec timer_spec;
  memset(&timer_spec, 0, sizeof(timer_spec));
  timer_spec.it_value.tv_sec = 1;
  timer_spec.it_value.tv_nsec = 0;
  timer_spec.it_interval.tv_sec = 1;
  timer_spec.it_interval.tv_nsec = 0;

  if(timerfd_settime(timer_fd,0, &timer_spec, NULL) == -1) {
    perror("timerfd_settime");
    exit(1);
  }
  struct timeval start, end;

  int epoll_fd = epoll_create1(0);
  if(epoll_fd == -1) {
    perror("epoll_create1");
    exit(1);
  }

  struct epoll_event event;
  event.events = EPOLLIN;
  event.data.fd = timer_fd;
  if(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, timer_fd, &event) == -1) {
    perror("epoll_ctl");
    exit(1);
  }

  gettimeofday(&start, NULL);
  int last_count = 0;
  while (!time_up) {
      struct epoll_event events[MAX_EVENTS];
      int num_events = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
      if (num_events == -1) {
          perror("epoll_wait");
          exit(1);
      }
      for (int i = 0; i < num_events; i++) {
          if (events[i].data.fd == timer_fd) {
            uint64_t c = 0; //number of times the timer expired
            int r = read(timer_fd, &c, sizeof(c));
            int count=0;
            for (int i = 0; i < user_count; i++) {
              // Access info[i] to work with user-specific data
              count=count+info[i].total_count;
            }
            gettimeofday(&end, NULL);
            float wait_time_s = time_diff(&end, &start);
            float throughput = ((float)(count - last_count)) / wait_time_s;
            printf("%f\n", throughput);
            // printf("Throughput (requests/second): %f\n", throughput);
            // printf("Count: %d\n", count-last_count);
            // printf("Total Count: %d\n", last_count);
            last_count = count;
            gettimeofday(&start, NULL);
          }
      }
  }
  close(epoll_fd);
  close(timer_fd);
}

// user thread function
void *user_function(void *arg) {
  /* get user info */
  struct user_info *info = (struct user_info *)arg;

  int sockfd, n,num;
  char buffer[1024];
  char timestampStart[20];
  char timestampEnd[20]; // Adjust the size as needed
  struct timeval start, end;

  struct sockaddr_in serv_addr;
  struct hostent *server;
  info->total_count=0;
  info->total_rtt=0;

  char* requests[] = {"/index.html ","/apart1/index.html ","/apart2/index.html ","/apart1/flat11/index.html ", "/apart1/flat12/index.html ",
                      "/apart2/flat21/index.html ","/apart3/flat31/index.html ","/apart3/flat32/index.html "};

  server=gethostbyname(info->hostname);
  if(server== NULL){
    error("No such host");
  }
  bzero((char *)&serv_addr, sizeof(serv_addr));
  serv_addr.sin_family = AF_INET;
  //bzero((char *)&serv_addr.sin_addr.s_addr, sizeof(serv_addr.sin_addr.s_addr));
  serv_addr.sin_port = htons(info->portno);
  if (inet_pton(AF_INET, info->hostname, &serv_addr.sin_addr) <= 0) {
      perror("Error converting IP address");
      close(sockfd);
      exit(1);
  }

  while (1) {
    /* start timer */
    gettimeofday(&start, NULL);
    getTimestamp(timestampStart, sizeof(timestampStart));
    //printf("user id is %d %s %f\n",info->id,info->hostname,info->think_time);

    /* TODO: create socket */
    sockfd = socket(AF_INET,SOCK_STREAM,0);
    if(sockfd < 0){
      error("Error opening socket");
      continue;
    }

    /* TODO: send message to server */
    if (connect(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0){
        error("ERROR connecting");
        close(sockfd);
        continue;
    }

    char tokenString[20];
    pthread_mutex_lock(&tokenMutex);
    token++;
    sprintf(tokenString, "%d",token);
    pthread_mutex_unlock(&tokenMutex);

    bzero(buffer, 1024);
    //strcpy(buffer,"GET /apart2/ HTTP/1.0\n");
    strcpy(buffer,"GET");
    strcat(buffer, " ");
    // strcat(buffer, " /");
    // strcat(buffer, tokenString);
    num = (rand() % (8));
    strcat(buffer,requests[num]);
    strcat(buffer,"HTTP/1.0\r\n");
    strcat(buffer, "\r\n");

    n = write(sockfd,buffer,strlen(buffer));
    if (n < 0){
        error("ERROR writing to socket");
        close(sockfd);
        continue;
    }

    //printf("%s\n", buffer);

    /* TODO: read reply from server */
    bzero(buffer, 1024);

    char fileBuffer[4096];
    strcat(fileBuffer, "======================================\n");
    strcat(fileBuffer, "token: ");
    strcat(fileBuffer, tokenString);
    strcat(fileBuffer, "\n");
    //printf("%s\n", tokenString);
    while((n = read(sockfd, buffer, sizeof(buffer))) > 0) {
        strncat(fileBuffer, buffer, n);
        // Check if the headerBuffer is getting too large, and break the loop if necessary
        if (strlen(fileBuffer) >= 4096 - 1) {
            printf("fileBuffer is full, breaking the loop\n");
            break;
        }
    }
    //printf("%s", buffer);
    getTimestamp(timestampEnd, sizeof(timestampEnd));
    strcat(fileBuffer, "\n");
    strcat(fileBuffer, "request_begin_time: ");
    strcat(fileBuffer, timestampStart);
    strcat(fileBuffer, "\n");
    strcat(fileBuffer, "request_end_time: ");
    strcat(fileBuffer, timestampEnd);
    strcat(fileBuffer, "\n");
    strcat(fileBuffer, "======================================\n");

    pthread_mutex_lock(&fileMutex);
    fprintf(log_file, "%s \n", fileBuffer);
    fflush(log_file);
    pthread_mutex_unlock(&fileMutex);
    bzero(fileBuffer, 4096);
    //printf("buffer:%s \t %s \n", buffer, tokenString);

    if (n< 0){
      error("ERROR reading from socket");
      printf("Error at read\n");
    }
    else{
      info->total_count++;
    }
    //fprintf(log_file, "User #%d finished\n", info->id);
    //printf("%s\n", buffer);

    /* TODO: close socket */
    close(sockfd);

    /* end timer */
    gettimeofday(&end, NULL);

    /* if time up, break */
    if (time_up)
      break;

    /* TODO: update user metrics */
    info->total_rtt=info->total_rtt+time_diff(&end,&start);

    /* TODO: sleep for think time */
    usleep(info->think_time*(1000000));
  }

  /* exit thread */
  info->total_rtt=info->total_rtt/info->total_count;
  //fprintf(log_file, "User #%d finished\n", info->id);
  //fflush(log_file);
  pthread_exit(NULL);
}

int main(int argc, char *argv[]) {
  int user_count, portno, test_duration;
  float think_time;
  char *hostname;
  int avg_count=0;
  float avg_rtt=0,throughput;

  if (argc != 6) {
    fprintf(stderr,
            "Usage: %s <hostname> <server port> <number of concurrent users> "
            "<think time (in s)> <test duration (in s)>\n",
            argv[0]);
    exit(0);
  }

  hostname = argv[1];
  portno = atoi(argv[2]);
  user_count = atoi(argv[3]);
  think_time = atof(argv[4]);
  test_duration = atoi(argv[5]);

  printf("Hostname: %s\n", hostname);
  printf("Port: %d\n", portno);
  printf("User Count: %d\n", user_count);
  printf("Think Time: %f s\n", think_time);
  printf("Test Duration: %d s\n", test_duration);

  /* open log file */
  log_file = fopen("log.txt", "w");
  if (log_file == NULL) {
      perror("Error opening log file");
      exit(1);
  }

  pthread_t threads[user_count];
  struct user_info info[user_count];
  struct timeval start, end;

  struct thread_args thread_data;
  thread_data.info = info;
  thread_data.user_count = user_count;
  pthread_t throughput_thread;
  pthread_create(&throughput_thread, NULL, calculate_throughput, (void *)&thread_data);

  /* start timer */
  gettimeofday(&start, NULL);
  time_up = 0;
  for (int i = 0; i < user_count; ++i) {
    /* TODO: initialize user info */
    info[i].id=i;
    info[i].hostname = hostname;
    info[i].portno=portno;
    info[i].think_time=think_time;

    /* TODO: create user thread */
    pthread_create(&threads[i],NULL,user_function,&info[i]);

    //fprintf(log_file, "Created thread %d\n", i);
  }

  /* TODO: wait for test duration */
  sleep(test_duration);

  //fprintf(log_file, "Woke up\n");

  /* end timer */
  time_up = 1;
  gettimeofday(&end, NULL);

  /* TODO: wait for all threads to finish */
  for (int i = 0; i < user_count; ++i) {
    pthread_join(threads[i],NULL);
  }

  /* TODO: print results */
  for (int i = 0; i < user_count; ++i) {
    //printf("user id %d  req. count: %d rtt :%f \n",i,info[i].total_count, info[i].total_rtt);
    avg_rtt=avg_rtt+info[i].total_rtt;
    avg_count=avg_count+info[i].total_count;
  }
  printf("avg count is %d rtt sum is %f \n",avg_count,avg_rtt);
  float wait_time_s=time_diff(&end,&start);
  avg_rtt=avg_rtt/user_count;
  throughput=avg_count/wait_time_s;//to do correct formula
  printf("average throughput : %f \n",throughput);
  printf("avg rtt : %f \n",avg_rtt);

  printf("Ending program\n");

  /* close log file */
  fclose(log_file);
  return 0;
}

So when I am running all these on localhost, I am getting a consistent throughput, without much variation. But I am running them in different Linux network namespaces, then I am facing a throughput drop on the main server. The issue seems to be in the "async_connect" function, since sometimes I am getting error for it. But neither the error or throughput drop is present when running these servers on localhost. NOTE: I am pinning the main server to a single core and saturating it to 100% to capture the throughput at saturation. The throughput plot is as follows:

Moreover, earlier I was completely using the cpprestsdk library, observed this throughput drop during inter-namespace communication , so implemented the same in boost asio, but found the same issue.

Am I doing something wrong above due to which I am seeing this periodic drop in throughput ?

chriskohlhoff / asio

[PLEASE HELP]Facing periodic throughput drop when running a asynchronous boost asio server when it is communicating with other servers present in a different linux network namespace at saturation #1388