OpenMPDK / KVSSD

KV SSD host software including APIs and drivers
Other
230 stars 55 forks source link

Memory leak if user doesn't handle async queue #20

Closed cdcsl closed 5 years ago

cdcsl commented 5 years ago

Location (Korea, USA, China, India, etc.) Korea

Describe the bug When the user doesn't check if the queue is full in their program, memory leaks indefinitely as IO commands created in kv_store (and other commands) are not freed when the queue is full, and new commands are created on the retry.

To Reproduce Steps to reproduce the behavior:

  1. Confirm free system memory with free
  2. Run the following modified version of the insertion routine in test_async.cpp
 sudo ./sample_code_async -d /dev/nvme0n1 -n 100000000 -k 16 -v 4096 -o 1
int perform_insertion(kvs_container_handle cont_hd, int count, 
int maxdepth, kvs_key_t klen, uint32_t vlen) {
  int num_submitted = 0;
  long int seq = 0;
  fprintf(stdout, "\n===========\n");
  fprintf(stdout, "   Do Write Operation\n");
  fprintf(stdout, "===========\n");

  struct timespec t1, t2;
  clock_gettime(CLOCK_REALTIME, &t1);

  kvs_key *kvskey = (kvs_key*)malloc(sizeof(kvs_key));
  kvs_value *kvsvalue = (kvs_value*)malloc(sizeof(kvs_value));

  char *key   = (char*)kvs_malloc(klen, 4096);
  char *value = (char*)kvs_malloc(vlen, 4096);

  while (num_submitted < count) {
      if (num_submitted >= count) break;

      if(!key || !value) {
        fprintf(stderr, "no mem is allocated\n");
        exit(1);
      }

      sprintf(key, "%0*ld", klen - 1, seq++);
      sprintf(value, "value%ld", seq);
      kvs_store_option option;
      memset(&option, 0, sizeof(kvs_store_option));
      option.st_type = KVS_STORE_POST;
      option.kvs_store_compress = false;

      const kvs_store_context put_ctx = {option, NULL, NULL };
      kvskey->key = key;
      kvskey->length = klen;
      kvsvalue->value = value;
      kvsvalue->length = vlen;
      kvsvalue->actual_value_size = kvsvalue->offset = 0;
      int ret = kvs_store_tuple_async(cont_hd, kvskey, kvsvalue, &put_ctx, NULL);

      if (ret != KVS_SUCCESS) {
        fprintf(stderr, "store tuple failed with err %s\n", kvs_errstr(ret)); 
        //ret = kvs_store_tuple_async(cont_hd, kvskey, kvsvalue, 
        //&put_ctx, complete);
        exit(1);
      }

      num_submitted++;
  }

  clock_gettime(CLOCK_REALTIME, &t2);
  unsigned long long start, end;
  start = t1.tv_sec * 1000000000L + t1.tv_nsec;
  end = t2.tv_sec * 1000000000L + t2.tv_nsec;
  double sec = (double)(end - start) / 1000000000L;
  fprintf(stdout, "Total time %.2f sec; Throughput %.2f ops/sec\n", sec, 
  (double)count /sec );

  return 0;
}
  1. Check free memory with free again as often as desired

Expected behavior Reasonable memory usage

System environment (please complete the following information)

Workload

Additional context

The problem can be fixed by modifying kernel_driver_adapter/src/kv_device.cpp: kv_store, from

    io_cmd *cmd = new io_cmd(dev, ns, que_hdl);

    cmd->ioctx.key = key;
    cmd->ioctx.value = const_cast<kv_value *>(value);
    cmd->ioctx.timeout_usec = 0;
    if (post_fn) {
        cmd->ioctx.post_fn = post_fn->post_fn;
        cmd->ioctx.private_data = post_fn->private_data;
    } else {
        cmd->ioctx.post_fn = NULL;
    }
    cmd->ioctx.opcode = KV_OPC_STORE;
    cmd->ioctx.command.store_info = info;

    return dev->submit_io(que_hdl, cmd);

to

    io_cmd *cmd = new io_cmd(dev, ns, que_hdl);

    cmd->ioctx.key = key;
    cmd->ioctx.value = const_cast<kv_value *>(value);
    cmd->ioctx.timeout_usec = 0;
    if (post_fn) {
        cmd->ioctx.post_fn = post_fn->post_fn;
        cmd->ioctx.private_data = post_fn->private_data;
    } else {
        cmd->ioctx.post_fn = NULL;
    }
    cmd->ioctx.opcode = KV_OPC_STORE;
    cmd->ioctx.command.store_info = info;

    ret = dev->submit_io(que_hdl, cmd);

    if(ret) free(cmd);

    return ret;

Alternatively, it can also be fixed by changing kernel_driver_adapter/src/kv_device.cpp: kv_store: submit_io, from

    if (kque->full()) {
        return KV_ERR_QUEUE_IS_FULL;
    } else {
        // send to real device directly for execution
        // actually only in a queue inside device driver for asyncIO
        // Please note this is not actual execution, it's just sent to device
        // for asynchronous execution.
        // Ideally device should export a queue interface for sending commands
        kv_result res = cmd->execute_cmd();
        if (res == KV_SUCCESS) {
            kque->increase_qdepth();
        }

        return res;
    }

to

    if (kque->full()) {
        free(cmd);
        return KV_ERR_QUEUE_IS_FULL;
    } else {
        // send to real device directly for execution
        // actually only in a queue inside device driver for asyncIO
        // Please note this is not actual execution, it's just sent to device
        // for asynchronous execution.
        // Ideally device should export a queue interface for sending commands
        kv_result res = cmd->execute_cmd();
        if (res == KV_SUCCESS) {
            kque->increase_qdepth();
        }

        return res;
    }

The same pattern is also repeated in many places throughout the code.

kvssd-support commented 5 years ago

We are looking into this - will update.

kvssd-support commented 5 years ago

Next release will fix this issue

cdcsl commented 5 years ago

Thanks.