axboe / liburing

Library providing helpers for the Linux kernel io_uring support
MIT License
2.7k stars 393 forks source link

Problem with direct descriptor in batch with `openat` and `write` SQEs #1169

Closed michael-g closed 3 weeks ago

michael-g commented 3 weeks ago

I have been trying to get to grips with liburing, in this particular case for opening/creating a file and writing its header in the same batch by using io_uring_register_files_sparse, io_uring_prep_openat_direct and io_uring_prep_write with IOSQE_IO_LINK and IOSQE_FIXED_FILE.

However, while the open/create operation works and the file is created as expected, the linked write always fails with EBADF ("Bad file descriptor"). I'm sure I'm doing something wrong, but I can't see what.

I'm using liburing versions 2.5 and 2.6, compiled from source. I'm running on Fedora 40 on kernel 6.8.11-300.fc40.x86_64.

My compilation line (wrestled from CMake's grasp) is

gcc  -I$HOME/alt/include -std=gnu18 -Wall -Wextra -Wpedantic -O0 -g -Wno-unused-parameter -Wno-unused-variable -o liburing_test_case ./liburing_test.c -Wl,-rpath,$HOME/alt/lib $HOME/alt/lib/liburing.so.2.6

... where $HOME/alt is the install prefix given to liburing's ./configure --prefix $HOME/alt.

#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <liburing.h>

// Bundles the io_uring instance with the counters the mg_* helpers maintain.
typedef struct ring_ops
{
  // The liburing ring handle; passed to every io_uring_* call as &ring->m_ring.
  struct io_uring m_ring;
  // Not referenced by any helper visible in this listing.
  uint32_t m_num_dir_files;
  // Incremented for each SQE prepared, decreased by the count io_uring_submit returns.
  uint32_t m_pending_sqes;
} ring_ops_s;

// Initialises the ring and registers a sparse table of `n_files` direct
// (fixed) file slots.
//
// sq_entries: requested submission-queue size.
// cq_entries: requested completion-queue size; 0 leaves the kernel default.
// n_files:    number of empty direct-descriptor slots to register.
//
// Returns 0 on success, -1 on failure. On failure the ring is not left
// initialised, so the caller must not call io_uring_queue_exit on it.
static int mg_setup(ring_ops_s *ring, uint32_t sq_entries, uint32_t cq_entries, uint32_t n_files)
{
  printf("CALL: mg_setup(sq_entries=%u, cq_entries=%u, n_files=%u)\n", sq_entries, cq_entries, n_files);
  struct io_uring_params params = {0};

  // The kernel only honours params.cq_entries when IORING_SETUP_CQSIZE is
  // set; without the flag the requested CQ size is silently ignored.
  if (cq_entries != 0) {
    params.flags |= IORING_SETUP_CQSIZE;
    params.cq_entries = cq_entries;
  }

  // liburing reports failures as a negative errno value in the return code;
  // errno itself is not guaranteed to be set.
  int err = io_uring_queue_init_params(sq_entries, &ring->m_ring, &params);
  if (err < 0) {
    fprintf(stderr, "ERROR: failed in io_uring_queue_init_params: %s\n", strerror(-err));
    return -1;
  }

  err = io_uring_register_files_sparse(&ring->m_ring, n_files);
  if (err < 0) {
    fprintf(stderr, "ERROR: failed in io_uring_register_files_sparse: %s\n", strerror(-err));
    // The ring was created above; release it so a failed setup leaks nothing.
    io_uring_queue_exit(&ring->m_ring);
    return -1;
  }
  return 0;
}

// Submits all queued SQEs to the kernel and updates the pending counter.
//
// Returns the value of io_uring_submit: the number of SQEs submitted on
// success, or a negative errno value on failure (in which case the pending
// counter is left untouched).
static int mg_submit(ring_ops_s *ring)
{
  const int submitted = io_uring_submit(&ring->m_ring);
  if (submitted < 0) {
    fprintf(stderr, "ERROR: failed in io_uring_submit: %s\n", strerror(-submitted));
    return submitted;
  }

  ring->m_pending_sqes -= (uint32_t)submitted;
  // `submitted` is a signed int, so it is printed with %i rather than %u.
  printf("Submitted %i SQEs, pending are %u\n", submitted, ring->m_pending_sqes);
  return submitted;
}

// Waits for up to `count` completions, logging each one.
//
// Returns 0 when all `count` completions succeeded. Stops at the first
// problem and returns a negative value: either the error from
// io_uring_wait_cqe, or the negative `res` of the failing completion.
// Completions after the failing one are NOT consumed by this call.
static int mg_await_cqe(ring_ops_s *ring, const uint32_t count)
{
  for (uint32_t i = 0; i < count; i++) {
    struct io_uring_cqe *cqe = NULL;
    const int err = io_uring_wait_cqe(&ring->m_ring, &cqe);
    if (err < 0) {
      // No completion was delivered, so there is nothing to mark as seen.
      fprintf(stderr, "ERROR: while waiting for completion[%u]: %s\n", i, strerror(-err));
      return err;
    }

    // Copy what we need out of the CQE before handing the slot back.
    const int res = cqe->res;
    const unsigned long long user_data = (unsigned long long)cqe->user_data;
    io_uring_cqe_seen(&ring->m_ring, cqe);

    if (res < 0) {
      fprintf(stderr, "ERROR: reported for CQE[%u]: %s (%i), user_data is %llu\n", i, strerror(-res), res, user_data);
      return res;
    }
    printf("Success reported for CQE[%u]: %i, user_data is %llu\n", i, res, user_data);
  }
  return 0;
}

// Fetches the next submission-queue entry from the ring.
//
// Returns the entry handed back by io_uring_get_sqe; when that is NULL an
// error is logged to stderr and NULL is returned to the caller.
struct io_uring_sqe *get_sqe(ring_ops_s *ring)
{
  struct io_uring_sqe *entry = io_uring_get_sqe(&ring->m_ring);
  if (entry != NULL) {
    return entry;
  }

  fprintf(stderr, "ERROR: failed in io_uring_get_sqe\n");
  return NULL;
}

// Queues two linked SQEs: an openat into direct-descriptor slot `file_idx`,
// followed by a write of `buf` at offset 0 through that same slot.
//
// path:     file to open; relative paths are resolved against the current
//           working directory (AT_FDCWD).
// flags:    open(2) flags; must include a writable access mode (O_WRONLY or
//           O_RDWR) or the linked write will complete with EBADF.
// mode:     permission bits used if the file is created.
// file_idx: index of the registered direct-descriptor slot to install into.
// buf:      header bytes; must stay valid until the write completes.
// buf_len:  number of bytes in `buf` to write.
//
// The open completes with user_data 1 and the write with user_data 2.
// Nothing is submitted here; the caller submits and reaps the completions.
// Returns 0 on success, -1 if the SQEs could not be queued.
static int mg_open_journal_and_write_header(ring_ops_s *ring, const char *path, int32_t flags, int32_t mode, uint32_t file_idx, const char *buf, size_t buf_len)
{
  // io_uring_prep_write takes an unsigned byte count.
  if (buf_len > UINT32_MAX) {
    fprintf(stderr, "ERROR: header of %zu bytes is too large for a single write SQE\n", buf_len);
    return -1;
  }

  // Both SQEs must be available up front: queuing only the open, with
  // IOSQE_IO_LINK set, would chain it to whatever unrelated SQE comes next.
  if (io_uring_sq_space_left(&ring->m_ring) < 2) {
    fprintf(stderr, "ERROR: not enough free SQEs for linked open+write\n");
    return -1;
  }

  struct io_uring_sqe *sqe = get_sqe(ring);
  if (NULL == sqe) {
    return -1;
  }
  printf("CALL: io_uring_prep_openat_direct(sqe=%p, dfd=%i, path=\"%s\", flags=0x%04x, mode=0x%04x, file_idx=%u)\n", (void*)sqe, AT_FDCWD, path, (unsigned)flags, (unsigned)mode, file_idx);

  // The prep helper stores the directory fd in sqe->fd and encodes the target
  // slot itself, so sqe->fd must not be overwritten with file_idx afterwards.
  io_uring_prep_openat_direct(sqe, AT_FDCWD, path, flags, mode, file_idx);
  io_uring_sqe_set_data64(sqe, 1);
  sqe->flags |= IOSQE_IO_LINK;  // the write below only runs if the open succeeds

  ring->m_pending_sqes++;

  sqe = get_sqe(ring);
  if (NULL == sqe) {
    return -1;
  }

  printf("CALL: io_uring_prep_write(sqe=%p, file_idx=%u, buf=%p, len=%zu, offset=%i)\n", (void*)sqe, file_idx, (void*)buf, buf_len, 0);
  // Use the caller-supplied length: sizeof(buf) would be the size of the
  // pointer parameter, not of the caller's buffer.
  io_uring_prep_write(sqe, (int)file_idx, buf, (unsigned)buf_len, 0);
  io_uring_sqe_set_data64(sqe, 2);
  sqe->flags |= IOSQE_FIXED_FILE;  // sqe->fd is a registered-slot index, not a normal fd

  ring->m_pending_sqes++;

  return 0;
}

int main(int argc, char *argv[])
{
  printf("Using liburing version %i.%i\n", io_uring_major_version(), io_uring_minor_version());

  ring_ops_s ring = {0};
  if (-1 == mg_setup(&ring, 256, 2048, 1)) {
    return EXIT_FAILURE;
  }

  uint32_t slot_idx = 0;

  char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};

  int err = mg_open_journal_and_write_header(&ring, "/tmp/test.journal", O_CREAT|O_TRUNC, 0644, slot_idx, buf, sizeof(buf));
  if (-1 == err) {
    fprintf(stderr, "Failed in mg_open_journal_and_write_header, exiting\n");
    return EXIT_FAILURE;
  }

  err = mg_submit(&ring);
  if (err < 0) {
    fprintf(stderr, "ERROR: failed in mg_submit, exiting\n");
    return EXIT_FAILURE;
  }

  err = mg_await_cqe(&ring, 2);
  if (err < 0) {
    fprintf(stderr, "Failed in mg_await_cqe, exiting\n");
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}
michael-g commented 3 weeks ago

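All too predictably I've found my bug shortly after posting: the flags argument to open should have been O_CREAT|O_TRUNC|O_RDWR. Without an access mode the flags default to O_RDONLY, so the linked write fails with EBADF; the 0644 mode argument only sets the file's permissions, not the access mode of the file descriptor.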

Closing.

Regards

Mike