Update the Relooper algorithm

RReverser commented 3 years ago

C libraries often use goto exit / goto error as a way to jump within a function to a place that can cleanup resources before exiting when some condition is unmet.

Let me take this example (one of the simpler ones in the wild): https://github.com/kbarbary/sep/blob/d22bef88ded3c5b25f06584aa8a2bc931cad1826/ctest/test_image.c#L142-L212

/* an extremely dumb reader for our specific test FITS file! */
int read_test_image(char *fname, float **data, int *nx, int *ny)
{
  FILE *f;
  char buf[80];  /* buffer to hold a line */
  long pos;
  size_t nbytes, nel, i, elsize, nread;
  char *rawbytes = NULL;
  short *rawdata;
  char tmp;
  int status = 0;
  float bscale, bzero;

  /* hard-code image size & element size */
  *nx = 256;
  *ny = 256;
  elsize = 2;
  nel = *nx * *ny;
  bscale = 2.92273835509;
  bzero = 3713.66692596;

  f = fopen(fname, "r");

  /* read first line and check if it is a FITS file */
  nread = fread(buf, 1, 80, f);
  if (nread != 80) { status = 1; goto exit; }
  if (strncmp(buf, "SIMPLE  =                    T", 30) != 0)
    {
      printf("not a FITS file");
      status = 1;
      goto exit;
    }

  /* read rows until we get to END keyword */
  while (strncmp(buf, "END", 3) != 0)
    {
      nread = fread(buf, 1, 80, f);
      if (nread != 80) { status = 1; goto exit; }
    }

  /* move to next 2880 byte boundary. */
  pos = ftell(f) % 2880;  /* position in "page" */
  if (pos != 0) fseek(f, 2880 - pos, SEEK_CUR);

  /* read raw data bytes */
  nbytes = nel * elsize;
  rawbytes = (char *)malloc(nbytes);
  nread = fread(rawbytes, 1, nbytes, f);
  if (nread != nbytes) { status = 1; goto exit; }

  /* swap bytes in raw data (FITS is big-endian) */
  for (i=0; i<nbytes; i+=2)
    {
      tmp = rawbytes[i];
      rawbytes[i] = rawbytes[i+1];
      rawbytes[i+1] = tmp;
    }
  rawdata = (short *)rawbytes; /* buffer is now little-endian */

  /* convert to float, applying bscale/bzero */
  *data = (float *)malloc(nel * sizeof(float));
  for (i=0; i<nel; i++)
    {
      (*data)[i] = bscale * rawdata[i] + bzero;
    }

 exit:
  free(rawbytes);

  return status;
}

After transpiling + reorganize definitions, the Rust code generated by c2rust looks like this:

/* an extremely dumb reader for our specific test FITS file! */
#[no_mangle]

pub unsafe extern "C" fn read_test_image(
    mut fname: *mut libc::c_char,
    mut data: *mut *mut libc::c_float,
    mut nx: *mut libc::c_int,
    mut ny: *mut libc::c_int,
) -> libc::c_int {
    let mut current_block: u64; /* buffer to hold a line */
    let mut f = 0 as *mut crate::stdlib::FILE;
    let mut buf: [libc::c_char; 80] = [0; 80];
    let mut pos: libc::c_long = 0;
    let mut nbytes: crate::stddef_h::size_t = 0;
    let mut nel: crate::stddef_h::size_t = 0;
    let mut i: crate::stddef_h::size_t = 0;
    let mut elsize: crate::stddef_h::size_t = 0;
    let mut nread: crate::stddef_h::size_t = 0;
    let mut rawbytes = crate::stddef_h::NULL as *mut libc::c_char;
    let mut rawdata = 0 as *mut libc::c_short;
    let mut tmp: libc::c_char = 0;
    let mut status = 0 as libc::c_int;
    let mut bscale: libc::c_float = 0.;
    let mut bzero: libc::c_float = 0.;
    /* hard-code image size & element size */
    *nx = 256 as libc::c_int;
    *ny = 256 as libc::c_int;
    elsize = 2 as libc::c_int as crate::stddef_h::size_t;
    nel = (*nx * *ny) as crate::stddef_h::size_t;
    bscale = 2.92273835509f64 as libc::c_float;
    bzero = 3713.66692596f64 as libc::c_float;
    f = crate::stdlib::fopen(fname, b"r\x00" as *const u8 as *const libc::c_char);
    /* read first line and check if it is a FITS file */
    nread = crate::stdlib::fread(
        buf.as_mut_ptr() as *mut libc::c_void,
        1 as libc::c_int as libc::c_ulong,
        80 as libc::c_int as libc::c_ulong,
        f,
    );
    if nread != 80 as libc::c_int as libc::c_ulong {
        status = 1 as libc::c_int
    } else if crate::stdlib::strncmp(
        buf.as_mut_ptr(),
        b"SIMPLE  =                    T\x00" as *const u8 as *const libc::c_char,
        30 as libc::c_int as libc::c_ulong,
    ) != 0 as libc::c_int
    {
        ::libc::printf(b"not a FITS file\x00" as *const u8 as *const libc::c_char);
        status = 1 as libc::c_int
    } else {
        loop
        /* read rows until we get to END keyword */
        {
            if !(crate::stdlib::strncmp(
                buf.as_mut_ptr(),
                b"END\x00" as *const u8 as *const libc::c_char,
                3 as libc::c_int as libc::c_ulong,
            ) != 0 as libc::c_int)
            {
                current_block = 1109700713171191020;
                break;
            }
            nread = crate::stdlib::fread(
                buf.as_mut_ptr() as *mut libc::c_void,
                1 as libc::c_int as libc::c_ulong,
                80 as libc::c_int as libc::c_ulong,
                f,
            );
            if !(nread != 80 as libc::c_int as libc::c_ulong) {
                continue;
            }
            status = 1 as libc::c_int;
            current_block = 686024592065818529;
            break;
        }
        match current_block {
            686024592065818529 => {}
            _ => {
                /* move to next 2880 byte boundary. */
                pos = crate::stdlib::ftell(f) % 2880 as libc::c_int as libc::c_long; /* position in "page" */
                if pos != 0 as libc::c_int as libc::c_long {
                    crate::stdlib::fseek(
                        f,
                        2880 as libc::c_int as libc::c_long - pos,
                        ::libc::SEEK_CUR,
                    );
                }
                /* read raw data bytes */
                nbytes = nel.wrapping_mul(elsize);
                rawbytes = crate::stdlib::malloc(nbytes) as *mut libc::c_char;
                nread = crate::stdlib::fread(
                    rawbytes as *mut libc::c_void,
                    1 as libc::c_int as libc::c_ulong,
                    nbytes,
                    f,
                );
                if nread != nbytes {
                    status = 1 as libc::c_int
                } else {
                    /* swap bytes in raw data (FITS is big-endian) */
                    i = 0 as libc::c_int as crate::stddef_h::size_t; /* buffer is now little-endian */
                    while i < nbytes {
                        tmp = *rawbytes.offset(i as isize);
                        *rawbytes.offset(i as isize) = *rawbytes
                            .offset(i.wrapping_add(1 as libc::c_int as libc::c_ulong) as isize);
                        *rawbytes
                            .offset(i.wrapping_add(1 as libc::c_int as libc::c_ulong) as isize) =
                            tmp;
                        i = (i as libc::c_ulong).wrapping_add(2 as libc::c_int as libc::c_ulong)
                            as crate::stddef_h::size_t
                            as crate::stddef_h::size_t
                    }
                    rawdata = rawbytes as *mut libc::c_short;
                    /* convert to float, applying bscale/bzero */
                    *data = crate::stdlib::malloc(
                        nel.wrapping_mul(::std::mem::size_of::<libc::c_float>() as libc::c_ulong),
                    ) as *mut libc::c_float; /* corresponding x on small im */
                    i = 0 as libc::c_int as crate::stddef_h::size_t; /* corresponding y on small im */
                    while i < nel {
                        *(*data).offset(i as isize) = bscale
                            * *rawdata.offset(i as isize) as libc::c_int as libc::c_float
                            + bzero;
                        i = i.wrapping_add(1)
                    }
                }
            }
        }
    }
    ::libc::free(rawbytes as *mut libc::c_void);
    return status;
}

As you can see, it tried to interleave loops and conditions with the goto, making the dataflow less recognisable as well as inserting current_block IDs.

Instead, a much more natural output could use #![feature(label_break_value)] and output function like this:

/* an extremely dumb reader for our specific test FITS file! */
#[no_mangle]

pub unsafe extern "C" fn read_test_image(
    mut fname: *mut libc::c_char,
    mut data: *mut *mut libc::c_float,
    mut nx: *mut libc::c_int,
    mut ny: *mut libc::c_int,
) -> libc::c_int {
  'exit: {
     // ...main code...
     break 'exit; // <- transform `goto exit` anywhere in the function into this
     // ...more main code...
  }
  ::libc::free(rawbytes as *mut libc::c_void);
  return status;
}

That particular feature is unstable, but C2Rust already outputs nightly-specific code. However, a stable variant is also possible with a minor change:

/* an extremely dumb reader for our specific test FITS file! */
#[no_mangle]

pub unsafe extern "C" fn read_test_image(
    mut fname: *mut libc::c_char,
    mut data: *mut *mut libc::c_float,
    mut nx: *mut libc::c_int,
    mut ny: *mut libc::c_int,
) -> libc::c_int {
  'exit: loop {
     // ...main code...
     break 'exit; // <- transform `goto exit` anywhere in the function into this
     // ...more main code...
     break; // just break out of the superfluous loop so that it runs only once
  }
  ::libc::free(rawbytes as *mut libc::c_void);
  return status;
}

Either way, the output becomes a lot more readable, compiles to something a lot closer to the original, and easier to refactor by hand further into idiomatic Rust code as well.

IIUC, currently C2Rust is using a single Relooper algorithm for all control flow (?), but given how common this pattern is, perhaps it's worth special-casing it and solving separately?

ahomescu commented 3 years ago

IIUC, currently C2Rust is using a single Relooper algorithm for all control flow (?), but given how common this pattern is, perhaps it's worth special-casing it and solving separately?

We prefer to keep the translator as dumb as possible, and do cleanups and handle corner cases in c2rust-refactor, especially when those cleanups are non-trivial. It sounds like having a second algorithm in parallel with Relooper would be a massive change and maintenance burden, which is why I'd prefer if this were just a c2rust-refactor pass.

Alternatively, if this could be implemented as a small tweak to Relooper instead, that would be fine too. Relooper was originally designed for JavaScript and its output reflects that. It might be possible to extend it to emit more Rust-specific advanced control flow.

RReverser commented 3 years ago

especially when those cleanups are non-trivial

Hm, it feels like the opposite - because those cleanups are non-trivial, they're quite hard to model via refactor command (unless I'm missing a command that could detect the pattern above)?

By the time this output is emitted, it seems that important information about the original control flow is already lost and rather hard to recover at the stage when branches were already merged into a single intertwined loop.

Alternatively, if this could be implemented as a small tweak to Relooper instead, that would be fine too. Relooper was originally designed for JavaScript and its output reflects that. It might be possible to extend it to emit more Rust-specific advanced control flow.

Yeah, I'm aware of Relooper, but it was mostly designed to emit asm.js which used to be quite restricted. Even for regular JS, the output like above is in fact possible, as JS also allows labeled breaks.

RReverser commented 3 years ago

UPD: Alon Zakai pointed me to a PR in Relooper back in 2016 that improved cases like this: https://github.com/WebAssembly/binaryen/pull/648

Quotes:

In particular, the old Relooper used to add "label" uses for far-off targets in some cases, unnecessarily, which added overhead. We optimized that away 6 years ago in Emscripten/Binaryen, and Cheerp effectively did the same around 2 years ago with their Stackifier.

(c) https://twitter.com/kripken/status/1371925262397362176

Interesting, yes, that "current_block" stuff does look like the sort of thing the old Relooper would emit. So maybe using the modern approach would help.

It's a small tweak to the Relooper, here is where it landed: https://github.com/WebAssembly/binaryen/pull/648

(c) https://twitter.com/kripken/status/1371927740547354625

Perhaps it's possible to port same improvements to C2Rust's Relooper implementation?

RReverser commented 3 years ago

Ping - any thoughts on the above? In particular, are there any plans to update the Relooper algorithm to match upstream improvements?

ahomescu commented 3 years ago

Oh yes sorry, I was planning to respond to this but it fell through the cracks.

Unfortunately we (Immunant) don't have the time or resources to work on this right now, but we'll gladly take PRs and review them if anyone else submits some. I'll keep the issue open in case our situation changes.

RReverser commented 3 years ago

Ok, fair enough. I don't have capacity to work on this currently either, but I'll rename the issue and leave it open as well.

afetisov commented 2 years ago

I'll just note here that imho Relooper is the wrong algorithm in this case, since its goal is far removed from human-readable code generation, and the control-flow operations of WebAssembly are more restricted than in Rust (even if formally equivalent). Also, a good translation of gotos into proper labeled blocks & breaks must be performed during the transpilation, since once you translate gotos into state variables and matches, the information of control flow is essentially lost in the code and unreasonably hard to reconstruct. There are also QoL features which are entirely lost in translation: label names (C code would usually use well-named labels) and the ordering of CFG blocks (ideally the translated code would mirror the block ordering of C code, unless it was non-reducible).

In my opinion, the proper algorithm for C2Rust would be the Stackifier. It produces much more structured and readable CFGs.

If PRs are welcome, I could make one in a few weeks

kripken commented 2 years ago

@afetisov Note that the post you link is outdated as it compares the Stackifier algorithm to a much older Relooper version. The modern versions of both algorithms are very similar (details in that thread, but the 2017 Relooper has already addressed the limitations mentioned in that post). Both can produce structured and readable CFGs, and both tend to match the original source structure at least in reducible code (the main difference between them is in irreducible code, for which I'm not aware of a good comparison actually).

(But regardless I think a PR to update to either would probably be useful! Though I'm not a dev here.)

fw-immunant commented 1 year ago

There's also more recent work in this vein in "Beyond Relooper": https://dl.acm.org/doi/abs/10.1145/3547621

immunant / c2rust

Update the Relooper algorithm #330