ziglang / zig

General-purpose programming language and toolchain for maintaining robust, optimal, and reusable software.
https://ziglang.org
MIT License
35.1k stars 2.56k forks source link

Translate-c error on old style flexible array member access #21684

Open joseph-montanez opened 1 month ago

joseph-montanez commented 1 month ago

Zig Version

0.14.0-dev.1860+2e2927735

Steps to Reproduce and Observed Behavior

I am not sure what to call this specific issue.

This is from the php-src repository: php-src/Zend/zend_string.h

static zend_always_inline zend_string *zend_string_init(const char *str, size_t len, bool persistent)
{
    zend_string *ret = zend_string_alloc(len, persistent);

    memcpy(ZSTR_VAL(ret), str, len);
    ZSTR_VAL(ret)[len] = '\0';
    return ret;
}

This is in the headers which is included:

const php = @cImport({
    @cDefine("_GNU_SOURCE", "1");
    @cDefine("ZEND_DEBUG", "1");
    @cDefine("ZTS", "1");
    @cInclude("php_config.h");
    @cInclude("php.h");
    @cInclude("ext/standard/info.h");
});

This is the automatic conversion:

pub fn zend_string_init(arg_str: [*c]const u8, arg_len: usize, arg_persistent: bool) callconv(.C) [*c]zend_string {
    var str = arg_str;
    _ = &str;
    var len = arg_len;
    _ = &len;
    var persistent = arg_persistent;
    _ = &persistent;
    var ret: [*c]zend_string = zend_string_alloc(len, persistent);
    _ = &ret;
    _ = memcpy(@as(?*anyopaque, @ptrCast(@as([*c]u8, @ptrCast(@alignCast(&ret.*.val))))), @as(?*const anyopaque, @ptrCast(str)), len);
    ret.*.val[len] = '\x00';
    return ret;
}

The specific issue is here:

@as(?*anyopaque, @ptrCast(@as([*c]u8, @ptrCast(@alignCast(&ret.*.val)))))

This would result in only the first letter of the string to be copied and that determined the limit of the string. So if the expected string is supposed 5 characters long (which its only a single character), the next line ret.*.val[len] = '\x00'; would throw an out of bounds "thread 337505 panic: index out of bounds: index 5, len 1"

I had to go in and change cimport.zig for the corrected code.

pub fn zend_string_init(arg_str: [*c]const u8, arg_len: usize, arg_persistent: bool) callconv(.C) [*c]zend_string {
    const ret: [*c]zend_string = zend_string_alloc(arg_len, arg_persistent);
    if (ret == null) {
        return null;
    }
    const ret_val_ptr: [*c]u8 = @as([*c]u8, @ptrCast(&ret.*.val));
    _ = memcpy(ret_val_ptr, arg_str, arg_len);
    ret_val_ptr[arg_len] = 0;
    return ret;
}

Further up in the same cimport.zig is the memcpy defintion:

pub extern fn memcpy(__dest: ?*anyopaque, __src: ?*const anyopaque, __n: c_ulong) ?*anyopaque;

I just moved from Swift to Zig because it has the same exact problem with this same function and its auto import system. I am assuming my solution is still wrong as it problem creates a memory leak, but its a bit more than I can handle right now on how Zig handles this.

Expected Behavior

Should copy the entire buffer and not just the first byte.

pfgithub commented 1 month ago

An alternative workaround until this is fixed is to make a c binding for this function so that you don't have to edit translate-c output:

// workaround.h
#include zend headers
zend_string *workaround_zend_string_init (const char *str, size_t len, bool persistent);

// workaround.c
#include workaround.h
zend_string *workaround_zend_string_init (const char *str, size_t len, bool persistent)
{
        return zend_string_init(str, len, persistent);
}

Then, add the workaround.c to your build and call workaround_zend_string_init from zig

pfgithub commented 1 month ago

Reproduction:

There's nothing wrong with the memcpy, the problem is zig assuming that the array access shouldn't go out of bounds when that's how it was intended to be used in c.

// repro.c
struct example {
    char val[1];
};
char example_fn(struct example* ex, int index) {
    return ex->val[index];
}
// repro.zig
const translated = @cImport({
    @cInclude("repro.c");
});

test "example_fn" {
    var buf: [6]u8 = .{'H', 'e', 'l', 'l', 'o', '\x00'};
    for(buf, 0..) |expected, i| {
        try @import("std").testing.expectEqual(expected, translated.example_fn(@ptrCast(&buf), @intCast(i)));
    }
}
$> zig test repro.zig -I. -lc
thread 22064 panic: index out of bounds: index 1, len 1
cimport.zig:65:20: 0x8e1451 in example_fn (test.exe.obj)
    return ex.*.val[@as(c_uint, @intCast(index))];
                   ^

Linking the C code instead, it works as expected:

// repro.zig
extern fn example_fn(ex: *const anyopaque, index: c_int) u8;

test "example_fn" {
    var buf: [6]u8 = .{'H', 'e', 'l', 'l', 'o', '\x00'};
    for(buf, 0..) |expected, i| {
        try @import("std").testing.expectEqual(expected, example_fn(@ptrCast(&buf), @intCast(i)));
    }
}
$> zig test repro.zig repro.c -lc
All 1 tests passed.
joseph-montanez commented 1 month ago

Thanks! I understand now, and as a note I tried another way:

 @as([*c]u8, @ptrCast(&ret.*.val))[len] = '\x00';

However it seems that as long as its not set to another variable, Zig just doesn't have enough information to correctly understand. Which is why this version works:

    const ret_val_ptr: [*c]u8 = @as([*c]u8, @ptrCast(&ret.*.val));
    ret_val_ptr[arg_len] = 0;