ziglang / zig

General-purpose programming language and toolchain for maintaining robust, optimal, and reusable software.
https://ziglang.org
MIT License
35.18k stars 2.57k forks source link

Reached unreachable code in std.StringHashMap #11940

Closed markisus closed 2 years ago

markisus commented 2 years ago

Zig Version

0.9.1

Steps to Reproduce

Save the following to files into the same directory and run zig run stringhashmap_test.zig

stringhashmap_test.zig

const std = @import("std");

pub fn main() !void {
    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .{};
    defer if (gpa.deinit()) {
        unreachable;
    };

    const allocator = gpa.allocator();
    var map = std.StringHashMap(void).init(allocator);
    defer map.deinit();

    var file = try std.fs.cwd().openFile("scratch.txt", .{});
    var buf: [50]u8 = undefined;
    while (try file.reader().readUntilDelimiterOrEof(buf[0..], '\n')) |line| {
        std.debug.print("Inserting {s}\n", .{line});
        _ = try map.getOrPut(line);
    }
}

scratch.txt

a
b
c
d
e
f
g

Expected Behavior

Program runs without error, or at least returns an error that can be caught instead of entering unreachable block.

Actual Behavior

zig run stringhashmap_test.zig
Inserting a
Inserting b
Inserting c
Inserting d
Inserting e
Inserting f
Inserting g
thread 6380 panic: reached unreachable code
/home/mark/zig-linux-x86_64-0.9.1/lib/std/debug.zig:225:14: 0x20606b in std.debug.assert (stringhashmap_test)
    if (!ok) unreachable; // assertion failure
             ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/hash_map.zig:1005:19: 0x2376f5 in std.hash_map.HashMapUnmanaged([]const u8,void,std.hash_map.StringContext,80).putAssumeCapacityNoClobberContext (stringhashmap_test)
            assert(!self.containsContext(key, ctx));
                  ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/hash_map.zig:1472:62: 0x237114 in std.hash_map.HashMapUnmanaged([]const u8,void,std.hash_map.StringContext,80).grow (stringhashmap_test)
                        map.putAssumeCapacityNoClobberContext(keys_ptr[i], values_ptr[i], ctx);
                                                             ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/hash_map.zig:1418:30: 0x2368af in std.hash_map.HashMapUnmanaged([]const u8,void,std.hash_map.StringContext,80).growIfNeeded (stringhashmap_test)
                try self.grow(allocator, capacityForSize(self.load() + new_count), ctx);
                             ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/hash_map.zig:1260:30: 0x236499 in std.hash_map.HashMapUnmanaged([]const u8,void,std.hash_map.StringContext,80).getOrPutContextAdapted (stringhashmap_test)
            self.growIfNeeded(allocator, 1, ctx) catch |err| {
                             ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/hash_map.zig:1248:56: 0x2363aa in std.hash_map.HashMapUnmanaged([]const u8,void,std.hash_map.StringContext,80).getOrPutContext (stringhashmap_test)
            const gop = try self.getOrPutContextAdapted(allocator, key, ctx, ctx);
                                                       ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/hash_map.zig:468:50: 0x235d7e in std.hash_map.HashMap([]const u8,void,std.hash_map.StringContext,80).getOrPut (stringhashmap_test)
            return self.unmanaged.getOrPutContext(self.allocator, key, self.ctx);
                                                 ^
/home/mark/aoc2021/stringhashmap_test.zig:17:29: 0x22e452 in main (stringhashmap_test)
        _ = try map.getOrPut(line);
                            ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/start.zig:561:37: 0x22780a in std.start.callMain (stringhashmap_test)
            const result = root.main() catch |err| {
                                    ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/start.zig:495:12: 0x20842e in std.start.callMainWithArgs (stringhashmap_test)
    return @call(.{ .modifier = .always_inline }, callMain, .{});
           ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/start.zig:409:17: 0x2074c6 in std.start.posixCallMainAndExit (stringhashmap_test)
    std.os.exit(@call(.{ .modifier = .always_inline }, callMainWithArgs, .{ argc, argv, envp }));
                ^
/home/mark/zig-linux-x86_64-0.9.1/lib/std/start.zig:322:5: 0x2072d2 in std.start._start (stringhashmap_test)
    @call(.{ .modifier = .never_inline }, posixCallMainAndExit, .{});
    ^
markisus commented 2 years ago

I found that reading the data from file is crucial for reproducing. The error no longer appears if you hard-code the data inside the program.

// this version works fine, even though its lines are checked to be mem.eql to the contents of file
pub fn main() !void {
    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .{};
    defer if (gpa.deinit()) {
        unreachable;
    };

    const allocator = gpa.allocator();
    var map = std.StringHashMap(void).init(allocator);
    defer map.deinit();

    var file_contents: [7][]const u8 = .{ "a", "b", "c", "d", "e", "f", "g" };
    var idx: u32 = 0;
    var file = try std.fs.cwd().openFile("scratch.txt", .{});
    var buf: [50]u8 = undefined;
    while (try file.reader().readUntilDelimiterOrEof(buf[0..], '\n')) |line| {
        std.debug.assert(std.mem.eql(u8, file_contents[idx], line));
        std.debug.print("Inserting {s}\n", .{line});
        _ = try map.getOrPut(file_contents[idx]);
        idx += 1;
    }
}
squeek502 commented 2 years ago

StringHashMap does not copy or take ownership of the memory for the keys, so you're adding the same memory over and over (while overwriting the data being pointed to as well).

If you change your debug.print to:

std.debug.print("Inserting {s} ({*} len:{})\n", .{ line, line.ptr, line.len });

then the output is:

Inserting a (u8@7ffdbd2fe386 len:1)
Inserting b (u8@7ffdbd2fe386 len:1)
Inserting c (u8@7ffdbd2fe386 len:1)
Inserting d (u8@7ffdbd2fe386 len:1)
Inserting e (u8@7ffdbd2fe386 len:1)
Inserting f (u8@7ffdbd2fe386 len:1)
Inserting g (u8@7ffdbd2fe386 len:1)

So once the map needs to grow, it will re-allocate and then try re-inserting everything but now all the keys' data will be "g" so it will try inserting "g" over and over which indicates a bug (i.e. the unreachable is correct, your code is misusing the API).

If you tried to use this map in any other way you'd run into other similar problems, as after all the inserts you'd be left with a map in which all keys are pointing to data that contains "g".

markisus commented 2 years ago

Oops! My bad, thanks for your help @squeek502 and apologies for the faulty bug report!