dwyl / learn-zig

🦎 Learn the Zig programming language for fun!
GNU General Public License v2.0
4 stars 0 forks source link

Ziggy HashMaps #9

Open ndrean opened 1 month ago

ndrean commented 1 month ago

Starting to ❤️ Zig.

Memento on HashMap: a dynamic key/value data store.

Most of the snippets below are run in test mode because it detects even more memory leaks and the terrible "seg fault"

It can be used as a :

HashMap

Memory management: must be initialised with an allocator and de-initialised with deinit()

Common operations: put(key, value) - Adds or updates an entry fetchPut(key, value) - puts a value, returning a value if there was previously a value for that key. get(key) - Retrieves a value contains(key) - Checks if key exists remove(key) - Removes an entry count() - Returns number of entries clearAndFree() - Removes all entries and frees memory

Iteration: keyIterator() - over keys valueIterator() - over values iterator() - Iterate over key-value pairs

Error handling: put() can fail with allocation errors, so use try. Other operations like get(), contains() don't return errors.

Type of keys:

AutoHashMap: the key is an integer

const std = @import("std");
const testing = std.testing

test "hash" {
   const test_allocator = std.testing.allocator;

   const Point = struct { 
      x: i32, 
      y: i32,
     member: Polygone,
     const Polygone = enum {
          square,
          triangle
    }
  };

   var map = std.AutoHashMap(u32, Point).init(testing.allocator);
    defer map.deinit();

   try map.put(1, {.x = 1, .y= 2, .member = Point.Polygone.square })
   try map.put(1, {.x = 3, .y = 4, .member = Point.Polygone.triangle });
   print("{}\n", .{map.count()});
   //1 
   try map.put(2, {.x = 3, .y = 4, .member = Point.Polygone.square});
   print("{}\n", .{map.count()});
   //2

   try std.testing.expect(map.count() == 2);
}
1
2
All 1 tests passed

I had an error " unable to hash type f32" with a type float f32 as a key.

A Set: the value is void.

const std = @import("std");
const testing = std.testing;

test "set" {
   var set = std.AutoHashMap(u32, void).init(testing.allocator);
   defer set.deinit();

   try set.put(1);
   try set.put(1);
   try set.put(2)

  try expect(set.count() == 2);
}

StringHashMap: the key is a string

AutoHashMap does not support slices as keys, thus StringHashMap.

const std = @import("std");
const testing = std.testing;
const print = std.debug.print;

test "string" {
   const Mood = enum { cool, uncool };
   var st_map = std.StringHashMap(Mood).init(testing.allocator);
   defer st_map.deinit();

   try st_map.put("him", .uncool);
   try st_map.put("me", .cool);
   print("{any}\n", .{st_map.get("him")});
   try testing.expect(st_map.get("me").? == .cool)
}
hasmap.test.hasmap.Mood.uncool
All 1 tests passed.

Custom HashMap

Suppose we want to use a User struct with two fields: id: u32 and a name: []const u8 as your "key".

const User = struct {
    id: u32,
    name: []const u8,
};

How to proceed?

Since we cannot use slices with AutoHashMap, we need to build a custom "context" with two functions:

We declare these in a "context" struct so that Zig knows:

In the example below, we use Case-Insensitive String Matching. you make a hash from the two fields of User, the id and the name.

We want to treat as equal uppercased and lowercased; "Alice" and "alice" will represent the same entry. This means that the two entries below are equal:

u1= User{.id = 1, .name= "alice"}
u2= User{.id=1, .name="Alice"}

Then if you add successively, in this order:

HahsMap.put(u1, 10)
HashMap.put(u2, 20)

the HasMap will contain (u1, 20), keeping the original key ("alice") but updating the value.

This behaviour is actually quite common in hash table implementations: they often preserve the original key to avoid having to allocate new memory or copy data unnecessarily.

const std = @import("std");
const print = std.debug.print;

const User = struct {
    id: u32,
    name: []const u8,
};

// we define a struct with two "public" methods named "hash" and "eql"
const UserCtx = struct {
    pub fn hash(_: UserCtx, user: User) u64 {
        var hasher = std.hash.Wyhash.init(0);
        // Hash the id first
        std.hash.autoHash(&hasher, user.id);
        // Then hash the slice contents
        for (user.name) |byte| {
            hasher.update(&[_]u8{byte});
        }
        return hasher.final();
    }

    pub fn eql(_: UserCtx, a: User, b: User) bool {
        if (a.id != b.id) return false;
        if (a.name.len != b.name.len) return false;

        // compare only lowercased string
        for (a.name, b.name) |ac, bc| {
            if (std.ascii.toLower(ac) != std.ascii.toLower(bc)) {
                return false;
            }
        }
        return true;
    }
};

test "custom hash map" {
    const max_load = std.hash_map.default_max_load_percentage;

    var HashMap = std.HashMap(User, i32, UserCtx, max_load).init(std.testing.allocator);
    defer HashMap.deinit();

    try HashMap.put(User{ .id = 1, .name = "alice" }, 10);
    try HashMap.put(User{ .id = 1, .name = "Alice" }, 20);

    try HashMap.put(User{ .id = 1, .name = "Bob" }, 5);

    print("number of H's : {any}\n", .{HashMap.count()});
    std.testing.expectEqual(HashMap.count(), 2);

    var it = HashMap.iterator();
    while (it.next()) |entry| {
        print("{s} has : {any}\n", .{ entry.key_ptr.name, entry.value_ptr.* });
    }

    try std.testing.expectEqual(@as(i32, 20), HashMap.get(User{ .id = 1, .name = "alice" }));

    try std.testing.expectEqual(HashMap.get(User{ .id = 1, .name = "bob" }), null);

    try std.testing.expectEqual(@as(i32, 5), HashMap.get(User{ .id = 2, .name = "bob" }));
}
number of H's: 2

alice has : 20
Bob has : 5

All 1 tests passed.
ndrean commented 1 month ago

Common AutoHashMap usage examples

Counting occurrences of letters in a string

const std = @import("std");

fn count_occurrences(text: []const u8, allocator: std.mem.Allocator) !u32 {
    var char_count = std.AutoHashMap(u8, usize).init(allocator);
    defer char_count.deinit();

    for (text) |char| {
        // lookup if already existing or put 0
        const count = char_count.get(char) orelse 0;
        try char_count.put(char, count + 1);
    }

    // using the general iterator: "key" is a pointer", and the value is `key.key_ptr/*`
    var it = char_count.iterator();
    while (it.next()) |key| {
        print("({c}, {?}), ", .{ key.key_ptr.*, char_count.get(key.key_ptr.*) });
    }

    // using the "key" iterator
    var itk = char_count.keyIterator();
    while (itk.next()) |key| {
        print("{c}, {}", .{ key.*, char_count.get(key.*).? });
    }

    return char_count.count();
}

test "count" {
     const char_count: u32 = try count_occurences("hello world to bob", std.testing.allocator);
     std.testing.expect(char_count == 10);
}

(t, 1), ( , 3), (o, 4), (d, 1), (b, 2), (r, 1), (e, 1), (h, 1), (w, 1), (l, 3), [t, 1], [ , 3], [o, 4], [d, 1], [b, 2], [r, 1], [e, 1], [h, 1], [w, 1], [l, 3] All 1 tests passed