
proposal: learn comptime-known-ranges from branches on runtime values #12872

Open tauoverpi opened 2 years ago

tauoverpi commented 2 years ago

Related to #12863

Instead of narrowing (creating a new type), one could attach the reduced range to the type within the block and treat any branches outside of that range as dead code, without changing the underlying type. Consider the example given:

const std = @import("std");

const E = enum {
    a,
    b,
    c,
    d,
};

pub fn main() !void {
    var e: E = .a;
    _ = &e; // keep `e` runtime-known

    switch (e) {
        .a => {},
        .b => {},
        else => |narrow| {
            switch (narrow) {
                .c => {},
                .d => {},
            }
        },
    }
}

Here @TypeOf(narrow) == E, while the range of narrow only includes the .c and .d tags. If we change e to be const, the range of e within the scope where narrow is introduced is reduced in the same way, while the underlying type remains the same.
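
A minimal sketch of that variant with a runtime-known const (foo is a placeholder extern), assuming the proposed range tracking; the inner switch does not compile today:

extern fn foo() E;

test {
    const e = foo();

    switch (e) {
        .a => {},
        .b => {},
        else => {
            // under the proposal, `e` itself (not just a capture) would carry
            // the range {.c, .d} here, so this switch would be exhaustive
            switch (e) {
                .c => {},
                .d => {},
            }
        },
    }
}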

This can be expanded upon to include other forms of branching such as:

extern fn foo() E;

test {
    const e = foo();

    if (e != .a) {
        // .a is known not to be part of the range
        switch (e) {
            .b, .c, .d => {},
        }
    } else {
        // .a is the only tag within range
        switch (e) {
            .a => {},
        }
    }
}

Thus Zig can learn the range of a value at compile time by keeping track of the set/range of possible values that remain within a branch.
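
For comparison, a sketch of what the branch example above requires today, where the switches must restate what the surrounding if already proves:

extern fn foo() E;

test {
    const e = foo();

    if (e != .a) {
        switch (e) {
            .b, .c, .d => {},
            .a => unreachable, // the `if` already excludes this, but it must be written out
        }
    } else {
        switch (e) {
            .a => {},
            else => unreachable,
        }
    }
}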

By keeping the same underlying type, it's possible to operate on integers:

extern fn foo() u8;

test {
    const e = foo();
    switch (e) {
        0 => switch (e) {
            // only possible value, `e` is comptime known here
            0 => {},
        },

        1...10 => switch (e) {
            // Only possible range, matching on other values is comptime known to be false.
            // Note that the value of `e` remains runtime-known, only the possible range is
            // comptime known
            1...10 => {},
        },

        else => switch (e) {
            // `e` is known to be 11 or above given the other branches
            11...255 => {},
        },
    }
}

And implicitly cast based on the range:

extern fn foo() u8;

test {
    const e = foo();
    if (e < 0x10) {
        // `e` remains as `u8` here but we can avoid the
        // `@intCast` as the range is comptime-known
        const s: u4 = e;
        _ = s;
    }
}

If the value on the right-hand side of a truncating operation has a comptime-known range, the result's range is bounded by that operand; thus any use of % and & narrows the range of the type. The same applies to a comptime_int on the right-hand side:

extern fn foo() u8;

test {
    const bar: u4 = foo() % 16;
    const baz: u4 = foo() & 15;
    _ = bar;
    _ = baz;
}

The same applies to addition and multiplication:

extern fn foo() u8;

test {
    const x: u4 = foo() % 16;
    const y: u8 = foo() + x; // range 0...(255 + 15) (possible overflow)
    const z: u8 = foo() +| x; // range 0...255 (no overflow)
    _ = x;
    _ = y;
    _ = z;
}

Since the type of the value doesn't change, the value can still be passed safely to functions without casts:

extern fn foo() E;
extern fn bar(E) void;

test {
    const e = foo();
    if (e != .a) {
        bar(e); // no cast, `e` keeps its type `E`
    }
}

However, integers can be narrowed when the range of the value fits within the range of a smaller destination type:

extern fn foo() u32;
extern fn bar(u8) void;

test {
    const value = foo();
    const small: u4 = foo() & 15;

    if (value < 256) {
        const sub = value -| small; // range 0...255
        const add = value +| small; // range 0...(255 + 15)

        bar(sub); // within range, narrowing is safe here
        bar(add); // compile error: expected type 'u8', found 'u32' range 0...(255 + 15)
    }

    bar(value); // compile error: expected type 'u8', found 'u32' range 0...4294967295
}

Note that if the range extends beyond that of the underlying type, then overflow is possible.
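
A small sketch of the distinction, assuming the proposed range tracking (the comments describe hypothetical ranges, not current compiler behaviour):

extern fn foo() u8;

test {
    const x = foo();     // range 0...255
    const y = x + 1;     // range 1...256 exceeds u8, so overflow is possible

    if (x < 255) {
        const z = x + 1; // range 1...255 stays within u8, provably no overflow
        _ = z;
    }

    _ = y;
}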

Operators

Thus the standard operators should now result in:

extern fn foo() u32;

const x: u32 = foo();
const y: u32 = foo();
// the top of the range of both `x` and `y` in this example
const m = math.maxInt(@TypeOf(x, y));

assert(
        @TypeOf(x + 1) == u32 range(1...m + 1) // possible overflow
    and @TypeOf(x + y) == u32 range(0...m + m) // possible overflow
    and @TypeOf(x - 1) == u32 range(-1...m - 1) // possible underflow
    and @TypeOf(x - y) == u32 range(-m...m) // possible underflow
    and @TypeOf(x * 2) == u32 range(0...m * 2) // possible overflow
    and @TypeOf(x * y) == u32 range(0...m * m) // possible overflow
    and @TypeOf(x | y) == u32 range(0...m | m)
    and @TypeOf(x << 5) == u32 range(0...(m << 5) & m)
    and @TypeOf(x >> 5) == u32 range(0...m >> 5)
    and @TypeOf(~x) == u32 range(0...m)

    and @TypeOf(x <<| y) == u32 range(0...m)
    and @TypeOf(x +| 1) == u32 range(1...m)
    and @TypeOf(x +| y) == u32 range(0...m)
    and @TypeOf(x -| 1) == u32 range(0...m - 1)
    and @TypeOf(x -| y) == u32 range(0...m)
    and @TypeOf(x *| 2) == u32 range(0...m)
    and @TypeOf(x *| y) == u32 range(0...m)

    and @TypeOf(x % 8) == u32 range(0...7)
    and @TypeOf(x % y) == u32 range(0...m -| 1) // range of y - 1
    and @TypeOf(x & y) == u32 range(0...m & m) // the minimum range of the two
    and @TypeOf(x +% 1) == u32 range(0...m)
    and @TypeOf(x +% y) == u32 range(0...m)
    and @TypeOf(x -% 1) == u32 range(0...m)
    and @TypeOf(x -% y) == u32 range(0...m)
    and @TypeOf(x *% 2) == u32 range(0...m)
    and @TypeOf(x *% y) == u32 range(0...m)
);

Note: the range(n...m) syntax is illustrative and not part of the proposal.

Extension

For safety, allow function parameters and return types to express the desired range and emit a compile error when the range cannot be satisfied:

fn foo(x: u32 range(0...0xffff)) u32 range(0...0xff) {
    return x & 0xff;
}

or maybe as an expression:

fn foo(x: u32, a: []u32) where (x + 1 < a.len) u32 {
    return a[x] + a[x + 1];
}

However, ranges for parameters/return types require more thought.
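
Until such syntax exists, a rough approximation today is to push the contract into the callee with an assert (a minimal sketch; the range is only checked at runtime in safety-checked builds):

const std = @import("std");
const assert = std.debug.assert;

// hypothetical stand-in for `fn foo(x: u32 range(0...0xffff)) u32 range(0...0xff)`
fn foo(x: u32) u32 {
    assert(x <= 0xffff);
    return x & 0xff;
}

test {
    try std.testing.expectEqual(@as(u32, 0x34), foo(0x1234));
}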

Inve1951 commented 3 months ago

Error sets already support narrowing so long as you capture. Example that works today:

test "ErrorSet narrowing" {
    try funcA();
}

fn funcA() error{ A, C }!void {
    return funcB() catch |err| switch (err) {
        // else => err, // error: expected type 'error{C,A}', found 'error{C,A,B}'
        else => |e| e,
        error.B => {},
    };
}
fn funcB() error{ A, B, C }!void {
    return error.B;
}

But the same trick does not work on integers:

test "integer narrowing" {
    var in: usize = 42;
    _ = &in;
    const out: u8 = switch (in) {
        0...255 => |byte| byte, // error: expected type 'u8', found 'usize'
        else => 24,
    };
    try std.testing.expect(in == out);
}
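
For comparison, the version that compiles today needs an explicit cast even though the prong already bounds the value (a minimal sketch):

test "integer narrowing workaround" {
    var in: usize = 42;
    _ = &in;
    const out: u8 = switch (in) {
        // the cast is required even though 0...255 always fits in u8
        0...255 => |byte| @intCast(byte),
        else => 24,
    };
    try std.testing.expect(in == out);
}
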
tauoverpi commented 3 weeks ago

An observation: with the new panic interface it's possible to disallow integer overflow as an opt-in. The above provides one possible path to prove that overflow doesn't occur, by forcing the use of assert (or any other way of diverting control flow) where it otherwise could. There are likely other proposals that are more suitable, though.
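
A minimal sketch of that idea, assuming asserts would feed the range analysis (hypothetical semantics; foo and bar are placeholder externs):

const std = @import("std");

extern fn foo() u32;
extern fn bar() u32;

test {
    const x = foo();
    const y = bar();

    std.debug.assert(x < 0x8000_0000 and y < 0x8000_0000);

    // if the assert narrowed both ranges to 0...0x7fff_ffff, the addition
    // below would be provably free of overflow without a runtime check
    const sum = x + y;
    _ = sum;
}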