Open jansel opened 3 days ago
Repro:
import halide as hl


# Minimal reproducer: a Halide generator whose gather index comes from a
# 0-dimensional input buffer, compiled with the Adams2019 autoscheduler.
@hl.generator(name="kernel")
class Kernel:
    # in_ptr0 is declared 0-dimensional; the report says the crash goes away
    # when it is declared 1-dimensional instead.
    in_ptr0 = hl.InputBuffer(hl.Int(64), 0)
    in_ptr1 = hl.InputBuffer(hl.Float(32), 2)
    out_ptr0 = hl.OutputBuffer(hl.Float(32), 1)

    def generate(g):
        in_ptr0 = g.in_ptr0
        in_ptr1 = g.in_ptr1
        out_ptr0 = g.out_ptr0
        h0 = hl.Var("h0")
        tmp6 = hl.Func("tmp6")
        # Scalar (0-D) Func: read the single int64 value with an empty index
        # tuple, cast to int32 and clamp to [0, 7] (in_ptr1 dim 1 has extent 8).
        tmp6[()] = hl.clamp(hl.cast(hl.Int(32), in_ptr0[()]), 0, 7)
        # Gather along dim 1 of in_ptr1 using the clamped scalar index.
        out_ptr0[h0,] = in_ptr1[h0, tmp6[()]]

        assert g.using_autoscheduler()
        # Empty estimate list for the 0-D buffer (no dimensions to describe).
        in_ptr0.set_estimates([])
        # Fix in_ptr1's layout: 4 x 8 elements, stride 1 in dim 0, stride 4 in dim 1.
        in_ptr1.dim(0).set_min(0)
        in_ptr1.dim(0).set_stride(1)
        in_ptr1.dim(0).set_extent(4)
        in_ptr1.dim(1).set_min(0)
        in_ptr1.dim(1).set_stride(4)
        in_ptr1.dim(1).set_extent(8)
        in_ptr1.set_estimates([hl.Range(0, 4), hl.Range(0, 8)])
        out_ptr0.set_estimates([hl.Range(0, 4)])


if __name__ == "__main__":
    import sys, tempfile

    # Emit the build artifacts into a throwaway directory; only the crash matters.
    with tempfile.TemporaryDirectory() as out:
        # Command-line arguments handed to hl.main() via sys.argv.
        sys.argv = [
            "repro.py",
            "-g",
            "kernel",
            "-o",
            out,
            "-f",
            "halide_kernel",
            "-e",
            "static_library,h,schedule",
            "-p",
            # NOTE(review): machine-specific path to the autoscheduler plugin;
            # adjust to the local install when re-running.
            "/home/jansel/conda/envs/pytorch/lib/libautoschedule_adams2019.so",
            "target=host-strict_float-no_runtime-no_asserts",
            "autoscheduler=Adams2019",
            "autoscheduler.parallelism=8",
        ]
        hl.main()
Output:
terminate called after throwing an instance of 'Halide::Error'
zsh: IOT instruction (core dumped)  python repro.py
It works if I make `in_ptr0` 1D (rather than 0D):
diff --git a/repro.py b/repro.py
index c5397a9c0a5..52785bda978 100644
--- a/repro.py
+++ b/repro.py
@@ -3,7 +3,7 @@ import halide as hl
 
 @hl.generator(name="kernel")
 class Kernel:
-    in_ptr0 = hl.InputBuffer(hl.Int(64), 0)
+    in_ptr0 = hl.InputBuffer(hl.Int(64), 1)
     in_ptr1 = hl.InputBuffer(hl.Float(32), 2)
     out_ptr0 = hl.OutputBuffer(hl.Float(32), 1)
 
@@ -13,11 +13,11 @@ class Kernel:
         out_ptr0 = g.out_ptr0
         h0 = hl.Var("h0")
         tmp6 = hl.Func("tmp6")
-        tmp6[()] = hl.clamp(hl.cast(hl.Int(32), in_ptr0[()]), 0, 7)
+        tmp6[()] = hl.clamp(hl.cast(hl.Int(32), in_ptr0[0]), 0, 7)
         out_ptr0[h0,] = in_ptr1[h0, tmp6[()]]
 
         assert g.using_autoscheduler()
-        in_ptr0.set_estimates([])
+        in_ptr0.set_estimates([hl.Range(0, 1)])
         in_ptr1.dim(0).set_min(0)
         in_ptr1.dim(0).set_stride(1)
         in_ptr1.dim(0).set_extent(4)
Repro:
Output:
It works if I make
in_ptr0
1D (rather than 0D):