halide / Halide

a language for fast, portable data-parallel computation
https://halide-lang.org
Other
5.86k stars 1.07k forks source link

Internal Error Symbol not found generated by Li2018 autoscheduler #8279

Closed jansel closed 3 months ago

jansel commented 3 months ago

This example only fails with the Li2018 autoscheduler.

Repro:

import halide as hl

@hl.generator(name="kernel")
class Kernel:
    """Halide generator registered as "kernel"; minimal repro pipeline.

    Each input is broadcast along an extra `rindex` dimension, summed back
    over a reduction domain built from Range(0, 16), divided by 16.0 and
    written to an output. Note the crossing: `in_ptr1` feeds `out_ptr2` and
    `in_ptr0` feeds `out_ptr3`. The pipeline contains no manual schedule, so
    it relies on an autoscheduler being supplied at generation time.
    """
    # Two inputs and two outputs, all declared as hl.Float(32) with 1 dimension.
    in_ptr0 = hl.InputBuffer(hl.Float(32), 1)
    in_ptr1 = hl.InputBuffer(hl.Float(32), 1)
    out_ptr2 = hl.OutputBuffer(hl.Float(32), 1)
    out_ptr3 = hl.OutputBuffer(hl.Float(32), 1)

    def generate(g):
        """Define the pipeline and set bounds estimates on every buffer.

        `g` is the generator instance (it takes the place of the usual
        `self`); the buffers declared on the class are read from it.
        """
        in_ptr0 = g.in_ptr0
        in_ptr1 = g.in_ptr1
        out_ptr2 = g.out_ptr2
        out_ptr3 = g.out_ptr3
        xindex = hl.Var('xindex')
        rindex = hl.Var('rindex')
        x0 = xindex
        # Reduction domain shared by both hl.sum() calls below.
        rdom = hl.RDom([hl.Range(0, 16)])
        # Branch A: in_ptr0 -> tmp0 -> tmp2 (broadcast over rindex) -> tmp1 (sum).
        tmp0 = hl.Func('tmp0')
        tmp0[xindex] = in_ptr0[x0]
        tmp1 = hl.Func('tmp1')
        tmp2 = hl.Func('tmp2')
        # tmp2 ignores rindex on the right-hand side, so every row repeats tmp0.
        tmp2[rindex, xindex] = tmp0[xindex]
        tmp1[xindex] = hl.sum(rdom, tmp2[rdom, xindex])
        # Branch B: same shape as branch A, starting from in_ptr1.
        tmp3 = hl.Func('tmp3')
        tmp3[xindex] = in_ptr1[x0]
        tmp4 = hl.Func('tmp4')
        tmp5 = hl.Func('tmp5')
        tmp5[rindex, xindex] = tmp3[xindex]
        tmp4[xindex] = hl.sum(rdom, tmp5[rdom, xindex])
        # Constant 16.0 built as f64 and cast to Float(32); used as the divisor
        # for both sums (presumably the reduction length, i.e. a mean -- confirm).
        tmp6 = hl.cast(hl.Float(32), hl.f64(16.0))
        tmp7 = hl.Func('tmp7')
        tmp7[xindex] = tmp4[xindex] / tmp6
        # Branch B (from in_ptr1) is written to out_ptr2.
        out_ptr2[x0] = hl.cast(out_ptr2.type(), tmp7[xindex])
        tmp8 = hl.Func('tmp8')
        tmp8[xindex] = tmp1[xindex] / tmp6
        # Branch A (from in_ptr0) is written to out_ptr3.
        out_ptr3[x0] = hl.cast(out_ptr3.type(), tmp8[xindex])

        # No schedule is written here, so an autoscheduler must be in use.
        # NOTE(review): `assert` is stripped under `python -O`.
        assert g.using_autoscheduler()
        # Give every buffer the same Range(0, 64) estimate.
        in_ptr0.set_estimates([hl.Range(0, 64)])
        in_ptr1.set_estimates([hl.Range(0, 64)])
        out_ptr2.set_estimates([hl.Range(0, 64)])
        out_ptr3.set_estimates([hl.Range(0, 64)])

if __name__ == "__main__":
    import sys
    import tempfile

    # Run the generator entry point with a hand-built command line; all
    # outputs go to a temporary directory that is removed on exit.
    with tempfile.TemporaryDirectory() as out:
        # Generator name, output directory, function name and emitted artifacts.
        output_args = [
            '-g', 'kernel',
            '-o', out,
            '-f', 'halide_kernel',
            '-e', 'static_library,h,schedule',
        ]
        # Shared library passed via -p (presumably the Li2018 autoscheduler
        # plugin; the path is specific to the reporter's machine).
        plugin_args = [
            '-p', '/home/jansel/conda/envs/pytorch/lib/libautoschedule_li2018.so',
        ]
        # key=value settings: target string and autoscheduler selection.
        param_args = [
            'target=host-strict_float-no_runtime-no_asserts',
            'autoscheduler=Li2018',
            'autoscheduler.parallelism=8',
        ]
        sys.argv = ['repro.py', *output_args, *plugin_args, *param_args]
        hl.main()

Output:

Unhandled exception: Internal Error at /home/jansel/Halide/src/CodeGen_LLVM.cpp:1286 triggered by user code at : Symbol not found: sum$1_intm.s1.r18$x.r108.loop_extent

Traceback (most recent call last):
  File "/home/jansel/pytorch/repro.py", line 57, in <module>
    hl.main()
RuntimeError: Generator failed: -1
abadams commented 3 months ago

#8282 is the root cause, so closing this in favor of that.