cornell-zhang / hcl-dialect

HeteroCL-MLIR dialect for accelerator design
https://cornell-zhang.github.io/heterocl/index.html
Other
40 stars 17 forks source link

[Frontend] Incorrect Intermediate Cast Type #97

Closed zzzDavid closed 2 years ago

zzzDavid commented 2 years ago

Test case

def test_dtype_cast():
    """Reproduce the incorrect intermediate cast for mixed-dtype add/sub.

    Builds and runs a small HeteroCL kernel whose two inputs and whose
    outputs all use different data types. The test makes no assertions on
    the computed values; it passes as long as building and invoking the
    kernel does not raise.
    """

    def _test_body(dtype1, dtype2, dtype3):
        """Build and run an element-wise add/sub kernel for one dtype triple.

        Args:
            dtype1: HeteroCL data type of the first input tensor ``A``.
            dtype2: HeteroCL data type of the second input tensor ``B``.
            dtype3: HeteroCL data type requested for both results
                (``A + B`` and ``A - B``).
        """

        # NOTE(review): presumably (re)initializes HeteroCL state before the
        # program is described -- confirm against the HeteroCL docs.
        hcl.init()
        # Two length-2 inputs, each with its own operand data type.
        A = hcl.placeholder((2,), dtype=dtype1)
        B = hcl.placeholder((2,), dtype=dtype2)

        def kernel(A, B):
            """Return the element-wise sum and difference of ``A`` and ``B``."""
            # The operands have different dtypes (dtype1 vs. dtype2) and the
            # result dtype is dtype3, so the frontend has to insert casts
            # around each binary operation.
            C = hcl.compute((2,), lambda x: A[x] + B[x], dtype=dtype3)
            D = hcl.compute((2,), lambda x: A[x] - B[x], dtype=dtype3)
            return C, D

        # Create the schedule from the kernel description and build it into
        # the callable `f`.
        s = hcl.create_schedule([A, B], kernel)
        f = hcl.build(s)

        # Random inputs scaled to [0, 100).
        npA = np.random.rand(2) * 100
        npB = np.random.rand(2) * 100
        # Initial contents for the two output buffers.
        # NOTE(review): presumably overwritten by `f` -- confirm.
        npC = np.random.rand(2)
        npD = np.random.rand(2)

        # Wrap the NumPy data as HeteroCL arrays with the matching dtypes.
        hclA = hcl.asarray(npA, dtype1)
        hclB = hcl.asarray(npB, dtype2)
        hclC = hcl.asarray(npC, dtype3)
        hclD = hcl.asarray(npD, dtype3)

        # Invoke the built function: inputs first, then the two outputs.
        # The results are not checked.
        f(hclA, hclB, hclC, hclD)

    # The dtype combination from the report: 1-bit unsigned integer input,
    # unsigned fixed-point input, 1-bit signed integer output.
    _test_body(hcl.UInt(1), hcl.UFixed(4, 2), hcl.Int(1))

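The generated IR has an incorrect intermediate cast type (`!hcl.Fixed<3, 2>`), and the operands of `hcl.add_fixed` / `hcl.sub_fixed` end up with mismatched types (`!hcl.Fixed<4, 2>` vs. `!hcl.UFixed<4, 2>`):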

error: 'hcl.add_fixed' op requires the same type for all operands and results
error: 'hcl.add_fixed' op requires the same type for all operands and results
// Verification failed, printing generic form
#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<() -> (0)>
#map2 = affine_map<() -> (2)>
"builtin.module"() ({
  "builtin.func"() ({
  ^bb0(%arg0: memref<2xi1>, %arg1: memref<2x!hcl.UFixed<4, 2>>):
    %0 = "memref.alloc"() {name = "compute_2", operand_segment_sizes = dense<0> : vector<2xi32>} : () -> memref<2xi1>
    "affine.for"() ({
    ^bb0(%arg2: index):
      %2 = "affine.load"(%arg0, %arg2) {from = "compute_0", map = #map0, unsigned} : (memref<2xi1>, index) -> i1
      %3 = "hcl.int_to_fixed"(%2) : (i1) -> !hcl.Fixed<3, 2>
      %4 = "hcl.fixed_to_fixed"(%3) : (!hcl.Fixed<3, 2>) -> !hcl.Fixed<4, 2>
      %5 = "affine.load"(%arg1, %arg2) {from = "compute_1", map = #map0} : (memref<2x!hcl.UFixed<4, 2>>, index) -> !hcl.UFixed<4, 2>
      %6 = "hcl.add_fixed"(%4, %5) : (!hcl.Fixed<4, 2>, !hcl.UFixed<4, 2>) -> !hcl.Fixed<4, 2>
      %7 = "hcl.fixed_to_int"(%6) : (!hcl.Fixed<4, 2>) -> i1
      "affine.store"(%7, %0, %arg2) {map = #map0, to = "compute_2"} : (i1, memref<2xi1>, index) -> ()
      "affine.yield"() : () -> ()
    }) {loop_name = "x", lower_bound = #map1, stage_name = "compute_2", step = 1 : i32, upper_bound = #map2} : () -> ()
    %1 = "memref.alloc"() {name = "compute_3", operand_segment_sizes = dense<0> : vector<2xi32>} : () -> memref<2xi1>
    "affine.for"() ({
    ^bb0(%arg2: index):
      %2 = "affine.load"(%arg0, %arg2) {from = "compute_0", map = #map0, unsigned} : (memref<2xi1>, index) -> i1
      %3 = "hcl.int_to_fixed"(%2) : (i1) -> !hcl.Fixed<3, 2>
      %4 = "hcl.fixed_to_fixed"(%3) : (!hcl.Fixed<3, 2>) -> !hcl.Fixed<4, 2>
      %5 = "affine.load"(%arg1, %arg2) {from = "compute_1", map = #map0} : (memref<2x!hcl.UFixed<4, 2>>, index) -> !hcl.UFixed<4, 2>
      %6 = "hcl.sub_fixed"(%4, %5) : (!hcl.Fixed<4, 2>, !hcl.UFixed<4, 2>) -> !hcl.Fixed<4, 2>
      %7 = "hcl.fixed_to_int"(%6) : (!hcl.Fixed<4, 2>) -> i1
      "affine.store"(%7, %1, %arg2) {map = #map0, to = "compute_3"} : (i1, memref<2xi1>, index) -> ()
      "affine.yield"() : () -> ()
    }) {loop_name = "x", lower_bound = #map1, stage_name = "compute_3", step = 1 : i32, upper_bound = #map2} : () -> ()
    "std.return"(%0, %1) : (memref<2xi1>, memref<2xi1>) -> ()
  }) {itypes = "u_", otypes = "ss", sym_name = "top", type = (memref<2xi1>, memref<2x!hcl.UFixed<4, 2>>) -> (memref<2xi1>, memref<2xi1>)} : () -> ()
}) : () -> ()
zzzDavid commented 2 years ago

Fixed by 0165c460d05ce9b25c184cd0a28de63955df8177

I updated the type-cast ranking and rewrote `cast_types`.