Closed zzzDavid closed 2 years ago
def test_dtype_cast():
    """Build and run an add/sub kernel whose inputs and outputs use different dtypes.

    The test makes no assertions on values; it passes when building the
    schedule and invoking the compiled function complete without raising.
    """

    def _test_body(dtype1, dtype2, dtype3):
        """Run element-wise A + B and A - B over two length-2 tensors.

        dtype1 and dtype2 are the types of the two inputs; dtype3 is the
        type requested for both compute results.
        """
        shape = (2,)

        hcl.init()
        A = hcl.placeholder(shape, dtype=dtype1)
        B = hcl.placeholder(shape, dtype=dtype2)

        def kernel(A, B):
            # The lambda variable name is kept as `x`: the generated IR
            # reports it as the loop name.
            added = hcl.compute(shape, lambda x: A[x] + B[x], dtype=dtype3)
            subtracted = hcl.compute(shape, lambda x: A[x] - B[x], dtype=dtype3)
            return added, subtracted

        schedule = hcl.create_schedule([A, B], kernel)
        compiled = hcl.build(schedule)

        # Draw the random host data in a fixed order: two scaled inputs
        # first, then the two output buffers.
        in_a = np.random.rand(2) * 100
        in_b = np.random.rand(2) * 100
        out_add = np.random.rand(2)
        out_sub = np.random.rand(2)

        dev_a = hcl.asarray(in_a, dtype1)
        dev_b = hcl.asarray(in_b, dtype2)
        dev_add = hcl.asarray(out_add, dtype3)
        dev_sub = hcl.asarray(out_sub, dtype3)

        compiled(dev_a, dev_b, dev_add, dev_sub)

    _test_body(hcl.UInt(1), hcl.UFixed(4, 2), hcl.Int(1))
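The generated IR has incorrect intermediate cast types (`!hcl.Fixed<3, 2>`): the value loaded from the unsigned `i1` input is cast to the signed `!hcl.Fixed<3, 2>` and then to `!hcl.Fixed<4, 2>`, while the other operand stays `!hcl.UFixed<4, 2>`, so `hcl.add_fixed` is given operands of two different types and verification fails (`hcl.sub_fixed` in the second loop has the same operand mismatch):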
error: 'hcl.add_fixed' op requires the same type for all operands and results
error: 'hcl.add_fixed' op requires the same type for all operands and results
// Verification failed, printing generic form
#map0 = affine_map<(d0) -> (d0)>
#map1 = affine_map<() -> (0)>
#map2 = affine_map<() -> (2)>
"builtin.module"() ({
  "builtin.func"() ({
  ^bb0(%arg0: memref<2xi1>, %arg1: memref<2x!hcl.UFixed<4, 2>>):
    %0 = "memref.alloc"() {name = "compute_2", operand_segment_sizes = dense<0> : vector<2xi32>} : () -> memref<2xi1>
    "affine.for"() ({
    ^bb0(%arg2: index):
      %2 = "affine.load"(%arg0, %arg2) {from = "compute_0", map = #map0, unsigned} : (memref<2xi1>, index) -> i1
      %3 = "hcl.int_to_fixed"(%2) : (i1) -> !hcl.Fixed<3, 2>
      %4 = "hcl.fixed_to_fixed"(%3) : (!hcl.Fixed<3, 2>) -> !hcl.Fixed<4, 2>
      %5 = "affine.load"(%arg1, %arg2) {from = "compute_1", map = #map0} : (memref<2x!hcl.UFixed<4, 2>>, index) -> !hcl.UFixed<4, 2>
      %6 = "hcl.add_fixed"(%4, %5) : (!hcl.Fixed<4, 2>, !hcl.UFixed<4, 2>) -> !hcl.Fixed<4, 2>
      %7 = "hcl.fixed_to_int"(%6) : (!hcl.Fixed<4, 2>) -> i1
      "affine.store"(%7, %0, %arg2) {map = #map0, to = "compute_2"} : (i1, memref<2xi1>, index) -> ()
      "affine.yield"() : () -> ()
    }) {loop_name = "x", lower_bound = #map1, stage_name = "compute_2", step = 1 : i32, upper_bound = #map2} : () -> ()
    %1 = "memref.alloc"() {name = "compute_3", operand_segment_sizes = dense<0> : vector<2xi32>} : () -> memref<2xi1>
    "affine.for"() ({
    ^bb0(%arg2: index):
      %2 = "affine.load"(%arg0, %arg2) {from = "compute_0", map = #map0, unsigned} : (memref<2xi1>, index) -> i1
      %3 = "hcl.int_to_fixed"(%2) : (i1) -> !hcl.Fixed<3, 2>
      %4 = "hcl.fixed_to_fixed"(%3) : (!hcl.Fixed<3, 2>) -> !hcl.Fixed<4, 2>
      %5 = "affine.load"(%arg1, %arg2) {from = "compute_1", map = #map0} : (memref<2x!hcl.UFixed<4, 2>>, index) -> !hcl.UFixed<4, 2>
      %6 = "hcl.sub_fixed"(%4, %5) : (!hcl.Fixed<4, 2>, !hcl.UFixed<4, 2>) -> !hcl.Fixed<4, 2>
      %7 = "hcl.fixed_to_int"(%6) : (!hcl.Fixed<4, 2>) -> i1
      "affine.store"(%7, %1, %arg2) {map = #map0, to = "compute_3"} : (i1, memref<2xi1>, index) -> ()
      "affine.yield"() : () -> ()
    }) {loop_name = "x", lower_bound = #map1, stage_name = "compute_3", step = 1 : i32, upper_bound = #map2} : () -> ()
    "std.return"(%0, %1) : (memref<2xi1>, memref<2xi1>) -> ()
  }) {itypes = "u_", otypes = "ss", sym_name = "top", type = (memref<2xi1>, memref<2x!hcl.UFixed<4, 2>>) -> (memref<2xi1>, memref<2xi1>)} : () -> ()
}) : () -> ()
Fixed by 0165c460d05ce9b25c184cd0a28de63955df8177
I updated the type cast ranking and rewrote `cast_types`.
cast_types
Test case
The IR generated has incorrect intermediate cast types
!hcl.Fixed<3, 2>)
: