ROCm / AMDMIGraphX

AMD's graph optimization engine.
https://rocm.docs.amd.com/projects/AMDMIGraphX/en/latest/
MIT License

test_verify failing on Navi32 #2365

Closed: ahsan-ca closed this issue 10 months ago

ahsan-ca commented 10 months ago

The test_verify test appears to fail on Navi32, while it passes on an MI100 system.

The following errors are seen:

[   RUN    ] gemm_2args_mm_5
Benchmarking gpu::mlir_op: 21 configs
Fastest solution: 64,64,32,4,4,2
FAILED: gpu
RMS Error: 0.504296
Max diff: 1.61328
Mismatch at 24: 0.121094 != -0.171875

module: "main"
2 = @param:2 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@3 = dot(@2,2) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

ref:
module: "main"
2 = @param:2 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@3 = dot(@2,2) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

gpu:
module: "main"
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}, target_id=0
output = @param:output -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
2 = @param:2 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@4 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@5 = gpu::code_object[code_object=3752,symbol_name=mlir_dot,global=384,local=64,](@4,2,output) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

void run_verify::verify(const std::string &, const migraphx::program &, const migraphx::compile_options &) const
/code/AMDMIGraphX/AMDMIGraphX/test/verify/run_verify.cpp:264:
    FAILED: passed [ 0 ]
[  FAILED  ] gemm_2args_mm_5: Test failure
[   RUN    ] gemm_2args_mm_6
Benchmarking gpu::mlir_op: 21 configs
Fastest solution: 64,64,32,8,4,2
FAILED: gpu
RMS Error: 0.292345
Max diff: 1.59375
Mismatch at 24: 0.582031 != 0

module: "main"
2 = @param:2 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@3 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](2) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
@4 = dot(@2,@3) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

ref:
module: "main"
2 = @param:2 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@3 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](2) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
@4 = dot(@2,@3) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

gpu:
module: "main"
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}, target_id=0
2 = @param:2 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@3 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](2) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
@4 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
output = @param:output -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
@6 = gpu::code_object[code_object=3880,symbol_name=mlir_dot,global=384,local=64,](@4,@3,output) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

void run_verify::verify(const std::string &, const migraphx::program &, const migraphx::compile_options &) const
/code/AMDMIGraphX/AMDMIGraphX/test/verify/run_verify.cpp:264:
    FAILED: passed [ 0 ]
[  FAILED  ] gemm_2args_mm_6: Test failure

test_verify: /usr/local/cget/build/tmp-82ea508a033d44d9b28ff38af027eb7d/rocMLIR-507bb94ce7873786486d296ec81d2eadaab49003/external/llvm-project/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp:1394:
static RankedTensorType mlir::tensor::CollapseShapeOp::inferCollapsedType(RankedTensorType, ArrayRef<AffineMap>):
Assertion `isReassociationValid(reassociation) && "invalid reassociation"' failed.
CMake Error at gdb/test_test_verify_general/run.cmake:16 (message):
  Test failed

Env details on the failing system:
Hostname: ctr-smc-s29-040
OS: Ubuntu 20.04.6 LTS (Focal Fossa)
ROCm version in docker (the test is run inside docker): 5.7.0-63
ROCm version on the bare-metal machine: 5.6.1-93

MIGraphX commit:
commit 52203fa4bcaac96e390153fb73a0a140034e30c0 (origin/develop, origin/HEAD, develop)
Author: Attila Dusnoki <126579622+attila-dusnoki-htec@users.noreply.github.com>
Date:   Wed Oct 25 16:03:08 2023 +0200

Fix wrong size check when axes not present for slice (#2270)

MLIR Commit: rocMLIR@a997d5f51314b45d7a4c04f1599966dcf53f9b4d
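
For context, here is a minimal NumPy sketch (an illustration only, not the MIGraphX reference implementation) of the computation that gemm_2args_mm_5 and gemm_2args_mm_6 verify: a batched GEMM where a batch dimension of one operand is broadcast before the dot, using the shapes from the traces above.

import numpy as np

a  = np.random.randn(2, 1, 2, 3).astype(np.float32)   # @param:1
b5 = np.random.randn(2, 3, 3, 4).astype(np.float32)   # @param:2 in gemm_2args_mm_5
b6 = np.random.randn(1, 3, 3, 4).astype(np.float32)   # @param:2 in gemm_2args_mm_6

a_bc = np.broadcast_to(a, (2, 3, 2, 3))                # multibroadcast of @param:1
ref5 = np.matmul(a_bc, b5)                             # dot -> shape (2, 3, 2, 4)
ref6 = np.matmul(a_bc, np.broadcast_to(b6, (2, 3, 3, 4)))

# The GPU outputs from gpu::code_object[mlir_dot] are compared against these
# reference values; on Navi32 they diverge (e.g. the mismatch at element 24).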

krzysz00 commented 10 months ago

Ok, so we'll ideally want an MLIR module input here (hence my notes about updating MIGRAPHX_TRACE_MLIR to work correctly with quick tuning enabled).

We'll also want to work out whether this needs to be backported to 6.0.
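
A hedged sketch of one way to collect that trace for a single failing case follows; the test_verify binary path and the test-name argument are assumptions here, not something confirmed in this issue.

import os
import subprocess

# Assumed invocation: run one verify test with MLIR tracing enabled.
env = dict(os.environ, MIGRAPHX_TRACE_MLIR="1")
subprocess.run(["./bin/test_verify", "gemm_2args_mm_5"], env=env, check=False)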

ahsan-ca commented 10 months ago

With MIGRAPHX_TRACE_MLIR=1: 1/3: gemm_2args_mm_5

[   RUN    ] gemm_2args_mm_5
mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

mlir_dot12:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot12:y1 = @param:y1 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot12:@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot12:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot12:@3 = contiguous(mlir_dot12:@2) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot12:@4 = dot(mlir_dot12:@3,mlir_dot12:y1) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot12:@5 = @return(mlir_dot12:@4), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %1 = migraphx.dot(%0, %arg1) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %1 : tensor<2x3x2x4xf32>
  }
}

Benchmarking gpu::mlir_op: 21 configs
Fastest solution: 64,32,32,4,2,2
FAILED: gpu
RMS Error: 0.504296
Max diff: 1.61328
Mismatch at 24: 0.121094 != -0.171875

module: "main"
2 = @param:2 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@3 = dot(@2,2) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

ref:
module: "main"
2 = @param:2 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@3 = dot(@2,2) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

gpu:
module: "main"
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}, target_id=0
2 = @param:2 -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
output = @param:output -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
@4 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@5 = gpu::code_object[code_object=3624,symbol_name=mlir_dot,global=384,local=64,](@4,2,output) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

void run_verify::verify(const std::string &, const migraphx::program &, const migraphx::compile_options &) const
/code/AMDMIGraphX/AMDMIGraphX/test/verify/run_verify.cpp:264:
    FAILED: passed [ 0 ]
[  FAILED  ] gemm_2args_mm_5: Test failure
ahsan-ca commented 10 months ago

With MIGRAPHX_TRACE_MLIR=1: 2/3: gemm_2args_mm_6

[   RUN    ] gemm_2args_mm_6
mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

mlir_dot13:y1.0 = @param:y1.0 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:y0.0 = @param:y0.0 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
mlir_dot13:@2 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](mlir_dot13:y1.0) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
mlir_dot13:@3 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](mlir_dot13:y0.0) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
mlir_dot13:@4 = contiguous(mlir_dot13:@3) -> float_type, {2, 3, 2, 3}, {18, 6, 3, 1}, target_id=0
mlir_dot13:@5 = contiguous(mlir_dot13:@2) -> float_type, {2, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
mlir_dot13:@6 = dot(mlir_dot13:@4,mlir_dot13:@5) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
mlir_dot13:@7 = @return(mlir_dot13:@6), target_id=0

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1101", kernel = "mixr", num_cu = 27 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

Benchmarking gpu::mlir_op: 21 configs
Fastest solution: 64,64,32,4,4,2
FAILED: gpu
RMS Error: 0.292345
Max diff: 1.59375
Mismatch at 24: 0.582031 != 0

module: "main"
2 = @param:2 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@3 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](2) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
@4 = dot(@2,@3) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

ref:
module: "main"
2 = @param:2 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@2 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
@3 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](2) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
@4 = dot(@2,@3) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

gpu:
module: "main"
@0 = check_context::migraphx::gpu::context -> float_type, {}, {}, target_id=0
2 = @param:2 -> float_type, {1, 3, 3, 4}, {36, 12, 4, 1}, target_id=0
1 = @param:1 -> float_type, {2, 1, 2, 3}, {6, 6, 3, 1}, target_id=0
@3 = multibroadcast[out_lens={2, 3, 3, 4},out_dyn_dims={}](2) -> float_type, {2, 3, 3, 4}, {0, 12, 4, 1}, target_id=0
@4 = multibroadcast[out_lens={2, 3, 2, 3},out_dyn_dims={}](1) -> float_type, {2, 3, 2, 3}, {6, 0, 3, 1}, target_id=0
output = @param:output -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0
@6 = gpu::code_object[code_object=3752,symbol_name=mlir_dot,global=384,local=64,](@4,@3,output) -> float_type, {2, 3, 2, 4}, {24, 8, 4, 1}, target_id=0

void run_verify::verify(const std::string &, const migraphx::program &, const migraphx::compile_options &) const
/code/AMDMIGraphX/AMDMIGraphX/test/verify/run_verify.cpp:264:
    FAILED: passed [ 0 ]
[  FAILED  ] gemm_2args_mm_6: Test failure
ahsan-ca commented 10 months ago

With MIGRAPHX_TRACE_MLIR=1:

3/3 test_gemm_transposea_ex

[   RUN    ] test_gemm_transposea_ex
test_verify: /usr/local/cget/build/tmp-192f2a4d08984d25a093b9de6fe237ad/rocMLIR-507bb94ce7873786486d296ec81d2eadaab49003/external/llvm-project/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp:1394: static RankedTensorType mlir::tensor::CollapseShapeOp::inferCollapsedType(RankedTensorType, ArrayRef<AffineMap>): Assertion `isReassociationValid(reassociation) && "invalid reassociation"' failed.

Note: I do not seem to get any trace for this test case; I only get an Aborted (core dumped) message.

krzysz00 commented 10 months ago

Yeah, because we haven't fixed MIGRAPHX_TRACE_MLIR post-quick-tuning (@pfultz2, for an example of the problems we're trying to fix), you'll need to patch src/targets/gpu/mlir.cpp to add an MLIR print into get_tuning_config before the call to run_highlevel_pipeline.
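A minimal sketch of that kind of patch, for debugging only: the MLIR C API already has a dump entry point, so a helper like the one below, called on the module handle inside get_tuning_config just before run_highlevel_pipeline, should be enough. Exactly where the MlirModule handle lives at that point in mlir.cpp (and the helper name) are assumptions, so adjust as needed.

#include <mlir-c/IR.h>

// Debugging-only sketch: print the module that get_tuning_config is about to
// tune, before run_highlevel_pipeline rewrites it. mlirModuleGetOperation and
// mlirOperationDump are MLIR C API calls; dump writes the IR to stderr.
static void dump_module_for_trace(MlirModule mod)
{
    mlirOperationDump(mlirModuleGetOperation(mod));
}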

krzysz00 commented 10 months ago

Update: I have built this on my machine and, by adding a strategically placed call to dump(), found an input to tosa-to-rock that causes one of the crashes.

func.func @mlir_transpose_dot(%arg0: tensor<1x1x5x4xf32>, %arg1: tensor<1x1x5x3xf32>) -> tensor<1x1x4x3xf32> attributes {arch = "gfx1100", kernel = "mixr", num_cu = 48 : i64} {
  %cst = arith.constant dense<[0, 1, 3, 2]> : tensor<4xi64>
  %0 = "tosa.transpose"(%arg0, %cst) : (tensor<1x1x5x4xf32>, tensor<4xi64>) -> tensor<1x1x4x5xf32>
  %collapsed = tensor.collapse_shape %0 [[0, 1], [2], [3]] : tensor<1x1x4x5xf32> into tensor<1x4x5xf32>
  %collapsed_0 = tensor.collapse_shape %arg1 [[0, 1], [2], [3]] : tensor<1x1x5x3xf32> into tensor<1x5x3xf32>
  %1 = "tosa.matmul"(%collapsed, %collapsed_0) : (tensor<1x4x5xf32>, tensor<1x5x3xf32>) -> tensor<1x4x3xf32>
  %expanded = tensor.expand_shape %1 [[0, 1], [2], [3]] : tensor<1x4x3xf32> into tensor<1x1x4x3xf32>
  return %expanded : tensor<1x1x4x3xf32>
}
krzysz00 commented 10 months ago

This has been further isolated to the collapse_shape handling in TosaToRock. @manupak, in case I can't figure out what's going on in there.

krzysz00 commented 10 months ago

Update: I have a fix for the crash, but the numeric failures are a separate issue. Here's one of the tests as an example:

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1100", kernel = "mixr", num_cu = 48 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

which lowers (after being sent down the high-level pipeline) to

#map = affine_map<(d0, d1, d2, d3) -> (0, d1, d2, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, 0, d2, d3)>
#map2 = affine_map<(d0, d1, d2) -> (d0 floordiv 3, d0 mod 3, d1, d2)>
#map3 = affine_map<(d0, d1, d2, d3) -> (d0 * 3 + d1, d2, d3)>
#transform_map = #rock.transform_map<#map by [<Broadcast{1} ["dim0"] at [0] -> ["dim0"] at [0]>, <PassThrough ["dim1"] at [1] -> ["dim1"] at [1]>, <PassThrough ["dim2"] at [2] -> ["dim2"] at [2]>, <PassThrough ["dim3"] at [3] -> ["dim3"] at [3]>] bounds = [2, 3, 3, 4] -> [1, 3, 3, 4]>
#transform_map1 = #rock.transform_map<#map1 by [<PassThrough ["dim0"] at [0] -> ["dim0"] at [0]>, <Broadcast{1} ["dim1"] at [1] -> ["dim1"] at [1]>, <PassThrough ["dim2"] at [2] -> ["dim2"] at [2]>, <PassThrough ["dim3"] at [3] -> ["dim3"] at [3]>] bounds = [2, 3, 2, 3] -> [2, 1, 2, 3]>
#transform_map2 = #rock.transform_map<#map2 by [<Merge{2, 3} ["dim0"] at [0] -> ["col0", "col1"] at [0, 1]>, <PassThrough ["dim1"] at [1] -> ["dim1"] at [2]>, <PassThrough ["dim2"] at [2] -> ["dim2"] at [3]>] bounds = [6, 2, 3] -> [2, 3, 2, 3]>
#transform_map3 = #rock.transform_map<#map2 by [<Merge{2, 3} ["dim0"] at [0] -> ["col0", "col1"] at [0, 1]>, <PassThrough ["dim1"] at [1] -> ["dim1"] at [2]>, <PassThrough ["dim2"] at [2] -> ["dim2"] at [3]>] bounds = [6, 3, 4] -> [2, 3, 3, 4]>
#transform_map4 = #rock.transform_map<#map3 by [<Unmerge{2, 3} ["exp0", "exp1"] at [0, 1] -> ["dim0"] at [0]>, <PassThrough ["dim1"] at [2] -> ["dim1"] at [1]>, <PassThrough ["dim2"] at [3] -> ["dim2"] at [2]>] bounds = [2, 3, 2, 4] -> [6, 2, 4]>
module {
  func.func @mlir_dot(%arg0: memref<2x1x2x3xf32>, %arg1: memref<1x3x3x4xf32>, %arg2: memref<2x3x2x4xf32>) attributes {arch = "gfx1100", kernel = "mixr", num_cu = 48 : i64} {
    %0 = rock.transform %arg1 by #transform_map : memref<1x3x3x4xf32> to memref<2x3x3x4xf32>
    %1 = rock.transform %arg0 by #transform_map1 : memref<2x1x2x3xf32> to memref<2x3x2x3xf32>
    %2 = rock.transform %1 by #transform_map2 : memref<2x3x2x3xf32> to memref<6x2x3xf32>
    %3 = rock.transform %0 by #transform_map3 : memref<2x3x3x4xf32> to memref<6x3x4xf32>
    %alloc = memref.alloc() {alignment = 64 : i64} : memref<6x2x4xf32>
    rock.gemm %alloc = %2 * %3 features =  dot|atomic_add|atomic_fmax_f32 storeMethod =  set {arch = "gfx1100", numCU = 48 : i32} : memref<6x2x4xf32> = memref<6x2x3xf32> * memref<6x3x4xf32>
    %4 = rock.transform %alloc by #transform_map4 : memref<6x2x4xf32> to memref<2x3x2x4xf32>
    memref.copy %4, %arg2 : memref<2x3x2x4xf32> to memref<2x3x2x4xf32>
    return
  }
}

which looks ... entirely fine to me? @giuseros, since your name got mentioned in the same breath as some sort of numerical issue on Navi.
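Spelling out the batch bookkeeping behind that statement: the Unmerge in #transform_map4 and the Merge in #transform_map2/#transform_map3 use the same factorization of the flattened batch index,

$$ g = 3\,b_0 + b_1, \qquad (\lfloor g/3 \rfloor,\ g \bmod 3) = (b_0, b_1) \quad \text{for } b_0 \in \{0,1\},\ b_1 \in \{0,1,2\}, $$

so the 6-batch rock.gemm should reproduce the broadcast batched dot, i.e. $\mathrm{out}[b_0,b_1,m,n] = \sum_k \mathrm{arg0}[b_0,0,m,k]\,\mathrm{arg1}[0,b_1,k,n]$.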

krzysz00 commented 10 months ago

The following module (note the lack of the "kernel" attribute)

module {
  func.func @mlir_dot(%arg0: tensor<2x1x2x3xf32>, %arg1: tensor<1x3x3x4xf32>) -> tensor<2x3x2x4xf32> attributes {arch = "gfx1100", num_cu = 48 : i64} {
    %0 = migraphx.multibroadcast(%arg1) {out_dyn_dims = [], out_lens = [2, 3, 3, 4]} : (tensor<1x3x3x4xf32>) -> tensor<2x3x3x4xf32>
    %1 = migraphx.multibroadcast(%arg0) {out_dyn_dims = [], out_lens = [2, 3, 2, 3]} : (tensor<2x1x2x3xf32>) -> tensor<2x3x2x3xf32>
    %2 = migraphx.dot(%1, %0) : (tensor<2x3x2x3xf32>, tensor<2x3x3x4xf32>) -> tensor<2x3x2x4xf32>
    return %2 : tensor<2x3x2x4xf32>
  }
}

when run through (and yes, I checked that there is actually a GPU kernel in there and that it is compared against an equivalent host kernel)

./bin/rocmlir-driver -kernel-pipeline=migraphx ./broadcast-batch-no-kern.mlir | ./bin/rocmlir-driver -host-pipeline=partition,highlevel --targets gfx1100 | ./bin/rocmlir-gen --arch gfx1100 -pv -fut mlir_dot --verifier clone - | ./bin/rocmlir-driver -kernel-pipeline full -host-pipeline mhal,runner  | rocm-run

(replacing the filename and switching gfx1100 for gfx1101 on Navi32 if needed) prints the

[1 1 1]

which shows the kernel operates correctly as far as we're concerned.

Therefore, with regard to the numerical accuracy bugs, please manually examine the GPU and CPU inputs and outputs and confirm that our kernel's output is, in fact, incorrect, ideally with a concrete example.
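For whoever picks this up, a minimal sketch of that side-by-side check, modeled on how the verify tests build this graph with MIGraphX's internal C++ API; the exact headers, the offload_copy option, and the argument generation are assumptions that may need adjusting to the current tree.

#include <migraphx/program.hpp>
#include <migraphx/module.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/generate.hpp>
#include <iostream>
#include <string>

// Build the gemm_2args_mm_6 graph: two multibroadcasts feeding a batched dot.
static migraphx::program build_program()
{
    migraphx::program p;
    auto* mm = p.get_main_module();
    auto a  = mm->add_parameter("1", migraphx::shape{migraphx::shape::float_type, {2, 1, 2, 3}});
    auto b  = mm->add_parameter("2", migraphx::shape{migraphx::shape::float_type, {1, 3, 3, 4}});
    auto ab = mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {2, 3, 2, 3}}}), a);
    auto bb = mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {2, 3, 3, 4}}}), b);
    mm->add_instruction(migraphx::make_op("dot"), ab, bb);
    return p;
}

int main()
{
    for(const std::string& target_name : {"ref", "gpu"})
    {
        auto p = build_program();
        migraphx::compile_options options;
        options.offload_copy = true; // assumed to handle host<->device copies for the gpu target
        p.compile(migraphx::make_target(target_name), options);

        // Same deterministic inputs for both targets (default generate_argument seed).
        migraphx::parameter_map params;
        for(auto&& [name, param_shape] : p.get_parameter_shapes())
            params[name] = migraphx::generate_argument(param_shape);

        auto result = p.eval(params).back();
        std::cout << target_name << ":\n" << result << "\n";
    }
}

Dumping both results this way (or just the mismatching elements, e.g. index 24 from the failure above) should make it clear whether the GPU output or the host reference is the one that's off.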