akmaru / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
0 stars 0 forks source link

Try mlir-opt #3

Open akmaru opened 1 year ago

akmaru commented 1 year ago

TOSA -> Linalg へのconversion

下記の内容を conv-bn-relu.mlir として保存した。

// Convolution followed by ReLU, written with TOSA ops in generic form.
// Takes a 1x224x224x3 f32 input, 64x3x3x3 f32 weights and a 64-element f32
// bias, and returns a 1x224x224x64 f32 tensor.
// NOTE(review): the mlir-opt outputs further down print this function as
// @conv_bn_relu, so the file actually run presumably used that name — confirm.
func.func @conv_relu(%input: tensor<1x224x224x3xf32>, %weights: tensor<64x3x3x3xf32>, %bias: tensor<64xf32>) -> tensor<1x224x224x64xf32> {
  // Convolution with pad 1 on all four sides, stride 1 and dilation 1; the
  // result keeps the 224x224 spatial size and has 64 channels.
  %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>, dilation = array<i64: 1, 1>} : (tensor<1x224x224x3xf32>, tensor<64x3x3x3xf32>, tensor<64xf32>) -> tensor<1x224x224x64xf32>
  // All-zero constant with the same shape as the convolution result.
  %cst = "tosa.const"() {value = dense<0.0> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32>
  // Element-wise maximum against zero, i.e. ReLU.
  %1 = "tosa.maximum"(%0, %cst) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32>
  return %1 : tensor<1x224x224x64xf32>
}

下記のコマンドで Linalg への conversion を実行した。

./bin/mlir-opt -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named))" conv-bn-relu.mlir | ./bin/mlir-opt -pass-pipeline="builtin.module(func.func(tosa-to-linalg))"

下記のようなIRが得られた。

#map = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d3)>
module {
  func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> {
    %cst = arith.constant 0.000000e+00 : f32
    %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] {
    ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index):
      tensor.yield %cst : f32
    } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32>
    %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64>
    %0 = tensor.empty() : tensor<3x3x3x64xf32>
    %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<3x3x3x64xf32>
    %2 = tensor.empty() : tensor<1x224x224x64xf32>
    %cst_1 = arith.constant 0.000000e+00 : f32
    %3 = linalg.fill ins(%cst_1 : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32>
    %4 = tensor.empty() : tensor<1x224x224x64xf32>
    %5 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32>
    %6 = linalg.generic {indexing_maps = [#map2, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %5 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%4 : tensor<1x224x224x64xf32>) {
    ^bb0(%in: f32, %in_2: f32, %out: f32):
      %10 = arith.addf %in, %in_2 : f32
      linalg.yield %10 : f32
    } -> tensor<1x224x224x64xf32>
    %7 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32>
    %8 = tensor.empty() : tensor<1x224x224x64xf32>
    %9 = linalg.generic {indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6, %7 : tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) outs(%8 : tensor<1x224x224x64xf32>) {
    ^bb0(%in: f32, %in_2: f32, %out: f32):
      %10 = arith.maxf %in, %in_2 : f32
      linalg.yield %10 : f32
    } -> tensor<1x224x224x64xf32>
    return %9 : tensor<1x224x224x64xf32>
  }
}
akmaru commented 1 year ago

tosa-to-arith も追加すると、 tosa.const もconversionされるようになった。

./bin/mlir-opt -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named))" conv-bn-relu.mlir | ./bin/mlir-opt -pass-pipeline="builtin.module(func.func(tosa-to-linalg))" | ./bin/mlir-opt -pass-pipeline="builtin.module(func.func(tosa-to-arith))"
#map = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d3)>
module {
  func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> {
    %cst = arith.constant 0.000000e+00 : f32
    %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] {
    ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index):
      tensor.yield %cst : f32
    } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32>
    %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64>
    %0 = tensor.empty() : tensor<3x3x3x64xf32>
    %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<3x3x3x64xf32>
    %2 = tensor.empty() : tensor<1x224x224x64xf32>
    %cst_1 = arith.constant 0.000000e+00 : f32
    %3 = linalg.fill ins(%cst_1 : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32>
    %4 = tensor.empty() : tensor<1x224x224x64xf32>
    %5 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32>
    %6 = linalg.generic {indexing_maps = [#map2, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %5 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%4 : tensor<1x224x224x64xf32>) {
    ^bb0(%in: f32, %in_3: f32, %out: f32):
      %9 = arith.addf %in, %in_3 : f32
      linalg.yield %9 : f32
    } -> tensor<1x224x224x64xf32>
    %cst_2 = arith.constant dense<0.000000e+00> : tensor<1x224x224x64xf32>
    %7 = tensor.empty() : tensor<1x224x224x64xf32>
    %8 = linalg.generic {indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6, %cst_2 : tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) outs(%7 : tensor<1x224x224x64xf32>) {
    ^bb0(%in: f32, %in_3: f32, %out: f32):
      %9 = arith.maxf %in, %in_3 : f32
      linalg.yield %9 : f32
    } -> tensor<1x224x224x64xf32>
    return %8 : tensor<1x224x224x64xf32>
  }
}
akmaru commented 1 year ago

-debug オプションを付与すると、debug logが出力される。

command ```sh /bin/mlir-opt -debug -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named))" conv-bn-relu.mlir ``` output ``` Args: ./bin/mlir-opt -debug -pass-pipeline=builtin.module(func.func(tosa-to-linalg-named)) conv-bn-relu.mlir Load new dialect in Context builtin ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubElementTypeInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubElementAttrInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr) ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CastOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ResourceBlobManagerDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineBinaryOpExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineConstantExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineDimExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineMapStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::IntegerSetStorage) Load new dialect in Context builtin ImplicitTypeIDRegistry::lookupOrInsert(mlir::DebugActionManager::GenericHandler) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroOperands) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneRegion) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroResults) 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroSuccessors) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoRegionArguments) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoTerminator) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlock) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OpInvariants) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AffineScope) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsIsolatedFromAbove) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SymbolTable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasOnlyGraphRegion) Load new dialect in Context func Load new dialect in Context cf Load new dialect in Context arith ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectInlinerInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::bufferization::BufferizableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::BranchOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolUserOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AutomaticAllocationScope) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface::Trait) Load new dialect in Context tosa Load new dialect in Context tensor 
Load new dialect in Context affine ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineMapAccessInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineDmaStartOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineDmaWaitOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::LoopLikeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineReadOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineWriteOpInterface) Load new dialect in Context complex ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedDimOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OffsetSizeAndStrideOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface) Load new dialect in Context linalg Load new dialect in Context math Load new dialect in Context memref ImplicitTypeIDRegistry::lookupOrInsert(mlir::CopyOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ViewLikeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RuntimeVerifiableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ContractionOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ConvolutionOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::FillOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::TilingInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::PartialReductionOpInterface) Ignoring repeated interface registrationIgnoring repeated interface registrationImplicitTypeIDRegistry::lookupOrInsert(mlir::InferShapedTypeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::tosa::TosaOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroRegions) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneResult) 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ConstantLike) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AlwaysSpeculatableImplTrait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::tosa::TosaOp::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicOperands) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::MemRefsNormalizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ReturnLike) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsTerminator) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NOperands<3>::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferShapedTypeOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NOperands<2>::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ResultsBroadcastableShape) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsCommutative) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::OpToOpPassAdaptor) Load new dialect in Context scf ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchTerminatorOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ParallelCombiningOpInterface) //===-------------------------------------------===// Legalizing operation : 'func.func'(0x5567f3e4c530) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.conv2d'(0x5567f3e80e70) { %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = array, pad = array, stride = array} 
: (tensor<1x224x224x3xf32>, tensor<64x3x3x3xf32>, tensor<64xf32>) -> tensor<1x224x224x64xf32> * Fold { ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectFoldInterface) } -> FAILURE : unable to fold * Pattern : 'tosa.conv2d -> ()' { Trying to match "{anonymous}::ConvConverter" ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface::Trait) ** Insert : 'arith.constant'(0x5567f3e94290) ** Insert : 'tensor.yield'(0x5567f3e3e920) ** Insert : 'tensor.pad'(0x5567f3e9ff30) ** Insert : 'arith.constant'(0x5567f3ea42b0) ** Insert : 'tosa.transpose'(0x5567f3ea5800) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface::Trait) ** Insert : 'tensor.empty'(0x5567f3ea58b0) ** Insert : 'arith.constant'(0x5567f3ea5920) ** Insert : 'linalg.fill'(0x5567f3ea5ad0) ** Insert : 'tensor.empty'(0x5567f3eacce0) ** Insert : 'linalg.conv_2d_nhwc_hwcf'(0x5567f3e6a8d0) ** Insert : 'arith.addf'(0x5567f3eb5ff0) ** Insert : 'linalg.yield'(0x5567f3eb6090) ** Insert : 'linalg.generic'(0x5567f3e4f7c0) ** Replace : 'tosa.conv2d'(0x5567f3e80e70) "{anonymous}::ConvConverter" result 1 //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x5567f3e94290) { %0 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.yield'(0x5567f3e3e920) { "tensor.yield"(%0) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.pad'(0x5567f3e9ff30) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 
'arith.constant'(0x5567f3ea42b0) { %2 = "arith.constant"() {value = dense<[1, 2, 3, 0]> : tensor<4xi64>} : () -> tensor<4xi64> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.transpose'(0x5567f3ea5800) { %3 = "tosa.transpose"(%arg1, %2) : (tensor<64x3x3x3xf32>, tensor<4xi64>) -> tensor<3x3x3x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x5567f3ea58b0) { %4 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x5567f3ea5920) { %5 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.fill'(0x5567f3ea5ad0) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x5567f3eacce0) { %7 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.conv_2d_nhwc_hwcf'(0x5567f3e6a8d0) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.addf'(0x5567f3eb5ff0) { %13 = "arith.addf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) 
-> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x5567f3eb6090) { "linalg.yield"(%13) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.generic'(0x5567f3e4f7c0) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// } -> SUCCESS : pattern applied successfully // *** IR Dump After Pattern Application *** mlir-asm-printer: Verifying operation: func.func ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AtLeastNOperands<1>::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AttrSizedOperandSegments) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneOperand) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicResults) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::FillOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ConvolutionOpInterface::Trait) func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: 
index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64> %0 = "tosa.transpose"(%arg1, %cst_0) : (tensor<64x3x3x3xf32>, tensor<4xi64>) -> tensor<3x3x3x64xf32> %1 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 : f32 %2 = linalg.fill ins(%cst_1 : f32) outs(%1 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %3 = tensor.empty() : tensor<1x224x224x64xf32> %4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %0 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %5 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %4 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_2: f32, %out: f32): %9 = arith.addf %in, %in_2 : f32 linalg.yield %9 : f32 } -> tensor<1x224x224x64xf32> %6 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = array, pad = array, stride = array} : (tensor<1x224x224x3xf32>, tensor<64x3x3x3xf32>, tensor<64xf32>) -> tensor<1x224x224x64xf32> %7 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> %8 = "tosa.maximum"(%6, %7) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> return %8 : tensor<1x224x224x64xf32> } } -> SUCCESS //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.const'(0x5567f3e57630) { %11 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target 
//===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.maximum'(0x5567f3e878a0) { %12 = "tosa.maximum"(%10, %11) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'func.return'(0x5567f3e879f0) { "func.return"(%12) : (tensor<1x224x224x64xf32>) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::PreservedAnalyses::AllAnalysesType) ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SameOperandsAndResultType) ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Elementwise) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Scalarizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Vectorizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Tensorizable) mlir-asm-printer: Verifying operation: builtin.module #map = ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::StorageUserTrait::IsMutable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface::Trait) affine_map<(d0, d1, d2, d3) -> (d3)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> module { func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %cst_0 = arith.constant dense<[1, 2, 
3, 0]> : tensor<4xi64> %0 = "tosa.transpose"(%arg1, %cst_0) : (tensor<64x3x3x3xf32>, tensor<4xi64>) -> tensor<3x3x3x64xf32> %1 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 : f32 %2 = linalg.fill ins(%cst_1 : f32) outs(%1 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %3 = tensor.empty() : tensor<1x224x224x64xf32> %4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %0 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %5 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %4 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_2: f32, %out: f32): %8 = arith.addf %in, %in_2 : f32 linalg.yield %8 : f32 } -> tensor<1x224x224x64xf32> %6 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> %7 = "tosa.maximum"(%5, %6) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> return %7 : tensor<1x224x224x64xf32> } } ```
akmaru commented 1 year ago

VSCode上でdebugするための、launch.jsonを作成した。

{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "(gdb) mlir-opt",
            "type": "cppdbg",
            "request": "launch",
            "program": "${workspaceFolder}/build/bin/mlir-opt",
            "args": [
                "-debug",
                "-pass-pipeline=builtin.module(func.func(tosa-to-linalg-named))",
                "build/conv-bn-relu.mlir"
            ],
            "stopAtEntry": false,
            "cwd": "${workspaceFolder}",
            "environment": [],
            "externalConsole": false,
            "linux": {
                "MIMode": "gdb",
                "miDebuggerPath": "/usr/bin/gdb",
                "setupCommands": [
                    {
                        "description": "Enable pretty-printing for gdb",
                        "text": "-enable-pretty-printing",
                        "ignoreFailures": true
                    }
                ]
            }
        }
    ]
}
akmaru commented 1 year ago

DialectConversion時の主要なcallstackは下記のようになっている。

  1. OperationLegalizer::OperationLegalizer
  2. OperationConverter::convert
  3. OperationConverter::convertOperations
  4. applyFullConversion
  5. applyFullConversion
  6. TosaToLinalgNamed::runOnOperation
  7. OpToOpPassAdaptor::run
  8. OpToOpPassAdaptor::runPipeline
  9. OpToOpPassAdaptor::runOnOperationAsyncImpl
  10. failableParallelForEach
  11. failableParallelForEach
  12. OpToOpPassAdaptor::runOnOperationAsyncImpl
  13. OpToOpPassAdaptor::runOnOperation
  14. OpToOpPassAdaptor::run
  15. OpToOpPassAdaptor::runPipeline
  16. PassManager::runPasses
  17. PassManager::run
  18. performActions
  19. processBuffer
  20. MlirOptMain
  21. ...
  22. MlirOptMain
  23. main
akmaru commented 1 year ago

下記のように、 func.func 内に複数のpassをcomma区切りで記述することができるらしい。

./bin/mlir-opt -debug -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith))" conv-bn-relu.mlir
output ``` Args: ./bin/mlir-opt -debug -pass-pipeline=builtin.module(func.func(tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith)) conv-bn-relu.mlir Load new dialect in Context builtin ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubElementTypeInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubElementAttrInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr) ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CastOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ResourceBlobManagerDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineBinaryOpExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineConstantExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineDimExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineMapStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::IntegerSetStorage) Load new dialect in Context builtin ImplicitTypeIDRegistry::lookupOrInsert(mlir::DebugActionManager::GenericHandler) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroOperands) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneRegion) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroResults) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroSuccessors) 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoRegionArguments) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoTerminator) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlock) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OpInvariants) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AffineScope) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsIsolatedFromAbove) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SymbolTable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasOnlyGraphRegion) Load new dialect in Context func Load new dialect in Context cf Load new dialect in Context arith ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectInlinerInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::bufferization::BufferizableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::BranchOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolUserOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AutomaticAllocationScope) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface::Trait) Load new dialect in Context tosa Load new dialect in Context tensor Load new dialect in Context affine 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineMapAccessInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineDmaStartOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineDmaWaitOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::LoopLikeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineReadOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineWriteOpInterface) Load new dialect in Context complex ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedDimOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OffsetSizeAndStrideOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface) Load new dialect in Context linalg Load new dialect in Context math Load new dialect in Context memref ImplicitTypeIDRegistry::lookupOrInsert(mlir::CopyOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ViewLikeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RuntimeVerifiableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ContractionOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ConvolutionOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::FillOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::TilingInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::PartialReductionOpInterface) Ignoring repeated interface registrationIgnoring repeated interface registrationImplicitTypeIDRegistry::lookupOrInsert(mlir::InferShapedTypeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::tosa::TosaOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroRegions) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneResult) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult::Impl::Impl]::Empty>) 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ConstantLike) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AlwaysSpeculatableImplTrait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::tosa::TosaOp::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicOperands) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::MemRefsNormalizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ReturnLike) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsTerminator) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NOperands<3>::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferShapedTypeOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NOperands<2>::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ResultsBroadcastableShape) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsCommutative) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::OpToOpPassAdaptor) Load new dialect in Context scf ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchTerminatorOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ParallelCombiningOpInterface) //===-------------------------------------------===// Legalizing operation : 'func.func'(0x55c82a0b2400) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.conv2d'(0x55c82a0e7d50) { %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = array, pad = array, stride = array} : (tensor<1x224x224x3xf32>, tensor<64x3x3x3xf32>, tensor<64xf32>) -> 
tensor<1x224x224x64xf32> * Fold { ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectFoldInterface) } -> FAILURE : unable to fold * Pattern : 'tosa.conv2d -> ()' { Trying to match "{anonymous}::ConvConverter" ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface::Trait) ** Insert : 'arith.constant'(0x55c82a118e40) ** Insert : 'tensor.yield'(0x55c82a0a3150) ** Insert : 'tensor.pad'(0x55c82a0a31e0) ** Insert : 'arith.constant'(0x55c82a0a3460) ** Insert : 'tosa.transpose'(0x55c82a122690) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface::Trait) ** Insert : 'tensor.empty'(0x55c82a122740) ** Insert : 'arith.constant'(0x55c82a1227b0) ** Insert : 'linalg.fill'(0x55c82a123e70) ** Insert : 'tensor.empty'(0x55c82a12c570) ** Insert : 'linalg.conv_2d_nhwc_hwcf'(0x55c82a0d07a0) ** Insert : 'arith.addf'(0x55c82a138280) ** Insert : 'linalg.yield'(0x55c82a138320) ** Insert : 'linalg.generic'(0x55c82a0b5690) ** Replace : 'tosa.conv2d'(0x55c82a0e7d50) "{anonymous}::ConvConverter" result 1 //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a118e40) { %0 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.yield'(0x55c82a0a3150) { "tensor.yield"(%0) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.pad'(0x55c82a0a31e0) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a0a3460) { %2 = "arith.constant"() {value = 
dense<[1, 2, 3, 0]> : tensor<4xi64>} : () -> tensor<4xi64> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.transpose'(0x55c82a122690) { %3 = "tosa.transpose"(%arg1, %2) : (tensor<64x3x3x3xf32>, tensor<4xi64>) -> tensor<3x3x3x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a122740) { %4 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a1227b0) { %5 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.fill'(0x55c82a123e70) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a12c570) { %7 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.conv_2d_nhwc_hwcf'(0x55c82a0d07a0) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.addf'(0x55c82a138280) { %13 = "arith.addf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target 
//===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a138320) { "linalg.yield"(%13) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.generic'(0x55c82a0b5690) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// } -> SUCCESS : pattern applied successfully // *** IR Dump After Pattern Application *** mlir-asm-printer: Verifying operation: func.func ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AtLeastNOperands<1>::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AttrSizedOperandSegments) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneOperand) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicResults) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::FillOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ConvolutionOpInterface::Trait) func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : 
tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64> %0 = "tosa.transpose"(%arg1, %cst_0) : (tensor<64x3x3x3xf32>, tensor<4xi64>) -> tensor<3x3x3x64xf32> %1 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 : f32 %2 = linalg.fill ins(%cst_1 : f32) outs(%1 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %3 = tensor.empty() : tensor<1x224x224x64xf32> %4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %0 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %5 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %4 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_2: f32, %out: f32): %9 = arith.addf %in, %in_2 : f32 linalg.yield %9 : f32 } -> tensor<1x224x224x64xf32> %6 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = array, pad = array, stride = array} : (tensor<1x224x224x3xf32>, tensor<64x3x3x3xf32>, tensor<64xf32>) -> tensor<1x224x224x64xf32> %7 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> %8 = "tosa.maximum"(%6, %7) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> return %8 : tensor<1x224x224x64xf32> } } -> SUCCESS //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.const'(0x55c82a0bd500) { %11 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// 
//===-------------------------------------------===// Legalizing operation : 'tosa.maximum'(0x55c82a0edf60) { %12 = "tosa.maximum"(%10, %11) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'func.return'(0x55c82a0ee0b0) { "func.return"(%12) : (tensor<1x224x224x64xf32>) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::PreservedAnalyses::AllAnalysesType) //===-------------------------------------------===// Legalizing operation : 'func.func'(0x55c82a0b2400) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a118e40) { %0 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.pad'(0x55c82a0a31e0) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.yield'(0x55c82a0a3150) { "tensor.yield"(%0) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a0a3460) { %2 = "arith.constant"() {value = dense<[1, 2, 3, 0]> : tensor<4xi64>} : () -> tensor<4xi64> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// 
//===-------------------------------------------===// Legalizing operation : 'tosa.transpose'(0x55c82a122690) { %3 = "tosa.transpose"(%arg1, %2) : (tensor<64x3x3x3xf32>, tensor<4xi64>) -> tensor<3x3x3x64xf32> * Fold { } -> FAILURE : unable to fold * Pattern : 'tosa.transpose -> ()' { Trying to match "{anonymous}::TransposeConverter" ** Insert : 'tensor.empty'(0x55c82a15e970) ** Insert : 'linalg.yield'(0x55c82a15ea90) ** Insert : 'linalg.generic'(0x55c82a15eb20) ** Replace : 'tosa.transpose'(0x55c82a122690) "{anonymous}::TransposeConverter" result 1 //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a15e970) { %3 = "tensor.empty"() : () -> tensor<3x3x3x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a15ea90) { "linalg.yield"(%arg3) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.generic'(0x55c82a15eb20) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// } -> SUCCESS : pattern applied successfully // *** IR Dump After Pattern Application *** mlir-asm-printer: Verifying operation: func.func func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64> %0 = tensor.empty() : tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, 
d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = "tosa.transpose"(%arg1, %cst_0) : (tensor<64x3x3x3xf32>, tensor<4xi64>) -> tensor<3x3x3x64xf32> %3 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 : f32 %4 = linalg.fill ins(%cst_1 : f32) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %5 = tensor.empty() : tensor<1x224x224x64xf32> %6 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %2 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%4 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %7 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %6 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%5 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_2: f32, %out: f32): %10 = arith.addf %in, %in_2 : f32 linalg.yield %10 : f32 } -> tensor<1x224x224x64xf32> %8 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> %9 = "tosa.maximum"(%7, %8) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> return %9 : tensor<1x224x224x64xf32> } } -> SUCCESS //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a122740) { %6 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 
'arith.constant'(0x55c82a1227b0) { %7 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.fill'(0x55c82a123e70) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a123de0) { "linalg.yield"(%arg3) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a12c570) { %9 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.conv_2d_nhwc_hwcf'(0x55c82a0d07a0) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.mulf'(0x55c82a130a00) { %14 = "arith.mulf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.addf'(0x55c82a130ab0) { %15 = "arith.addf"(%arg5, %14) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a130b50) { "linalg.yield"(%15) : (f32) -> () } -> SUCCESS : operation marked legal by the target 
//===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.generic'(0x55c82a0b5690) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.addf'(0x55c82a138280) { %14 = "arith.addf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a138320) { "linalg.yield"(%14) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.const'(0x55c82a0bd500) { %12 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.maximum'(0x55c82a0edf60) { %13 = "tosa.maximum"(%11, %12) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> * Fold { } -> FAILURE : unable to fold * Pattern : 'tosa.maximum -> ()' { Trying to match "{anonymous}::PointwiseConverter" ** Insert : 'tensor.empty'(0x55c82a15ebe0) ** Insert : 'arith.maxf'(0x55c82a160a20) ** Insert : 'linalg.yield'(0x55c82a160b90) ** Insert : 'linalg.generic'(0x55c82a118ea0) ** Replace : 'tosa.maximum'(0x55c82a0edf60) "{anonymous}::PointwiseConverter" result 1 //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a15ebe0) { %13 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target 
//===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.maxf'(0x55c82a160a20) { %16 = "arith.maxf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a160b90) { "linalg.yield"(%16) : (f32) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.generic'(0x55c82a118ea0) { } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// } -> SUCCESS : pattern applied successfully // *** IR Dump After Pattern Application *** mlir-asm-printer: Verifying operation: func.func func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64> %0 = tensor.empty() : tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = "tosa.transpose"(%arg1, %cst_0) : (tensor<64x3x3x3xf32>, tensor<4xi64>) -> tensor<3x3x3x64xf32> %3 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 : f32 %4 = linalg.fill 
ins(%cst_1 : f32) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %5 = tensor.empty() : tensor<1x224x224x64xf32> %6 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %2 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%4 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %7 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %6 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%5 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_2: f32, %out: f32): %12 = arith.addf %in, %in_2 : f32 linalg.yield %12 : f32 } -> tensor<1x224x224x64xf32> %8 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> %9 = tensor.empty() : tensor<1x224x224x64xf32> %10 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%7, %8 : tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) outs(%9 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_2: f32, %out: f32): %12 = arith.maxf %in, %in_2 : f32 linalg.yield %12 : f32 } -> tensor<1x224x224x64xf32> %11 = "tosa.maximum"(%7, %8) : (tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> return %11 : tensor<1x224x224x64xf32> } } -> SUCCESS //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'func.return'(0x55c82a0ee0b0) { "func.return"(%15) : (tensor<1x224x224x64xf32>) -> () } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// 
//===-------------------------------------------===// Legalizing operation : 'func.func'(0x55c82a0b2400) { * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a118e40) { %0 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.pad'(0x55c82a0a31e0) { * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.yield'(0x55c82a0a3150) { "tensor.yield"(%0) : (f32) -> () * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a0a3460) { %2 = "arith.constant"() {value = dense<[1, 2, 3, 0]> : tensor<4xi64>} : () -> tensor<4xi64> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a15e970) { %3 = "tensor.empty"() : () -> tensor<3x3x3x64xf32> * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.generic'(0x55c82a15eb20) { * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation 
: 'linalg.yield'(0x55c82a15ea90) { "linalg.yield"(%arg3) : (f32) -> () * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a122740) { %5 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a1227b0) { %6 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.fill'(0x55c82a123e70) { * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a123de0) { "linalg.yield"(%arg3) : (f32) -> () * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a12c570) { %8 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.conv_2d_nhwc_hwcf'(0x55c82a0d07a0) { * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 
'arith.mulf'(0x55c82a130a00) { %14 = "arith.mulf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.addf'(0x55c82a130ab0) { %15 = "arith.addf"(%arg5, %14) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a130b50) { "linalg.yield"(%15) : (f32) -> () * Fold { ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SameOperandsAndResultType) ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Elementwise) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Scalarizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Vectorizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Tensorizable) } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.generic'(0x55c82a0b5690) { * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.addf'(0x55c82a138280) { %14 = "arith.addf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a138320) { "linalg.yield"(%14) : (f32) -> 
() * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tosa.const'(0x55c82a0bd500) { %11 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> * Fold { } -> FAILURE : unable to fold * Pattern : 'tosa.const -> ()' { Trying to match "{anonymous}::ConstOpConverter" ** Insert : 'arith.constant'(0x55c82a159ef0) ** Replace : 'tosa.const'(0x55c82a0bd500) "{anonymous}::ConstOpConverter" result 1 //===-------------------------------------------===// Legalizing operation : 'arith.constant'(0x55c82a159ef0) { %11 = "arith.constant"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// } -> SUCCESS : pattern applied successfully // *** IR Dump After Pattern Application *** mlir-asm-printer: Verifying operation: func.func func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64> %0 = tensor.empty() : tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 
: f32 %3 = linalg.fill ins(%cst_1 : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %4 = tensor.empty() : tensor<1x224x224x64xf32> %5 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %5 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%4 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_3: f32, %out: f32): %10 = arith.addf %in, %in_3 : f32 linalg.yield %10 : f32 } -> tensor<1x224x224x64xf32> %cst_2 = arith.constant dense<0.000000e+00> : tensor<1x224x224x64xf32> %7 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> %8 = tensor.empty() : tensor<1x224x224x64xf32> %9 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6, %7 : tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) outs(%8 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_3: f32, %out: f32): %10 = arith.maxf %in, %in_3 : f32 linalg.yield %10 : f32 } -> tensor<1x224x224x64xf32> return %9 : tensor<1x224x224x64xf32> } } -> SUCCESS //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'tensor.empty'(0x55c82a15ebe0) { %13 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// 
//===-------------------------------------------===// Legalizing operation : 'linalg.generic'(0x55c82a118ea0) { * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'arith.maxf'(0x55c82a160a20) { %15 = "arith.maxf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> SUCCESS : operation marked legal by the target //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'linalg.yield'(0x55c82a160b90) { "linalg.yield"(%15) : (f32) -> () * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// //===-------------------------------------------===// Legalizing operation : 'func.return'(0x55c82a0ee0b0) { "func.return"(%14) : (tensor<1x224x224x64xf32>) -> () * Fold { } -> FAILURE : unable to fold } -> FAILURE : no matched legalization pattern //===-------------------------------------------===// mlir-asm-printer: Verifying operation: builtin.module #map = ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::StorageUserTrait::IsMutable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface::Trait) affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> #map2 = affine_map<(d0, d1, d2, d3) -> (d3)> module { func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64> %0 = tensor.empty() : 
tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 : f32 %3 = linalg.fill ins(%cst_1 : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %4 = tensor.empty() : tensor<1x224x224x64xf32> %5 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %6 = linalg.generic {indexing_maps = [#map2, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %5 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%4 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_3: f32, %out: f32): %9 = arith.addf %in, %in_3 : f32 linalg.yield %9 : f32 } -> tensor<1x224x224x64xf32> %cst_2 = arith.constant dense<0.000000e+00> : tensor<1x224x224x64xf32> %7 = tensor.empty() : tensor<1x224x224x64xf32> %8 = linalg.generic {indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6, %cst_2 : tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) outs(%7 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_3: f32, %out: f32): %9 = arith.maxf %in, %in_3 : f32 linalg.yield %9 : f32 } -> tensor<1x224x224x64xf32> return %8 : tensor<1x224x224x64xf32> } } ```
akmaru commented 1 year ago

tosa::addTosaToLinalgPasses に、TOSA -> Linalg へ conversion する際の pass が一通り記述されている。

// Appends a fixed sequence of passes to `pm`. Every pass is registered as a
// nested pass on func::FuncOp, in the order written below.
//
// pm: pass manager that receives the passes.
// disableTosaDecompositions: when true, the optional TOSA decomposition pass
//   is not added; every other pass is still added.
void mlir::tosa::addTosaToLinalgPasses(OpPassManager &pm,
                                       bool disableTosaDecompositions) {
  // Optional decompositions are designed to benefit linalg.
  if (!disableTosaDecompositions)
    pm.addNestedPass<func::FuncOp>(tosa::createTosaOptionalDecompositions());
  // Not guarded by the brace-less `if` above: the canonicalizer is always
  // added, whether or not the decompositions were.
  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());

  // createTosaToLinalgNamed is added before createTosaToLinalg (last line),
  // with a canonicalizer run directly after it.
  pm.addNestedPass<func::FuncOp>(tosa::createTosaMakeBroadcastablePass());
  pm.addNestedPass<func::FuncOp>(tosa::createTosaToLinalgNamed());
  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
  // TODO: Remove pass that operates on const tensor and enable optionality
  pm.addNestedPass<func::FuncOp>(tosa::createTosaLayerwiseConstantFoldPass());
  // The make-broadcastable pass is added a second time here.
  pm.addNestedPass<func::FuncOp>(tosa::createTosaMakeBroadcastablePass());
  pm.addNestedPass<func::FuncOp>(tosa::createTosaValidationPass());
  pm.addNestedPass<func::FuncOp>(tosa::createTosaToLinalg());
}
akmaru commented 1 year ago

conversionに関するPassとその名前は include/mlir/Conversion/Passes.td で定義されている。

akmaru commented 1 year ago

Linalg特有のPassとその名前は include/mlir/Dialect/Linalg/Passes.tdで定義されている。

akmaru commented 1 year ago

linalg-fuse-elementwise-ops を試してみる。

./bin/mlir-opt -pass-pipeline="builtin.module(func.func(linalg-fuse-elementwise-ops))" conv-bn-relu-linalg.mlir

下記のように、addf(bias の加算)と maxf(ReLU の max)が 1 つの linalg.generic 内に fuse されているのがわかる。

%6 = linalg.generic {indexing_maps = [#map2, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %4 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%5 : tensor<1x224x224x64xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %7 = arith.addf %in, %in_0 : f32
      %8 = arith.maxf %7, %cst : f32
      linalg.yield %8 : f32
    } -> tensor<1x224x224x64xf32>
input ``` #map = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> #map2 = affine_map<(d0, d1, d2, d3) -> (d3)> module { func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %cst_0 = arith.constant dense<[1, 2, 3, 0]> : tensor<4xi64> %0 = tensor.empty() : tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 : f32 %3 = linalg.fill ins(%cst_1 : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %4 = tensor.empty() : tensor<1x224x224x64xf32> %5 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %6 = linalg.generic {indexing_maps = [#map2, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %5 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%4 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_3: f32, %out: f32): %9 = arith.addf %in, %in_3 : f32 linalg.yield %9 : f32 } -> tensor<1x224x224x64xf32> %cst_2 = arith.constant dense<0.000000e+00> : tensor<1x224x224x64xf32> %7 = tensor.empty() : tensor<1x224x224x64xf32> %8 = linalg.generic {indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%6, 
%cst_2 : tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) outs(%7 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_3: f32, %out: f32): %9 = arith.maxf %in, %in_3 : f32 linalg.yield %9 : f32 } -> tensor<1x224x224x64xf32> return %8 : tensor<1x224x224x64xf32> } } ```
output ``` #map = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> #map2 = affine_map<(d0, d1, d2, d3) -> (d3)> module { func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %0 = tensor.empty() : tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = tensor.empty() : tensor<1x224x224x64xf32> %3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %5 = tensor.empty() : tensor<1x224x224x64xf32> %6 = linalg.generic {indexing_maps = [#map2, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %4 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%5 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_0: f32, %out: f32): %7 = arith.addf %in, %in_0 : f32 %8 = arith.maxf %7, %cst : f32 linalg.yield %8 : f32 } -> tensor<1x224x224x64xf32> return %6 : tensor<1x224x224x64xf32> } } ```
debug log ``` Args: ./bin/mlir-opt -debug -pass-pipeline=builtin.module(func.func(linalg-fuse-elementwise-ops)) conv-bn-relu-linalg.mlir Load new dialect in Context builtin ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubElementTypeInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubElementAttrInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr) ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CastOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ResourceBlobManagerDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeDialectInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineBinaryOpExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineConstantExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineDimExprStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineMapStorage) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::IntegerSetStorage) Load new dialect in Context builtin ImplicitTypeIDRegistry::lookupOrInsert(mlir::DebugActionManager::GenericHandler) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroOperands) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneRegion) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroResults) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroSuccessors) 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoRegionArguments) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoTerminator) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlock) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OpInvariants) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AffineScope) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsIsolatedFromAbove) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SymbolTable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasOnlyGraphRegion) Load new dialect in Context func Load new dialect in Context cf Load new dialect in Context arith ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectInlinerInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::bufferization::BufferizableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::BranchOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolUserOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AutomaticAllocationScope) ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroRegions) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneResult) 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ConstantLike) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AlwaysSpeculatableImplTrait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface::Trait) Load new dialect in Context tensor Load new dialect in Context affine ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineMapAccessInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineDmaStartOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineDmaWaitOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::LoopLikeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineReadOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::AffineWriteOpInterface) Load new dialect in Context complex ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedDimOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OffsetSizeAndStrideOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface) Load new dialect in Context linalg Load new dialect in Context math Load new dialect in Context memref ImplicitTypeIDRegistry::lookupOrInsert(mlir::CopyOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ViewLikeOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RuntimeVerifiableOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ContractionOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ConvolutionOpInterface) 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::FillOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::TilingInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::PartialReductionOpInterface) Ignoring repeated interface registrationIgnoring repeated interface registrationImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AtLeastNOperands<1>::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AttrSizedOperandSegments) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneOperand) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ReturnLike) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsTerminator) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicOperands) ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicResults) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::FillOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ConvolutionOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NOperands<2>::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsCommutative) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SameOperandsAndResultType) 
ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface::Trait) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Elementwise) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Scalarizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Vectorizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Tensorizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent::Impl::Impl]::Empty>) ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::MemRefsNormalizable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchTerminatorOpInterface) ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::OpToOpPassAdaptor) ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectFoldInterface) //===-------------------------------------------===// Processing operation : 'arith.constant'(0x563bdc2ff160) { %2 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.pad'(0x563bdc345c70) { } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.yield'(0x563bdc32bf30) { "tensor.yield"(%2) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.constant'(0x563bdc3272e0) { %1 = "arith.constant"() {value = dense<[1, 2, 3, 0]> : tensor<4xi64>} : () -> tensor<4xi64> ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasRecursiveMemoryEffects) ** Erase : 'arith.constant'(0x563bdc3272e0) } -> success : operation is trivially dead //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.empty'(0x563bdc323190) { %3 = "tensor.empty"() : () 
-> tensor<3x3x3x64xf32> } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc3515a0) { * Pattern {anonymous}::FuseElementwiseOps : 'linalg.generic -> ()' { Trying to match "{anonymous}::FuseElementwiseOps" "{anonymous}::FuseElementwiseOps" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldFillWithGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldFillWithGenericOp" "{anonymous}::FoldFillWithGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldScalarOrSplatConstant : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldScalarOrSplatConstant" "{anonymous}::FoldScalarOrSplatConstant" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveOutsDependency : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveOutsDependency" "{anonymous}::RemoveOutsDependency" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::DeduplicateAndRemoveDeadOperandsAndResults : 'linalg.generic -> ()' { Trying to match "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveUnusedCycleInGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveUnusedCycleInGenericOp" "{anonymous}::RemoveUnusedCycleInGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldWithProducerReshapeOpByExpansion : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldWithProducerReshapeOpByExpansion" "{anonymous}::FoldWithProducerReshapeOpByExpansion" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseIdentityGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseIdentityGenericOp" "{anonymous}::EraseIdentityGenericOp" result 
0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.generic -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldConstantTranspose : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldConstantTranspose" "{anonymous}::FoldConstantTranspose" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.yield'(0x563bdc32bb90) { "linalg.yield"(%arg3) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.empty'(0x563bdc338090) { %5 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.fill'(0x563bdc355540) { * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.fill -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.fill -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.yield'(0x563bdc32b7d0) { 
"linalg.yield"(%arg3) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.empty'(0x563bdc337210) { %7 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.conv_2d_nhwc_hwcf'(0x563bdc33a590) { * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.conv_2d_nhwc_hwcf -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.conv_2d_nhwc_hwcf -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.mulf'(0x563bdc30d040) { %12 = "arith.mulf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.addf'(0x563bdc35b670) { %13 = "arith.addf"(%arg5, %12) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.yield'(0x563bdc32b430) { "linalg.yield"(%13) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc352b50) { * Pattern {anonymous}::FuseElementwiseOps 
: 'linalg.generic -> ()' { Trying to match "{anonymous}::FuseElementwiseOps" "{anonymous}::FuseElementwiseOps" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldFillWithGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldFillWithGenericOp" "{anonymous}::FoldFillWithGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldScalarOrSplatConstant : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldScalarOrSplatConstant" "{anonymous}::FoldScalarOrSplatConstant" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveOutsDependency : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveOutsDependency" "{anonymous}::RemoveOutsDependency" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::DeduplicateAndRemoveDeadOperandsAndResults : 'linalg.generic -> ()' { Trying to match "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveUnusedCycleInGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveUnusedCycleInGenericOp" "{anonymous}::RemoveUnusedCycleInGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldWithProducerReshapeOpByExpansion : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldWithProducerReshapeOpByExpansion" "{anonymous}::FoldWithProducerReshapeOpByExpansion" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseIdentityGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseIdentityGenericOp" "{anonymous}::EraseIdentityGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern 
{anonymous}::InferStaticShapeOfOperands : 'linalg.generic -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldConstantTranspose : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldConstantTranspose" "{anonymous}::FoldConstantTranspose" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.addf'(0x563bdc35ec60) { %12 = "arith.addf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.yield'(0x563bdc32b070) { "linalg.yield"(%12) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.constant'(0x563bdc334a50) { %0 = "arith.constant"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.empty'(0x563bdc334690) { %10 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc35e9f0) { * Pattern {anonymous}::FuseElementwiseOps : 'linalg.generic -> ()' { Trying to match "{anonymous}::FuseElementwiseOps" ** Insert : 'linalg.generic'(0x563bdc305120) ** Insert : 'arith.addf'(0x563bdc315f70) ** Insert : 'arith.maxf'(0x563bdc396860) ** Insert : 
'linalg.yield'(0x563bdc332100) ** Replace : 'linalg.generic'(0x563bdc35e9f0) ** Erase : 'linalg.generic'(0x563bdc35e9f0) "{anonymous}::FuseElementwiseOps" result 1 } -> success : pattern applied successfully // *** IR Dump After Pattern Application *** mlir-asm-printer: Verifying operation: func.func func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant dense<0.000000e+00> : tensor<1x224x224x64xf32> %cst_0 = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst_0 : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %0 = tensor.empty() : tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = tensor.empty() : tensor<1x224x224x64xf32> %3 = linalg.fill ins(%cst_0 : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %4 = tensor.empty() : tensor<1x224x224x64xf32> %5 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %5 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%4 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_1: f32, %out: f32): %9 = arith.addf %in, %in_1 : f32 
linalg.yield %9 : f32 } -> tensor<1x224x224x64xf32> %7 = tensor.empty() : tensor<1x224x224x64xf32> %8 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %5, %cst : tensor<64xf32>, tensor<1x224x224x64xf32>, tensor<1x224x224x64xf32>) outs(%7 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_1: f32, %in_2: f32, %out: f32): %9 = arith.addf %in, %in_1 : f32 %10 = arith.maxf %9, %in_2 : f32 linalg.yield %10 : f32 } -> tensor<1x224x224x64xf32> return %8 : tensor<1x224x224x64xf32> } } -> success : pattern matched //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc352b50) { ** Erase : 'linalg.generic'(0x563bdc352b50) } -> success : operation is trivially dead //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.empty'(0x563bdc337210) { %7 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> ** Erase : 'tensor.empty'(0x563bdc337210) } -> success : operation is trivially dead //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.yield'(0x563bdc332100) { "linalg.yield"(%11) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.maxf'(0x563bdc396860) { %11 = "arith.maxf"(%10, %arg5) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.addf'(0x563bdc315f70) { 
%10 = "arith.addf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc305120) { * Pattern {anonymous}::FuseElementwiseOps : 'linalg.generic -> ()' { Trying to match "{anonymous}::FuseElementwiseOps" "{anonymous}::FuseElementwiseOps" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldFillWithGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldFillWithGenericOp" "{anonymous}::FoldFillWithGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldScalarOrSplatConstant : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldScalarOrSplatConstant" ** Insert : 'arith.constant'(0x563bdc3272e0) ** Insert : 'linalg.generic'(0x563bdc2facb0) ** Replace : 'linalg.generic'(0x563bdc305120) ** Erase : 'linalg.generic'(0x563bdc305120) "{anonymous}::FoldScalarOrSplatConstant" result 1 } -> success : pattern applied successfully // *** IR Dump After Pattern Application *** mlir-asm-printer: Verifying operation: func.func func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant dense<0.000000e+00> : tensor<1x224x224x64xf32> %cst_0 = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst_0 : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %0 = tensor.empty() : tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): 
linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = tensor.empty() : tensor<1x224x224x64xf32> %3 = linalg.fill ins(%cst_0 : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %5 = tensor.empty() : tensor<1x224x224x64xf32> %cst_1 = arith.constant 0.000000e+00 : f32 %6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %4 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%5 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_2: f32, %out: f32): %7 = arith.addf %in, %in_2 : f32 %8 = arith.maxf %7, %cst_1 : f32 linalg.yield %8 : f32 } -> tensor<1x224x224x64xf32> return %6 : tensor<1x224x224x64xf32> } } -> success : pattern matched //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.constant'(0x563bdc334a50) { %0 = "arith.constant"() {value = dense<0.000000e+00> : tensor<1x224x224x64xf32>} : () -> tensor<1x224x224x64xf32> ** Erase : 'arith.constant'(0x563bdc334a50) } -> success : operation is trivially dead //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc2facb0) { * Pattern {anonymous}::FuseElementwiseOps : 'linalg.generic -> ()' { Trying to match "{anonymous}::FuseElementwiseOps" "{anonymous}::FuseElementwiseOps" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldFillWithGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldFillWithGenericOp" "{anonymous}::FoldFillWithGenericOp" result 0 } -> 
failure : pattern failed to match * Pattern {anonymous}::FoldScalarOrSplatConstant : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldScalarOrSplatConstant" "{anonymous}::FoldScalarOrSplatConstant" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveOutsDependency : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveOutsDependency" "{anonymous}::RemoveOutsDependency" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::DeduplicateAndRemoveDeadOperandsAndResults : 'linalg.generic -> ()' { Trying to match "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveUnusedCycleInGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveUnusedCycleInGenericOp" "{anonymous}::RemoveUnusedCycleInGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldWithProducerReshapeOpByExpansion : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldWithProducerReshapeOpByExpansion" "{anonymous}::FoldWithProducerReshapeOpByExpansion" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseIdentityGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseIdentityGenericOp" "{anonymous}::EraseIdentityGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.generic -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldConstantTranspose : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldConstantTranspose" 
"{anonymous}::FoldConstantTranspose" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.constant'(0x563bdc3272e0) { %8 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 ** Erase : 'arith.constant'(0x563bdc3272e0) } -> success : operation was folded //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc2facb0) { * Pattern {anonymous}::FuseElementwiseOps : 'linalg.generic -> ()' { Trying to match "{anonymous}::FuseElementwiseOps" "{anonymous}::FuseElementwiseOps" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldFillWithGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldFillWithGenericOp" "{anonymous}::FoldFillWithGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldScalarOrSplatConstant : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldScalarOrSplatConstant" "{anonymous}::FoldScalarOrSplatConstant" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveOutsDependency : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveOutsDependency" "{anonymous}::RemoveOutsDependency" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::DeduplicateAndRemoveDeadOperandsAndResults : 'linalg.generic -> ()' { Trying to match "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveUnusedCycleInGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveUnusedCycleInGenericOp" "{anonymous}::RemoveUnusedCycleInGenericOp" result 0 } -> failure : pattern failed to match * Pattern 
{anonymous}::FoldWithProducerReshapeOpByExpansion : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldWithProducerReshapeOpByExpansion" "{anonymous}::FoldWithProducerReshapeOpByExpansion" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseIdentityGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseIdentityGenericOp" "{anonymous}::EraseIdentityGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.generic -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldConstantTranspose : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldConstantTranspose" "{anonymous}::FoldConstantTranspose" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.maxf'(0x563bdc35e9f0) { %10 = "arith.maxf"(%9, %0) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'func.return'(0x563bdc333ba0) { "func.return"(%8) : (tensor<1x224x224x64xf32>) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.constant'(0x563bdc2ff160) { %0 = "arith.constant"() {value = 0.000000e+00 : f32} : () -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// 
//===-------------------------------------------===// Processing operation : 'tensor.pad'(0x563bdc345c70) { } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.yield'(0x563bdc32bf30) { "tensor.yield"(%0) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.empty'(0x563bdc323190) { %2 = "tensor.empty"() : () -> tensor<3x3x3x64xf32> } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc3515a0) { * Pattern {anonymous}::FuseElementwiseOps : 'linalg.generic -> ()' { Trying to match "{anonymous}::FuseElementwiseOps" "{anonymous}::FuseElementwiseOps" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldFillWithGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldFillWithGenericOp" "{anonymous}::FoldFillWithGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldScalarOrSplatConstant : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldScalarOrSplatConstant" "{anonymous}::FoldScalarOrSplatConstant" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveOutsDependency : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveOutsDependency" "{anonymous}::RemoveOutsDependency" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::DeduplicateAndRemoveDeadOperandsAndResults : 'linalg.generic -> ()' { Trying to match "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveUnusedCycleInGenericOp : 'linalg.generic -> ()' { 
Trying to match "{anonymous}::RemoveUnusedCycleInGenericOp" "{anonymous}::RemoveUnusedCycleInGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldWithProducerReshapeOpByExpansion : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldWithProducerReshapeOpByExpansion" "{anonymous}::FoldWithProducerReshapeOpByExpansion" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseIdentityGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseIdentityGenericOp" "{anonymous}::EraseIdentityGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.generic -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldConstantTranspose : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldConstantTranspose" "{anonymous}::FoldConstantTranspose" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.yield'(0x563bdc32bb90) { "linalg.yield"(%arg3) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.empty'(0x563bdc338090) { %4 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.fill'(0x563bdc355540) { * Pattern {anonymous}::EraseDeadLinalgOp : 
'linalg.fill -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.fill -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.yield'(0x563bdc32b7d0) { "linalg.yield"(%arg3) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.conv_2d_nhwc_hwcf'(0x563bdc33a590) { * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.conv_2d_nhwc_hwcf -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.conv_2d_nhwc_hwcf -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.mulf'(0x563bdc30d040) { %9 = "arith.mulf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.addf'(0x563bdc35b670) { %10 = "arith.addf"(%arg5, %9) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing 
operation : 'linalg.yield'(0x563bdc32b430) { "linalg.yield"(%10) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'tensor.empty'(0x563bdc334690) { %7 = "tensor.empty"() : () -> tensor<1x224x224x64xf32> } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.generic'(0x563bdc2facb0) { * Pattern {anonymous}::FuseElementwiseOps : 'linalg.generic -> ()' { Trying to match "{anonymous}::FuseElementwiseOps" "{anonymous}::FuseElementwiseOps" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldFillWithGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldFillWithGenericOp" "{anonymous}::FoldFillWithGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldScalarOrSplatConstant : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldScalarOrSplatConstant" "{anonymous}::FoldScalarOrSplatConstant" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveOutsDependency : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveOutsDependency" "{anonymous}::RemoveOutsDependency" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::DeduplicateAndRemoveDeadOperandsAndResults : 'linalg.generic -> ()' { Trying to match "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" "{anonymous}::DeduplicateAndRemoveDeadOperandsAndResults" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::RemoveUnusedCycleInGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::RemoveUnusedCycleInGenericOp" "{anonymous}::RemoveUnusedCycleInGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldWithProducerReshapeOpByExpansion : 'linalg.generic -> ()' { Trying to match 
"{anonymous}::FoldWithProducerReshapeOpByExpansion" "{anonymous}::FoldWithProducerReshapeOpByExpansion" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseIdentityGenericOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseIdentityGenericOp" "{anonymous}::EraseIdentityGenericOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::EraseDeadLinalgOp : 'linalg.generic -> ()' { Trying to match "{anonymous}::EraseDeadLinalgOp" "{anonymous}::EraseDeadLinalgOp" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::InferStaticShapeOfOperands : 'linalg.generic -> ()' { Trying to match "{anonymous}::InferStaticShapeOfOperands" "{anonymous}::InferStaticShapeOfOperands" result 0 } -> failure : pattern failed to match * Pattern {anonymous}::FoldConstantTranspose : 'linalg.generic -> ()' { Trying to match "{anonymous}::FoldConstantTranspose" "{anonymous}::FoldConstantTranspose" result 0 } -> failure : pattern failed to match } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.addf'(0x563bdc396910) { %9 = "arith.addf"(%arg3, %arg4) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'arith.maxf'(0x563bdc35e9f0) { %10 = "arith.maxf"(%9, %0) {fastmath = #arith.fastmath} : (f32, f32) -> f32 } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'linalg.yield'(0x563bdc31c230) { "linalg.yield"(%10) : (f32) -> () } -> failure : pattern failed to match //===-------------------------------------------===// //===-------------------------------------------===// Processing operation : 'func.return'(0x563bdc333ba0) { 
"func.return"(%8) : (tensor<1x224x224x64xf32>) -> () } -> failure : pattern failed to match //===-------------------------------------------===// ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::PreservedAnalyses::AllAnalysesType) mlir-asm-printer: Verifying operation: builtin.module #map = ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::StorageUserTrait::IsMutable) ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface::Trait) affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> #map2 = affine_map<(d0, d1, d2, d3) -> (d3)> module { func.func @conv_bn_relu(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<64x3x3x3xf32>, %arg2: tensor<64xf32>) -> tensor<1x224x224x64xf32> { %cst = arith.constant 0.000000e+00 : f32 %padded = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] { ^bb0(%arg3: index, %arg4: index, %arg5: index, %arg6: index): tensor.yield %cst : f32 } : tensor<1x224x224x3xf32> to tensor<1x226x226x3xf32> %0 = tensor.empty() : tensor<3x3x3x64xf32> %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<64x3x3x3xf32>) outs(%0 : tensor<3x3x3x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<3x3x3x64xf32> %2 = tensor.empty() : tensor<1x224x224x64xf32> %3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %4 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %1 : tensor<1x226x226x3xf32>, tensor<3x3x3x64xf32>) outs(%3 : tensor<1x224x224x64xf32>) -> tensor<1x224x224x64xf32> %5 = tensor.empty() : tensor<1x224x224x64xf32> %6 = linalg.generic {indexing_maps = [#map2, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %4 : tensor<64xf32>, tensor<1x224x224x64xf32>) outs(%5 : tensor<1x224x224x64xf32>) { ^bb0(%in: f32, %in_0: f32, %out: f32): %7 = 
arith.addf %in, %in_0 : f32 %8 = arith.maxf %7, %cst : f32 linalg.yield %8 : f32 } -> tensor<1x224x224x64xf32> return %6 : tensor<1x224x224x64xf32> } } ```