iree-org / iree

A retargetable MLIR-based machine learning compiler and runtime toolkit.
http://iree.dev/
Apache License 2.0
2.79k stars 604 forks source link

math.exp2 fails to compile for llvm-cpu #12974

Closed sogartar closed 1 year ago

sogartar commented 1 year ago

What happened?

math.exp2 for tensor types can't be compiled for the target backend llvm-cpu.

It gives:

lld: error: undefined symbol: exp2f

Steps to reproduce your issue

Compile this MLIR:

func.func @f1(%arg0: tensor<1xf32>) -> tensor<1xf32> {
  %0 = math.exp2 %arg0 : tensor<1xf32>
  return %0 : tensor<1xf32>
}

with

iree-compile --iree-hal-target-backends=llvm-cpu -o f1.vmfb f1.mlir

This gives the error:

lld: error: undefined symbol: exp2f
>>> referenced by -:2
>>>               /tmp/f1_dispatch_0-f9f85f.o:(f1_dispatch_0_generic)
>>> did you mean: expf
>>> defined in: /tmp/f1_dispatch_0-f9f85f.o
Linking failed; escaped command line returned exit code 256:

LLD_VERSION=IREE /home/petkantchin/ws/iree/build/ninja/RelWithDebInfo/llvm-project/bin/lld -flavor gnu -o /tmp/f1_dispatch_0-f9f85f.so --build-id=none -nostdlib -static -shared --no-undefined --no-allow-shlib-undefined --allow-multiple-definition --gc-sections -z now -z relro --discard-all --icf=all --ignore-data-address-equality --ignore-function-address-equality --hash-style=sysv /tmp/f1_dispatch_0-f9f85f.o

f1.mlir:2:8: error: failed to link executable and generate target dylib (check above for more specific error messages)
  %0 = math.exp2 %arg0 : tensor<1xf32>
       ^
f1.mlir:1:1: note: called from
func.func @f1(%arg0: tensor<1xf32>) -> tensor<1xf32> {
^
f1.mlir:2:8: error: failed to serialize executable for target backend llvm-cpu
  %0 = math.exp2 %arg0 : tensor<1xf32>
       ^
f1.mlir:1:1: note: called from
func.func @f1(%arg0: tensor<1xf32>) -> tensor<1xf32> {
^
f1.mlir:2:8: note: see current operation: 
"hal.executable.variant"() ({
  "hal.executable.export"() ({
  ^bb0(%arg0: !hal.device):
    %0 = "arith.constant"() {value = 1 : index} : () -> index
    "hal.return"(%0, %0, %0) : (index, index, index) -> ()
  }) {layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "f1_dispatch_0_generic", translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
  "builtin.module"() ({
    "llvm.func"() ({
    ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
      %0 = "llvm.mlir.constant"() {value = 0 : i32} : () -> i32
      %1 = "llvm.mlir.constant"() {value = 63 : index} : () -> i64
      %2 = "llvm.mlir.constant"() {value = 0 : index} : () -> i64
      %3 = "llvm.load"(%arg1) {ordering = 0 : i64} : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>
      %4 = "llvm.extractvalue"(%3) {position = array<i64: 10>} : (!llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>) -> !llvm.ptr
      %5 = "llvm.load"(%4) {ordering = 0 : i64} : (!llvm.ptr) -> !llvm.ptr
      %6 = "llvm.ptrtoint"(%5) : (!llvm.ptr) -> i64
      %7 = "llvm.and"(%6, %1) : (i64, i64) -> i64
      %8 = "llvm.icmp"(%7, %2) {predicate = 0 : i64} : (i64, i64) -> i1
      "llvm.intr.assume"(%8) : (i1) -> ()
      %9 = "llvm.load"(%arg1) {ordering = 0 : i64} : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>
      %10 = "llvm.extractvalue"(%9) {position = array<i64: 10>} : (!llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>) -> !llvm.ptr
      %11 = "llvm.getelementptr"(%10) {elem_type = !llvm.ptr, rawConstantIndices = array<i32: 1>} : (!llvm.ptr) -> !llvm.ptr
      %12 = "llvm.load"(%11) {ordering = 0 : i64} : (!llvm.ptr) -> !llvm.ptr
      %13 = "llvm.ptrtoint"(%12) : (!llvm.ptr) -> i64
      %14 = "llvm.and"(%13, %1) : (i64, i64) -> i64
      %15 = "llvm.icmp"(%14, %2) {predicate = 0 : i64} : (i64, i64) -> i1
      "llvm.intr.assume"(%15) : (i1) -> ()
      %16 = "llvm.load"(%5) {ordering = 0 : i64} : (!llvm.ptr) -> f32
      %17 = "llvm.intr.exp2"(%16) {fastmathFlags = #llvm.fastmath<none>} : (f32) -> f32
      "llvm.store"(%17, %12) {ordering = 0 : i64} : (f32, !llvm.ptr) -> ()
      "llvm.return"(%0) : (i32) -> ()
    }) {CConv = #llvm.cconv<ccc>, arg_attrs = [{llvm.align = 16 : i64, llvm.noalias}, {llvm.align = 16 : i64, llvm.noalias}, {llvm.align = 16 : i64, llvm.noalias}], function_type = !llvm.func<i32 (ptr, ptr, ptr)>, linkage = #llvm.linkage<external>, sym_name = "f1_dispatch_0_generic", visibility_ = 0 : i64} : () -> ()
  }) {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} : () -> ()
  "hal.executable.variant_end"() : () -> ()
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> ()
  %0 = math.exp2 %arg0 : tensor<1xf32>
       ^
f1.mlir:2:8: error: failed to serialize executables
  %0 = math.exp2 %arg0 : tensor<1xf32>
       ^
f1.mlir:1:1: note: called from
func.func @f1(%arg0: tensor<1xf32>) -> tensor<1xf32> {
^
f1.mlir:2:8: note: see current operation: 
"hal.executable"() ({
  "hal.executable.variant"() ({
    "hal.executable.export"() ({
    ^bb0(%arg0: !hal.device):
      %0 = "arith.constant"() {value = 1 : index} : () -> index
      "hal.return"(%0, %0, %0) : (index, index, index) -> ()
    }) {layout = #hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>, ordinal = 0 : index, sym_name = "f1_dispatch_0_generic", translation_info = #iree_codegen.translation_info<CPUDefault>} : () -> ()
    "builtin.module"() ({
      "llvm.func"() ({
      ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
        %0 = "llvm.mlir.constant"() {value = 0 : i32} : () -> i32
        %1 = "llvm.mlir.constant"() {value = 63 : index} : () -> i64
        %2 = "llvm.mlir.constant"() {value = 0 : index} : () -> i64
        %3 = "llvm.load"(%arg1) {ordering = 0 : i64} : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>
        %4 = "llvm.extractvalue"(%3) {position = array<i64: 10>} : (!llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>) -> !llvm.ptr
        %5 = "llvm.load"(%4) {ordering = 0 : i64} : (!llvm.ptr) -> !llvm.ptr
        %6 = "llvm.ptrtoint"(%5) : (!llvm.ptr) -> i64
        %7 = "llvm.and"(%6, %1) : (i64, i64) -> i64
        %8 = "llvm.icmp"(%7, %2) {predicate = 0 : i64} : (i64, i64) -> i1
        "llvm.intr.assume"(%8) : (i1) -> ()
        %9 = "llvm.load"(%arg1) {ordering = 0 : i64} : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>
        %10 = "llvm.extractvalue"(%9) {position = array<i64: 10>} : (!llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>) -> !llvm.ptr
        %11 = "llvm.getelementptr"(%10) {elem_type = !llvm.ptr, rawConstantIndices = array<i32: 1>} : (!llvm.ptr) -> !llvm.ptr
        %12 = "llvm.load"(%11) {ordering = 0 : i64} : (!llvm.ptr) -> !llvm.ptr
        %13 = "llvm.ptrtoint"(%12) : (!llvm.ptr) -> i64
        %14 = "llvm.and"(%13, %1) : (i64, i64) -> i64
        %15 = "llvm.icmp"(%14, %2) {predicate = 0 : i64} : (i64, i64) -> i1
        "llvm.intr.assume"(%15) : (i1) -> ()
        %16 = "llvm.load"(%5) {ordering = 0 : i64} : (!llvm.ptr) -> f32
        %17 = "llvm.intr.exp2"(%16) {fastmathFlags = #llvm.fastmath<none>} : (f32) -> f32
        "llvm.store"(%17, %12) {ordering = 0 : i64} : (f32, !llvm.ptr) -> ()
        "llvm.return"(%0) : (i32) -> ()
      }) {CConv = #llvm.cconv<ccc>, arg_attrs = [{llvm.align = 16 : i64, llvm.noalias}, {llvm.align = 16 : i64, llvm.noalias}, {llvm.align = 16 : i64, llvm.noalias}], function_type = !llvm.func<i32 (ptr, ptr, ptr)>, linkage = #llvm.linkage<external>, sym_name = "f1_dispatch_0_generic", visibility_ = 0 : i64} : () -> ()
    }) {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} : () -> ()
    "hal.executable.variant_end"() : () -> ()
  }) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> ()
  "hal.executable_end"() : () -> ()
}) {sym_name = "f1_dispatch_0", sym_visibility = "private"} : () -> ()
  %0 = math.exp2 %arg0 : tensor<1xf32>

What component(s) does this issue relate to?

MLIR, Compiler

Version information

d1ae167ad685e068f55a736e2e0861e324175313

Additional context

No response

jpienaar commented 1 year ago

Related to https://github.com/openxla/iree/issues/12783

bviyer commented 1 year ago

I tried your test code and was able to compile it successfully:

iree/build$ cat f1.mlir 
func.func @f1(%arg0: tensor<1xf32>) -> tensor<1xf32> {
  %0 = math.exp2 %arg0 : tensor<1xf32>
  return %0 : tensor<1xf32>
}
iree/build$ ./tools/iree-compile --iree-hal-target-backends=llvm-cpu  -o f1.vmfb f1.mlir
iree/build$