llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.17k stars 11.12k forks source link

[mlir] Inconsistent results for arith.remf #94431

Closed wangyongj1a closed 1 month ago

wangyongj1a commented 1 month ago

I have the following MLIR program, test.mlir:

module {
  func.func private @func1() {
    %cst_0 = arith.constant 7.668000e+03 : f32
    %cst_2 = arith.constant 2.515200e+04 : f32
    %cst_3 = arith.constant 3.904000e+04 : f32
    %1 = arith.remf %cst_2, %cst_3 : f32 
    vector.print %1 : f32
    %26 = arith.remf %cst_0, %1 : f32 
    vector.print %26 : f32
    return
  }
}

When I ran mlir-opt --convert-arith-to-llvm --convert-vector-to-llvm --convert-func-to-llvm test.mlir on the program and executed the resulting executable, I got the following output:

-13888
7668

However, when I ran mlir-opt --int-range-optimizations --convert-arith-to-llvm --convert-vector-to-llvm --convert-func-to-llvm test.mlir on the program and executed the resulting executable, I got the following output:

-13888
-6220

I also wrote a C++ program that I believe to be equivalent to the above MLIR program:

#include <cmath>
#include <iostream> 
int main()
{
  float a = 7.668000e+03;
  float b = 2.515200e+04;
  float c = 3.904000e+04;
  float res1, res2;
  res1 = std::fmod(b, c);
  std::cout << res1 << "\n";
  res2 = std::fmod(a, res1);
  std::cout << res2 << "\n";
  return 0;
}

The output of this C++ program is:

25152
7668

The three results above are inconsistent with one another. I'm not sure whether there is a bug in my program or whether incorrect usage of the above passes caused these results. My git version is dd82fd4744397e0510c8204f1a6031441e21858e.

bviyer commented 1 month ago

The issue seems to be with the --int-range-optimizations flag. When I moved the flag to the end (or anywhere but the beginning), I got the same answer as the C++ program and as the program without the --int-range-optimizations flag.

build ➤ ./bin/mlir-opt --mlir-print-ir-after-all  --convert-arith-to-llvm --convert-vector-to-llvm --convert-func-to-llvm --int-range-optimizations ./99431.mlir | ./bin/mlir-cpu-runner -e func1 --shared-libs=$PWD/lib/libmlir_runner_utils.so,$PWD/lib/libmlir_c_runner_utils.so
// -----// IR Dump After ArithToLLVMConversionPass (convert-arith-to-llvm) //----- //
module {
  func.func private @func1() -> f32 {
    %0 = llvm.mlir.constant(7.668000e+03 : f32) : f32
    %1 = llvm.mlir.constant(2.515200e+04 : f32) : f32
    %2 = llvm.mlir.constant(3.904000e+04 : f32) : f32
    %3 = llvm.mlir.constant(-1.388800e+04 : f32) : f32
    vector.print %3 : f32
    %4 = llvm.frem %0, %3  : f32
    vector.print %4 : f32
    return %4 : f32
  }
}

// -----// IR Dump After ConvertVectorToLLVMPass (convert-vector-to-llvm) //----- //
module {
  llvm.func @printNewline()
  llvm.func @printF32(f32)
  func.func private @func1() -> f32 {
    %0 = llvm.mlir.constant(7.668000e+03 : f32) : f32
    %1 = llvm.mlir.constant(-1.388800e+04 : f32) : f32
    llvm.call @printF32(%1) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    %2 = llvm.frem %0, %1  : f32
    llvm.call @printF32(%2) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    return %2 : f32
  }
}

// -----// IR Dump After ConvertFuncToLLVMPass (convert-func-to-llvm) //----- //
module {
  llvm.func @printNewline()
  llvm.func @printF32(f32)
  llvm.func @func1() -> f32 attributes {sym_visibility = "private"} {
    %0 = llvm.mlir.constant(7.668000e+03 : f32) : f32
    %1 = llvm.mlir.constant(-1.388800e+04 : f32) : f32
    llvm.call @printF32(%1) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    %2 = llvm.frem %0, %1  : f32
    llvm.call @printF32(%2) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    llvm.return %2 : f32
  }
}

// -----// IR Dump After ArithIntRangeOpts (int-range-optimizations) //----- //
module {
  llvm.func @printNewline()
  llvm.func @printF32(f32)
  llvm.func @func1() -> f32 attributes {sym_visibility = "private"} {
    %0 = llvm.mlir.constant(7.668000e+03 : f32) : f32
    %1 = llvm.mlir.constant(-1.388800e+04 : f32) : f32
    llvm.call @printF32(%1) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    %2 = llvm.frem %0, %1  : f32
    llvm.call @printF32(%2) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    llvm.return %2 : f32
  }
}

-13888
7668
7.668000e+03

The issue seems to be that the int-range-optimizations pass removes the calculation and replaces it with a constant. Here is what I see when --int-range-optimizations is run first.

build ➤ ./bin/mlir-opt --mlir-print-ir-after-all --int-range-optimizations   --convert-arith-to-llvm --convert-vector-to-llvm --convert-func-to-llvm  ./99431.mlir | ./bin/mlir-cpu-runner -e func1 --shared-libs=$PWD/lib/libmlir_runner_utils.so,$PWD/lib/libmlir_c_runner_utils.so
// -----// IR Dump After ArithIntRangeOpts (int-range-optimizations) //----- //
module {
  func.func private @func1() -> f32 {
    %cst = arith.constant -6.220000e+03 : f32
    %cst_0 = arith.constant -1.388800e+04 : f32
    vector.print %cst_0 : f32
    vector.print %cst : f32
    return %cst : f32
  }
}

// -----// IR Dump After ArithToLLVMConversionPass (convert-arith-to-llvm) //----- //
module {
  func.func private @func1() -> f32 {
    %0 = llvm.mlir.constant(-6.220000e+03 : f32) : f32
    %1 = llvm.mlir.constant(-1.388800e+04 : f32) : f32
    vector.print %1 : f32
    vector.print %0 : f32
    return %0 : f32
  }
}

// -----// IR Dump After ConvertVectorToLLVMPass (convert-vector-to-llvm) //----- //
module {
  llvm.func @printNewline()
  llvm.func @printF32(f32)
  func.func private @func1() -> f32 {
    %0 = llvm.mlir.constant(-6.220000e+03 : f32) : f32
    %1 = llvm.mlir.constant(-1.388800e+04 : f32) : f32
    llvm.call @printF32(%1) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    llvm.call @printF32(%0) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    return %0 : f32
  }
}

// -----// IR Dump After ConvertFuncToLLVMPass (convert-func-to-llvm) //----- //
module {
  llvm.func @printNewline()
  llvm.func @printF32(f32)
  llvm.func @func1() -> f32 attributes {sym_visibility = "private"} {
    %0 = llvm.mlir.constant(-6.220000e+03 : f32) : f32
    %1 = llvm.mlir.constant(-1.388800e+04 : f32) : f32
    llvm.call @printF32(%1) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    llvm.call @printF32(%0) : (f32) -> ()
    llvm.call @printNewline() : () -> ()
    llvm.return %0 : f32
  }
}

-13888
-6220
-6.220000e+03

NOTE: I made a small change so that the function returns the last computed f32 value, to satisfy mlir-cpu-runner.