Open unterumarmung opened 1 year ago
When working on Simplifying Optimization of Flang intrinsics codegen, it was discovered that it is not possible to use llvm.intr.maximum.f128.
llvm.intr.maximum.f128
subroutine test(x, t) real(16) :: x(100), t t = maxval(x) end subroutine test
$ ./build/bin/flang-new -c -O2 f.f90
LLVM ERROR: Cannot select: t27: f128 = fmaximum contract t26, t25 t26: f128,ch = load<(load (s128) from %ir.scevgep, !tbaa !3)> t0, t5, undef:i64 t5: i64 = add t2, t4 t2: i64,ch = CopyFromReg t0, Register:i64 %4 t1: i64 = Register %4 t4: i64,ch = CopyFromReg t0, Register:i64 %0 t3: i64 = Register %0 t9: i64 = undef t25: f128 = fmaximum contract t24, t21 t24: f128,ch = load<(load (s128) from %ir.scevgep2, !tbaa !3)> t0, t23, undef:i64 t23: i64 = add t5, Constant:i64<-16> t5: i64 = add t2, t4 t2: i64,ch = CopyFromReg t0, Register:i64 %4 t1: i64 = Register %4 t4: i64,ch = CopyFromReg t0, Register:i64 %0 t3: i64 = Register %0 t22: i64 = Constant<-16> t9: i64 = undef t21: f128 = fmaximum contract t20, t17 t20: f128,ch = load<(load (s128) from %ir.scevgep4, !tbaa !3)> t0, t19, undef:i64 t19: i64 = add t5, Constant:i64<-32> t5: i64 = add t2, t4 t2: i64,ch = CopyFromReg t0, Register:i64 %4 t1: i64 = Register %4 t4: i64,ch = CopyFromReg t0, Register:i64 %0 t3: i64 = Register %0 t18: i64 = Constant<-32> t9: i64 = undef t17: f128 = fmaximum contract t16, t13 t16: f128,ch = load<(load (s128) from %ir.scevgep6, !tbaa !3)> t0, t15, undef:i64 t15: i64 = add t5, Constant:i64<-48> t5: i64 = add t2, t4 t2: i64,ch = CopyFromReg t0, Register:i64 %4 t1: i64 = Register %4 t4: i64,ch = CopyFromReg t0, Register:i64 %0 t3: i64 = Register %0 t14: i64 = Constant<-48> t9: i64 = undef t13: f128 = fmaximum contract t10, t12 t10: f128,ch = load<(load (s128) from %ir.scevgep8, !tbaa !3)> t0, t7, undef:i64 t7: i64 = add t5, Constant:i64<-64> t5: i64 = add t2, t4 t2: i64,ch = CopyFromReg t0, Register:i64 %4 t1: i64 = Register %4 t4: i64,ch = CopyFromReg t0, Register:i64 %0 t3: i64 = Register %0 t6: i64 = Constant<-64> t9: i64 = undef t12: f128,ch = CopyFromReg t0, Register:f128 %1 t11: f128 = Register %1 In function: test_ PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace. Stack dump: 0. 
Program arguments: /home/unterumarmung/dev/llvm-project/build/bin/flang-new -fc1 -triple x86_64-unknown-linux-gnu -emit-obj -fcolor-diagnostics -mrelocation-model pic -pic-level 2 -pic-is-pie -target-cpu x86-64 -O2 -o f.o -x f95-cpp-input f.f90 1. Running pass 'Function Pass Manager' on module 'FIRModule'. 2. Running pass 'X86 DAG->DAG Instruction Selection' on function '@test_' #0 0x00000000014e45f7 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x14e45f7) #1 0x00000000014e205e llvm::sys::RunSignalHandlers() (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x14e205e) #2 0x00000000014e4f4f SignalHandler(int) Signals.cpp:0:0 #3 0x00007fbbb0211420 __restore_rt (/lib/x86_64-linux-gnu/libpthread.so.0+0x14420) #4 0x00007fbbafc0e00b raise /build/glibc-SzIz7B/glibc-2.31/signal/../sysdeps/unix/sysv/linux/raise.c:51:1 #5 0x00007fbbafbed859 abort /build/glibc-SzIz7B/glibc-2.31/stdlib/abort.c:81:7 #6 0x0000000001487f43 llvm::report_fatal_error(llvm::Twine const&, bool) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x1487f43) #7 0x00000000019ca53e llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x19ca53e) #8 0x00000000019c978c llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x19c978c) #9 0x0000000001178537 (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*) X86ISelDAGToDAG.cpp:0:0 #10 0x00000000019bdfd5 llvm::SelectionDAGISel::DoInstructionSelection() (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x19bdfd5) #11 0x00000000019bceef llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x19bceef) #12 0x00000000019bab01 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x19bab01) #13 
0x00000000019b7524 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x19b7524) #14 0x000000000116a2e9 (anonymous namespace)::X86DAGToDAGISel::runOnMachineFunction(llvm::MachineFunction&) X86ISelDAGToDAG.cpp:0:0 #15 0x0000000004010d85 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x4010d85) #16 0x000000000579f404 llvm::FPPassManager::runOnFunction(llvm::Function&) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x579f404) #17 0x00000000057a5f01 llvm::FPPassManager::runOnModule(llvm::Module&) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x57a5f01) #18 0x000000000579fab1 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x579fab1) #19 0x0000000001b0554e Fortran::frontend::CodeGenAction::executeAction() (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x1b0554e) #20 0x0000000001516f8c Fortran::frontend::FrontendAction::execute() (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x1516f8c) #21 0x0000000001504976 Fortran::frontend::CompilerInstance::executeAction(Fortran::frontend::FrontendAction&) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x1504976) #22 0x000000000151b798 Fortran::frontend::executeCompilerInvocation(Fortran::frontend::CompilerInstance*) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x151b798) #23 0x00000000010a2987 fc1_main(llvm::ArrayRef, char const*) (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x10a2987) #24 0x00000000010a0f92 main (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x10a0f92) #25 0x00007fbbafbef083 __libc_start_main /build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:342:3 #26 0x00000000010a0aae _start (/home/unterumarmung/dev/llvm-project/build/bin/flang-new+0x10a0aae) flang-new: error: unable to execute command: Aborted flang-new: error: 
flang frontend command failed due to signal (use -v to see invocation) flang-new version 18.0.0 (https://github.com/llvm/llvm-project 1a0cbb9c32edf64ab620d31242029b31795fcfd8) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /home/unterumarmung/dev/llvm-project/build/bin flang-new: note: diagnostic msg: ******************** PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT: Preprocessed source(s) and associated run script(s) are located at: flang-new: note: diagnostic msg: /tmp/f-b4496e flang-new: note: diagnostic msg: /tmp/f-b4496e.sh flang-new: note: diagnostic msg: ********************
The same error occurs with the -ffast-math flag:
-ffast-math
; ModuleID = 'FIRModule' source_filename = "FIRModule" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: nofree nosync nounwind memory(argmem: readwrite) define void @test_(ptr nocapture readonly %0, ptr nocapture writeonly %1) local_unnamed_addr #0 { .lr.ph.i: br label %2 2: ; preds = %2, %.lr.ph.i %3 = phi fp128 [ 0xLFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFF, %.lr.ph.i ], [ %28, %2 ] %4 = phi i64 [ 0, %.lr.ph.i ], [ %29, %2 ] %5 = shl i64 %4, 4 %6 = getelementptr i8, ptr %0, i64 %5 %7 = load fp128, ptr %6, align 16, !tbaa !3 %8 = tail call contract fp128 @llvm.maximum.f128(fp128 %7, fp128 %3) %9 = shl i64 %4, 4 %10 = add i64 %9, 16 %11 = getelementptr i8, ptr %0, i64 %10 %12 = load fp128, ptr %11, align 16, !tbaa !3 %13 = tail call contract fp128 @llvm.maximum.f128(fp128 %12, fp128 %8) %14 = shl i64 %4, 4 %15 = add i64 %14, 32 %16 = getelementptr i8, ptr %0, i64 %15 %17 = load fp128, ptr %16, align 16, !tbaa !3 %18 = tail call contract fp128 @llvm.maximum.f128(fp128 %17, fp128 %13) %19 = shl i64 %4, 4 %20 = add i64 %19, 48 %21 = getelementptr i8, ptr %0, i64 %20 %22 = load fp128, ptr %21, align 16, !tbaa !3 %23 = tail call contract fp128 @llvm.maximum.f128(fp128 %22, fp128 %18) %24 = shl i64 %4, 4 %25 = add i64 %24, 64 %26 = getelementptr i8, ptr %0, i64 %25 %27 = load fp128, ptr %26, align 16, !tbaa !3 %28 = tail call contract fp128 @llvm.maximum.f128(fp128 %27, fp128 %23) %29 = add nuw nsw i64 %4, 5 %exitcond.not.i.4 = icmp eq i64 %29, 100 br i1 %exitcond.not.i.4, label %_FortranAMaxvalReal16x1_contract_simplified.exit, label %2 _FortranAMaxvalReal16x1_contract_simplified.exit: ; preds = %2 store fp128 %28, ptr %1, align 16, !tbaa !3 ret void } ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) declare fp128 @llvm.maximum.f128(fp128, fp128) #1 attributes #0 = { nofree nosync nounwind memory(argmem: readwrite) } 
attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } !llvm.module.flags = !{!0, !1, !2} !0 = !{i32 2, !"Debug Info Version", i32 3} !1 = !{i32 8, !"PIC Level", i32 2} !2 = !{i32 7, !"PIE Level", i32 2} !3 = !{!4, !4, i64 0} !4 = !{!"any data access", !5, i64 0} !5 = !{!"any access", !6, i64 0} !6 = !{!"Flang Type TBAA Root"}
$ ./build/bin/llc f.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @test_(fp128 %0) { .lr.ph.i: %1 = tail call fp128 @llvm.maximum.f128(fp128 %0, fp128 0xL00000000000000000000000000000000) store fp128 %1, ptr null, align 16 ret void } ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare fp128 @llvm.maximum.f128(fp128, fp128) #0 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
@llvm/issue-subscribers-backend-x86
This was tested only with the X86 backend, but I believe the issue might occur on other backends as well.
CC: @vzakhari @dcaballe
@llvm/issue-subscribers-bug
When working on Simplifying Optimization of Flang intrinsics codegen, it was discovered that it is not possible to use `llvm.intr.maximum.f128`.
1. Original Fortran program
Error log
Same with the `-ffast-math` flag
2. LLVM IR
3. Reduced LLVM IR