Open · linzj opened 1 year ago
I am currently using this patch to fix it:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c7a6dd7deb45..617ce528e485 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15624,6 +15624,28 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
+  // If the shift is only used in the pattern
+  //   load (add t0, (shl x, c0))
+  // do not commute it; the shift can be folded into the addressing mode.
+  if (N->hasOneUse() && (isa<ConstantSDNode>(N->getOperand(0)) ||
+                         isa<ConstantSDNode>(N->getOperand(1)))) {
+    SDNode *User = *N->use_begin();
+    unsigned LevelCount = 0;
+    while (LevelCount < 2 &&
+           (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB ||
+            User->getOpcode() == ISD::ZERO_EXTEND) &&
+           User->hasOneUse()) {
+      SDNode *Next = *User->use_begin();
+      if (Next->getOpcode() == ISD::LOAD || Next->getOpcode() == ISD::STORE)
+        return false;
+      User = Next;
+      ++LevelCount;
+    }
+    // The value is exported via CopyToReg; be conservative.
+    if (User->getOpcode() == ISD::CopyToReg)
+      return false;
+  }
+
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not
// combine it with shift 'N' to let it be lowered to UBFX except:
// ((x >> C) & mask) << C.
@@ -15680,6 +15702,27 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
if (!N->getOperand(0)->hasOneUse())
return false;
+  // If the shift is only used in the pattern
+  //   load (add t0, (shift x, c0))
+  // do not fold it to a mask; it can be folded into the addressing mode.
+  if (N->hasOneUse() && isa<ConstantSDNode>(N->getOperand(1))) {
+    SDNode *User = *N->use_begin();
+    unsigned LevelCount = 0;
+    while (LevelCount < 2 &&
+           (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SUB ||
+            User->getOpcode() == ISD::ZERO_EXTEND) &&
+           User->hasOneUse()) {
+      SDNode *Next = *User->use_begin();
+      if (Next->getOpcode() == ISD::LOAD || Next->getOpcode() == ISD::STORE)
+        return false;
+      User = Next;
+      ++LevelCount;
+    }
+    // The value is exported via CopyToReg; be conservative.
+    if (User->getOpcode() == ISD::CopyToReg)
+      return false;
+  }
+
// Only fold srl(shl(x,c1),c2) iff C1 >= C2 to prevent loss of UBFX patterns.
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
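For context on why the patch bails out of these combines (my own illustration, not part of linzj's patch): AArch64 load/store instructions can fold a left-shifted index register into the addressing mode, e.g. `ldr x0, [x8, x9, lsl #3]`. Below is a minimal sketch of source code that produces the DAG shape named in the patch comments, assuming a hypothetical function `load_elem` and a 20-bit index mask:

```cpp
// Hypothetical illustration only; not the reporter's original sample.
// Indexing an array of 8-byte elements with a masked index gives the DAG
//   load (add base, (shl (and i, 0xFFFFF), 3))
// If the shl is left directly under the add, ISel can fold the scale into
// the load:
//   and x9, x1, #0xfffff
//   ldr x0, [x0, x9, lsl #3]
// Commuting the shl past the mask instead gives
//   and (shl i, 3), (0xFFFFF << 3)  ==  and (shl i, 3), 0x7FFFF8
// so the masking now happens after the scale and cannot use the addressing
// mode; 0x7FFFF8 is the same mask as in the redundant AND quoted below.
unsigned long load_elem(const unsigned long *base, unsigned long i) {
  return base[i & 0xFFFFF];
}
```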
@llvm/issue-subscribers-backend-aarch64
Sample C++ code:
Generates asm:
I think
and x9, x9, #0x7ffff8
is redundant and needs to be removed, like the following:
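As a hedged sketch of why an `and` like this can be a no-op, assume hypothetically that x9 already holds a masked index scaled by 8 (so only bits 3..22 can be set); the mask 0x7ffff8 then clears nothing:

```cpp
// Hypothetical check, not taken from the issue: if x9 was computed as
// (x & 0xFFFFF) << 3, only bits 3..22 can be set, and 0x7FFFF8 covers
// exactly bits 3..22, so "and x9, x9, #0x7ffff8" leaves x9 unchanged.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t samples[] = {0, 0xFFFF, 0xFFFFF, 0x123456789ABCDEFull};
  for (uint64_t x : samples) {
    uint64_t x9 = (x & 0xFFFFF) << 3; // bits 3..22 at most
    assert((x9 & 0x7FFFF8) == x9);    // the AND is redundant here
  }
  return 0;
}
```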