Open iains opened 8 years ago
oops the whole chunk of experimental code is
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))), when no fixup will be required.
assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
//N.getNode()->dumpr(&DAG);
Disp = N.getOperand(1).getOperand(0); // The global address.
Base = N.getOperand(0);
if (Disp.getOpcode() == ISD::TargetGlobalAddress) {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Disp);
unsigned char TF = GA->getTargetFlags();
bool IsComm = GA->getGlobal()->hasCommonLinkage ();
bool IsSDL = GA->getGlobal()->isStrongDefinitionForLinker ();
//GA->getGlobal()->dump();
bool IsPic = (TF & PPCII::MO_PIC_FLAG);
if ((IsPic && (TF & PPCII::MO_NLP_FLAG)) || (!IsPic && !IsSDL && !IsComm))
return true; // [&g+r] is OK
// else don't transform.
} else {
assert(Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
Disp.getOpcode() == ISD::TargetConstantPool ||
Disp.getOpcode() == ISD::TargetJumpTable);
return true; // [&g+r]
}
}
The problem here is that this interacts with other parts of the lowering; for linux this path only seems to be executed when non-pic. However, if I fix it to not generate bad insns (e.g. with the concoction below) - then we get a regression in MergeConsecutiveStores.ll. Hm.
For Darwin, the "common" case is not problematic, since the OS indirects accesses to common.
--- anyway, probably out of cycles to look at this for now.
Anyone know how this is supposed to interact with following opts?
some experimentation:
lib/Target/PowerPC/PPCISelLowering.cpp;
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))), under what conditions ???
assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
//N.getNode()->dumpr(&DAG);
Disp = N.getOperand(1).getOperand(0); // The global address.
Base = N.getOperand(0);
if (Disp.getOpcode() == ISD::TargetGlobalAddress) {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Disp);
unsigned char TF = GA->getTargetFlags();
bool IsComm = GA->getGlobal()->hasCommonLinkage ();
bool IsSDL = GA->getGlobal()->isStrongDefinitionForLinker ();
//GA->getGlobal()->dump();
bool IsPic = (TF & PPCII::MO_PIC_FLAG);
if ((IsPic && (TF & PPCII::MO_NLP_FLAG)) || (!IsPic && !IsSDL && !IsComm))
return true; // [&g+r] is OK
// else don't transform.
disabling this transformation "fixes" it (and, incidentally, generates what looks like better code), however, not sure of the wider implications…
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1763,7 +1763,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
Base = N.getOperand(0);
}
return true; // [r+i]
- } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo && 0) {
// Match LOAD (ADD (X, Lo(G))).
assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Could not reproduce it on trunk - https://godbolt.org/z/T8ezja59n.
it now needs -fno-pic (it seems the default changed at some point) still fails for me with 18.0.1 https://godbolt.org/z/YYnKcofn8
Extended Description
For 32bit powerpc; for at least 3.8 and trunk. For Linux (no PIC) and for Darwin (fPIC, -fno-common) invalid insns are generated
original c (reduced from a GCC ABI compatibility test) is;
cat reduced-t002.c:
enum E4 { e4_0, e4_1, e4_2, e4_3, e4_253 = 253, e4_254, e4_255 };
struct S482 { enum E4 a : 18; char b; };
struct S482 s482;
void test482(void) { s482.a = e4_255; }
====
Linux; clang -target powerpc-gnu-linux reduced-t002.ll -fverbose-asm -save-temps -c -mllvm -print-after-all
IR Dump After Live DEBUG_VALUE analysis :
Machine code for function test482: Post SSA
Frame Objects: fi#-1: size=4, align=4, fixed, at location [SP-4]
BB#0: derived from LLVM BB %entry STW %R31, -4, %R1 %R1<def,tied3> = STWU %R1, -16, %R1
%R31 = OR %R1, %R1
%R3 = ADDIS ga:@s482[TF=16], ga:@s482[TF=32]
%R4 = LBZ 2, %R3; mem:LD1bitcast (%struct.S482 @s482 to i24)+2
%R5 = LIS ga:@s482[TF=32]
%R6 = LI 63
%R4 = ORI %R4, 16320
STHX %R6, %R5, ga:@s482[TF=16]; mem:ST2bitcast (%struct.S482 @s482 to i24)
STB %R4, 2, %R3; mem:ST1bitcast (%struct.S482 @s482 to i24)+2
%R1 = ADDI %R1, 16
%R31 = LWZ -4, %R1
BLR %LR, %RM
End machine code for function test482.
reduced-t002.s:12:11: error: invalid operand for instruction addis 3, s482@l, s482@ha ^ reduced-t002.s:17:13: error: invalid operand for instruction sthx 6, 5, s482@l ^
Darwin: clang -target powerpc-apple-darwin reduced-t002.c -fno-common -fverbose-asm -save-temps -c -mllvm -print-machineinstr
After Live DEBUG_VALUE analysis:
Machine code for function test482: Post SSA
Frame Objects: fi#-1: size=4, align=4, fixed, at location [SP-4]
BB#0: derived from LLVM BB %entry %R0 = MFLR %LR
STW %R31, -4, %R1
STW %R0, 8, %R1
%R1<def,tied3> = STWU %R1, -32, %R1
%R31 = OR %R1, %R1
MovePCtoLR %LR
%R2 = MFLR %LR
%R2 = ADDIS %R2, ga:@s482[TF=34]
%R3 = ADD4 %R2, ga:@s482[TF=18]
%R4 = LBZ 2, %R3; mem:LD1bitcast (%struct.S482 @s482 to i24)+2
%R5 = LI 63
%R4 = ORI %R4, 16320
STHX %R5, %R2, ga:@s482[TF=18]; mem:ST2bitcast (%struct.S482 @s482 to i24)
STB %R4, 2, %R3; mem:ST1bitcast (%struct.S482 @s482 to i24)+2
%R1 = ADDI %R1, 32
%R0 = LWZ 8, %R1
%R31 = LWZ -4, %R1
MTLR %R0, %LR
BLR %LR, %RM
End machine code for function test482.
reduced-t002.s:20:14: error: invalid operand for instruction add r3, r2, lo16(_s482-L0$pb) ^ reduced-t002.s:24:15: error: invalid operand for instruction sthx r5, r2, lo16(_s482-L0$pb) ^