Quuxplusone / LLVMBugzillaTest

0 stars 0 forks source link

Invalid instructions generated for bitfield within a global structure. #27900

Open Quuxplusone opened 8 years ago

Quuxplusone commented 8 years ago
Bugzilla Link PR27901
Status NEW
Importance P normal
Reported by Iain Sandoe (iains-llvm@btconnect.com)
Reported on 2016-05-26 18:04:21 -0700
Last modified on 2016-05-28 07:43:00 -0700
Version trunk
Hardware PC Linux
CC ehsanamiri@gmail.com, hfinkel@anl.gov, kit.barton@gmail.com, llvm-bugs@lists.llvm.org, nemanja.i.ibm@gmail.com
Fixed by commit(s)
Attachments reduced-t002.ll (1020 bytes, application/octet-stream)
view-combine2.dot (4928 bytes, text/plain)
view-isel.dot (3387 bytes, text/plain)
Blocks
Blocked by
See also
Created attachment 16425
testcase .ll

For 32bit powerpc; for at least 3.8 and trunk. For Linux (no PIC) and for
Darwin (fPIC, -fno-common) invalid insns are generated

original c (reduced from a GCC ABI compatibility test) is;

cat reduced-t002.c:

enum E4 { e4_0, e4_1, e4_2, e4_3, e4_253 = 253, e4_254, e4_255 };

struct S482 {
  enum E4 a : 18;
  char b;
};

struct S482 s482;

void test482(void) {
    s482.a = e4_255;
}

====

Linux;
clang -target powerpc-gnu-linux reduced-t002.ll -fverbose-asm -save-temps -c -
mllvm -print-after-all

# *** IR Dump After Live DEBUG_VALUE analysis ***:
# Machine code for function test482: Post SSA
Frame Objects:
  fi#-1: size=4, align=4, fixed, at location [SP-4]

BB#0: derived from LLVM BB %entry
    STW %R31, -4, %R1
    %R1<def,tied3> = STWU %R1, -16, %R1<tied0>
    %R31<def> = OR %R1, %R1
    %R3<def> = ADDIS <ga:@s482>[TF=16], <ga:@s482>[TF=32]
    %R4<def> = LBZ 2, %R3; mem:LD1[bitcast (%struct.S482* @s482 to
i24*)+2](align=2)
    %R5<def> = LIS <ga:@s482>[TF=32]
    %R6<def> = LI 63
    %R4<def> = ORI %R4<kill>, 16320
    STHX %R6<kill>, %R5<kill>, <ga:@s482>[TF=16]; mem:ST2[bitcast (%struct.S482*
@s482 to i24*)](align=4)
    STB %R4<kill>, 2, %R3<kill>; mem:ST1[bitcast (%struct.S482* @s482 to
i24*)+2](align=2)
    %R1<def> = ADDI %R1, 16
    %R31<def> = LWZ -4, %R1
    BLR %LR<imp-use>, %RM<imp-use>

# End machine code for function test482.

reduced-t002.s:12:11: error: invalid operand for instruction
        addis 3, s482@l, s482@ha
                 ^
reduced-t002.s:17:13: error: invalid operand for instruction
        sthx 6, 5, s482@l
                   ^

Darwin:
clang -target powerpc-apple-darwin reduced-t002.c -fno-common -fverbose-asm -
save-temps -c -mllvm -print-machineinstr

# After Live DEBUG_VALUE analysis:
# Machine code for function test482: Post SSA
Frame Objects:
  fi#-1: size=4, align=4, fixed, at location [SP-4]

BB#0: derived from LLVM BB %entry
    %R0<def> = MFLR %LR<imp-use>
    STW %R31, -4, %R1
    STW %R0, 8, %R1
    %R1<def,tied3> = STWU %R1, -32, %R1<tied0>
    %R31<def> = OR %R1, %R1
    MovePCtoLR %LR<imp-def>
    %R2<def> = MFLR %LR<imp-use>
    %R2<def> = ADDIS %R2<kill>, <ga:@s482>[TF=34]
    %R3<def> = ADD4 %R2, <ga:@s482>[TF=18]
    %R4<def> = LBZ 2, %R3; mem:LD1[bitcast (%struct.S482* @s482 to
i24*)+2](align=2)
    %R5<def> = LI 63
    %R4<def> = ORI %R4<kill>, 16320
    STHX %R5<kill>, %R2<kill>, <ga:@s482>[TF=18]; mem:ST2[bitcast (%struct.S482*
@s482 to i24*)](align=4)
    STB %R4<kill>, 2, %R3<kill>; mem:ST1[bitcast (%struct.S482* @s482 to
i24*)+2](align=2)
    %R1<def> = ADDI %R1, 32
    %R0<def> = LWZ 8, %R1
    %R31<def> = LWZ -4, %R1
    MTLR %R0, %LR<imp-def>
    BLR %LR<imp-use>, %RM<imp-use>

# End machine code for function test482.

reduced-t002.s:20:14: error: invalid operand for instruction
        add r3, r2, lo16(_s482-L0$pb)
                    ^
reduced-t002.s:24:15: error: invalid operand for instruction
        sthx r5, r2, lo16(_s482-L0$pb)
                     ^
Quuxplusone commented 8 years ago

Attached reduced-t002.ll (1020 bytes, application/octet-stream): testcase .ll

Quuxplusone commented 8 years ago

Attached view-combine2.dot (4928 bytes, text/plain): view-dag-combine2-dags dot file

Quuxplusone commented 8 years ago

Attached view-isel.dot (3387 bytes, text/plain): view-isel-dags dot file

Quuxplusone commented 8 years ago
disabling this transformation "fixes" it (and, incidentally, generates what
looks like better code), however, not sure of the wider implications…

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1763,7 +1763,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N,
SDValue &Disp,
         Base = N.getOperand(0);
       }
       return true; // [r+i]
-    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo && 0) {
       // Match LOAD (ADD (X, Lo(G))).
       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
              && "Cannot handle constant offsets yet!");
Quuxplusone commented 8 years ago
The problem here is that this interacts with other parts of the lowering;
for linux this path only seems to be executed when non-pic.  However, if I fix
it to not generate bad insns (e.g. with the concoction below) - then we get a
regression in MergeConsecutiveStores.ll.  Hm.

For Darwin, the "common" case is not problematic, since the OS indirects
accesses to common.

--- anyway, probably out of cycles to look at this for now.

Anyone know how this is supposed to interact with following opts?

some experimentation:

lib/Target/PowerPC/PPCISelLowering.cpp;

    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))), under what conditions ???
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
//N.getNode()->dumpr(&DAG);
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      Base = N.getOperand(0);
      if (Disp.getOpcode() == ISD::TargetGlobalAddress) {
        GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Disp);
        unsigned char TF = GA->getTargetFlags();
        bool IsComm = GA->getGlobal()->hasCommonLinkage ();
        bool IsSDL = GA->getGlobal()->isStrongDefinitionForLinker ();
//GA->getGlobal()->dump();
        bool IsPic = (TF & PPCII::MO_PIC_FLAG);
        if ((IsPic && (TF & PPCII::MO_NLP_FLAG)) || (!IsPic && !IsSDL && !IsComm))
          return true;  // [&g+r] is OK
        // else don't transform.
Quuxplusone commented 8 years ago
oops the whole chunk of experimental code is

    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))), when no fixup will be required.
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
//N.getNode()->dumpr(&DAG);
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      Base = N.getOperand(0);
      if (Disp.getOpcode() == ISD::TargetGlobalAddress) {
        GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Disp);
        unsigned char TF = GA->getTargetFlags();
        bool IsComm = GA->getGlobal()->hasCommonLinkage ();
        bool IsSDL = GA->getGlobal()->isStrongDefinitionForLinker ();
//GA->getGlobal()->dump();
        bool IsPic = (TF & PPCII::MO_PIC_FLAG);
        if ((IsPic && (TF & PPCII::MO_NLP_FLAG)) || (!IsPic && !IsSDL && !IsComm))
          return true;  // [&g+r] is OK
        // else don't transform.
      } else {
        assert(Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
               Disp.getOpcode() == ISD::TargetConstantPool ||
               Disp.getOpcode() == ISD::TargetJumpTable);
        return true;  // [&g+r]
      }
    }