Closed Quuxplusone closed 14 years ago
Bugzilla Link | PR931 |
Status | RESOLVED FIXED |
Importance | P normal |
Reported by | Andrew Lenharth (andrewl@lenharth.org) |
Reported on | 2006-10-02 16:27:32 -0700 |
Last modified on | 2010-02-22 12:55:12 -0800 |
Version | trunk |
Hardware | PC Linux |
CC | dpatel@apple.com, llvm-bugs@lists.llvm.org |
Fixed by commit(s) | |
Attachments | |
Blocks | |
Blocked by | |
See also |
Hrm, interesting. I guess the real question is: why are arrays of ubytes being
generated, and can we stop
that.
Here is one reduction (without the patch):
struct restart_block {
}
mm_segment_t;
struct tss_struct {
unsigned long esp0;
unsigned short ss0,__ss0h;
unsigned short ss1,__ss1h;
unsigned short trace, io_bitmap_base;
unsigned long io_bitmap[((65536/8)/sizeof(long)) + 1];
}
__attribute__((packed));
struct thread_info {
};
union thread_union {
unsigned long stack[(4096)/sizeof(long)];
};
extern union thread_union init_thread_union;
__typeof__(struct tss_struct) per_cpu__init_tss = {
.esp0 = sizeof((init_thread_union.stack)) + (long)&(init_thread_union.stack),
.ss0 = ((12 + 1) * 8), .ss1 = ((12 + 0) * 8), .io_bitmap_base = 0x8000,
.io_bitmap = { [ 0 ... ((65536/8)/sizeof(long))] = ~0 }
, };
I was looking at this and it seems that arrays of bytes are being generated for
all packed structures, even if they don't need it. If only the portion of the
structure that was affected by packed was transformed, this would mostly go
away.
This c program:
#include <stdio.h>
struct thread_struct;
#define IO_BITMAP_LONGS 2
struct tss_struct {
unsigned short back_link,__blh;
unsigned long esp0;
unsigned short ss0,__ss0h;
unsigned long esp1;
unsigned short ss1,__ss1h; /* ss1 is used to cache
MSR_IA32_SYSENTER_CS */
unsigned long esp2;
unsigned short ss2,__ss2h;
unsigned long __cr3;
unsigned long eip;
unsigned long eflags;
unsigned long eax,ecx,edx,ebx;
unsigned long esp;
unsigned long ebp;
unsigned long esi;
unsigned long edi;
unsigned short es, __esh;
unsigned short cs, __csh;
unsigned short ss, __ssh;
unsigned short ds, __dsh;
unsigned short fs, __fsh;
unsigned short gs, __gsh;
unsigned short ldt, __ldth;
unsigned short trace, io_bitmap_base;
/*
* The extra 1 is there because the CPU will access an
* additional byte beyond the end of the IO permission
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
/*
* Cache the current maximum and the last task that used the bitmap:
*/
unsigned long io_bitmap_max;
struct thread_struct *io_bitmap_owner;
/*
* pads the TSS to be cacheline-aligned (size is 0x100)
*/
unsigned long __cacheline_filler[35];
/*
* .. and then another 0x100 bytes for emergency kernel stack
*/
unsigned long stack[64];
} __attribute__((packed));
struct tss_new {
unsigned short back_link,__blh;
unsigned long esp0;
unsigned short ss0,__ss0h;
unsigned long esp1;
unsigned short ss1,__ss1h; /* ss1 is used to cache
MSR_IA32_SYSENTER_CS */
unsigned long esp2;
unsigned short ss2,__ss2h;
unsigned long __cr3;
unsigned long eip;
unsigned long eflags;
unsigned long eax,ecx,edx,ebx;
unsigned long esp;
unsigned long ebp;
unsigned long esi;
unsigned long edi;
unsigned short es, __esh;
unsigned short cs, __csh;
unsigned short ss, __ssh;
unsigned short ds, __dsh;
unsigned short fs, __fsh;
unsigned short gs, __gsh;
unsigned short ldt, __ldth;
unsigned short trace, io_bitmap_base;
/*
* The extra 1 is there because the CPU will access an
* additional byte beyond the end of the IO permission
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
/*
* Cache the current maximum and the last task that used the bitmap:
*/
unsigned long io_bitmap_max;
struct thread_struct *io_bitmap_owner;
/*
* pads the TSS to be cacheline-aligned (size is 0x100)
*/
unsigned long __cacheline_filler[35];
/*
* .. and then another 0x100 bytes for emergency kernel stack
*/
unsigned long stack[64];
};
int main() {
printf("packed: %d\nunpacked: %d\n", sizeof(struct tss_struct), sizeof(struct
tss_new));
return 0;
}
gives:
bash-3.00$ ./a.out
packed: 520
unpacked: 520
with normal gcc.
llvm-gcc turns these types into:
%struct.tss_new = type { ushort, ushort, uint, ushort, ushort, uint,
ushort, ushort, uint, ushort, ushort, uint, uint, uint, uint, uint, uint, uint,
uint, uint, uint, uint, ushort, ushort, ushort, ushort, ushort, ushort, ushort,
ushort, ushort, ushort, ushort, ushort, ushort, ushort, ushort, ushort, [3 x
uint], uint, %struct.thread_struct*, [35 x uint], [64 x uint] }
%struct.tss_struct = type { [2 x ubyte], [2 x ubyte], [4 x ubyte], [2 x
ubyte], [2 x ubyte], [4 x ubyte], [2 x ubyte], [2 x ubyte], [4 x ubyte], [2 x
ubyte], [2 x ubyte], [4 x ubyte], [4 x ubyte], [4 x ubyte], [4 x ubyte], [4 x
ubyte],[4 x ubyte], [4 x ubyte], [4 x ubyte], [4 x ubyte], [4 x ubyte], [4 x
ubyte], [2 x ubyte], [2 x ubyte], [2 x ubyte], [2 xubyte], [2 x ubyte], [2 x
ubyte], [2 x ubyte], [2 x ubyte], [2 x ubyte], [2 x ubyte], [2 x ubyte], [2 x
ubyte], [2 x ubyte], [2 x ubyte], [2 x ubyte], [2 x ubyte], [12 x ubyte], [4 x
ubyte], [4 x ubyte], [140 x ubyte], [256 x ubyte] }
which is really bad.
This patch causes fields to be output as their normal type (rather than an array
of bytes) if the type of the field is naturally aligned at the offset it is
being output.
Index: llvm-types.cpp
===================================================================
--- llvm-types.cpp (revision 215)
+++ llvm-types.cpp (working copy)
@@ -793,7 +793,8 @@
unsigned ByteAlignment = Info.getTypeAlignment(Ty);
unsigned NextByteOffset = Info.getNewElementByteOffset(ByteAlignment);
if (NextByteOffset > StartOffsetInBytes ||
- ByteAlignment > Info.getGCCStructAlignmentInBytes()) {
+ (ByteAlignment > Info.getGCCStructAlignmentInBytes()
+ && (NextByteOffset % ByteAlignment) != 0)) {
// If the LLVM type is aligned more than the GCC type, we cannot use this
// LLVM type for the field. Instead, insert the field as an array of bytes,
// which we know will have the least alignment possible.
This patch causes some unexplained asserts in rare cases, but it definately is
at the problem code.
the new assertions (with the type patch applied) come from:
#include <stdio.h>
struct hci_sco_hdr {
short handle;
char dlen;
} __attribute__ ((packed));
int main() {
struct hci_sco_hdr a, b, c[2];
printf("%d %d %d\n", sizeof(struct hci_sco_hdr), (int)&a - (int)&b, sizeof(c));
return 0;
}
the llvm type has post structure padding since it isn't an array. This makes it
size 4 instead of 3 that gcc produces.
Converting packed structs to arrays of bytes obfuscates the type quite a bit.
Would it make more sense to add a packed struct type to llvm? If a subclass of
StructType, by updating the struct sizing and target code for calculating field
offset, most other code would be unaware of the difference between a packed
struct and a struct. GEPs would continue to work on the logical type
identically to normal structs.
Devang, this is related to the packed structs work you're doing recently.
Is this fixed?
I just verified that example from comment #6 is fixed in lastest llvm-gcc. I am
not sure, if it was fixed
before my recent patch or not.
Closing this as fixed. Andrew if there is a case still unhandled, please file another bug, thanks!