Open rfaucett opened 9 years ago
So you are saying that the compiler might be smart enough to see that, in every call to usdf_function, use_lock is passed as either 0 or 1, both compile-time constants, so that it effectively generates two functions, usdf_function_thread_safe and usdf_function_thread_unsafe, with all these if statements removed?
I think this is probably a long shot. I compiled a source file like this with gcc 4.9.1 and used objdump to look at the assembly. There is still a usdf_function there, and there are jump instructions that depend on the parameter value, although I am not sure the assembly code shown by objdump is exactly the same as what is in the executable.
BTW, the 'const' qualifier on use_lock serves no purpose. Since use_lock is passed by value, the function cannot change the caller's variable anyway.
Did you compile with -O[high_level], like -O9?
Reese is correct. I don't know what went wrong that led me to the wrong conclusion. I have now recompiled my code and run objdump again, and I can see that the compiler does remove the conditional jump instruction.
C source code
/*
 * Demo helper: prints which branch was taken and returns 1 when
 * use_lock is non-zero, 0 otherwise.
 *
 * The callers in this example pass a literal 0 or 1, so once this is
 * inlined the optimizer can fold the branch away.
 *
 * The parameter is declared as int explicitly; the implicit-int form
 * was removed from the language in C99.
 */
static inline int usdf_function(int use_lock)
{
    int i = 0;

    if (use_lock) {
        i = 1;
        printf("%s, use lock\n", __func__);
    } else {
        printf("%s, don't use lock\n", __func__);
    }
    return i;
}
/*
 * Variant that selects the "use lock" path: calls usdf_function with
 * use_lock fixed to 1, prints the result and returns it.
 */
int
usdf_function_thread_safe(void)
{
    int ret = usdf_function(1);

    printf("%d\n", ret);
    /* The function is declared int, so it must actually return a value. */
    return ret;
}
/*
 * Variant that selects the "don't use lock" path: calls usdf_function
 * with use_lock fixed to 0, prints the result and returns it.
 */
int
usdf_function_thread_unsafe(void)
{
    int ret = usdf_function(0);

    printf("%d\n", ret);
    /* The function is declared int, so it must actually return a value. */
    return ret;
}
/* Runs both variants once so each call site appears in the object code. */
int main(void)
{
    usdf_function_thread_safe();
    usdf_function_thread_unsafe();
    /* Explicit status: pre-C99 modes do not supply an implicit return 0. */
    return 0;
}
Corresponding assembly
0000000000400520 <usdf_function_thread_safe>:
400520: 48 83 ec 08 sub $0x8,%rsp
400524: be b2 06 40 00 mov $0x4006b2,%esi
400529: bf 8c 06 40 00 mov $0x40068c,%edi
40052e: 31 c0 xor %eax,%eax
400530: e8 6b fe ff ff callq 4003a0 <printf@plt>
400535: be 01 00 00 00 mov $0x1,%esi
40053a: bf 9a 06 40 00 mov $0x40069a,%edi
40053f: 31 c0 xor %eax,%eax
400541: 48 83 c4 08 add $0x8,%rsp
400545: e9 56 fe ff ff jmpq 4003a0 <printf@plt>
40054a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0000000000400550 <usdf_function_thread_unsafe>:
400550: 48 83 ec 08 sub $0x8,%rsp
400554: be b2 06 40 00 mov $0x4006b2,%esi
400559: bf 9e 06 40 00 mov $0x40069e,%edi
40055e: 31 c0 xor %eax,%eax
400560: e8 3b fe ff ff callq 4003a0 <printf@plt>
400565: 31 f6 xor %esi,%esi
400567: bf 9a 06 40 00 mov $0x40069a,%edi
40056c: 31 c0 xor %eax,%eax
40056e: 48 83 c4 08 add $0x8,%rsp
400572: e9 29 fe ff ff jmpq 4003a0 <printf@plt>
400577: 90 nop
400578: 90 nop
400579: 90 nop
40057a: 90 nop
40057b: 90 nop
40057c: 90 nop
40057d: 90 nop
40057e: 90 nop
40057f: 90 nop
0000000000400580 <main>:
400580: 48 83 ec 08 sub $0x8,%rsp
400584: e8 97 ff ff ff callq 400520 <usdf_function_thread_safe>
400589: 48 83 c4 08 add $0x8,%rsp
40058d: e9 be ff ff ff jmpq 400550 <usdf_function_thread_unsafe>
More complete example.
/* Example state: one int field plus the spinlock used around writes to it. */
struct usdf_data_t {
/* Value that usdf_function sets to 100 while holding `lock`. */
int protected;
/* Spinlock taken by usdf_function only when use_lock is non-zero. */
pthread_spinlock_t lock;
};
/*
 * Shared body for the locking and non-locking variants.
 *
 * With use_lock == 0 it only prints a message and returns 0. Otherwise
 * it takes usdt->lock, stores 100 into usdt->protected, prints a
 * message, releases the lock and returns 1.
 */
static inline int usdf_function(struct usdf_data_t* usdt, int use_lock)
{
    /* Non-locking path first: nothing in usdt is touched. */
    if (!use_lock) {
        printf("%s, don't use lock\n", __func__);
        return 0;
    }

    pthread_spin_lock(&usdt->lock);
    usdt->protected = 100;
    printf("%s, use lock\n", __func__);
    pthread_spin_unlock(&usdt->lock);

    return 1;
}
/*
 * Locking variant: calls usdf_function with use_lock fixed to 1,
 * prints the result and returns it.
 */
int
usdf_function_thread_safe(struct usdf_data_t* usdt)
{
    int ret = usdf_function(usdt, 1);

    printf("%d\n", ret);
    /* The function is declared int, so it must actually return a value. */
    return ret;
}
/*
 * Non-locking variant: calls usdf_function with use_lock fixed to 0,
 * prints the result and returns it.
 */
int
usdf_function_thread_unsafe(struct usdf_data_t* usdt)
{
    int ret = usdf_function(usdt, 0);

    printf("%d\n", ret);
    /* The function is declared int, so it must actually return a value. */
    return ret;
}
/*
 * Initializes the spinlock, runs the locking and non-locking variants
 * once each, then destroys the lock.
 *
 * Returns 0 on success, 1 if the spinlock could not be initialized.
 */
int main(void)
{
    struct usdf_data_t usd;
    /* pthread_spin_init reports failure through its return value. */
    int err = pthread_spin_init(&usd.lock, PTHREAD_PROCESS_PRIVATE);

    if (err != 0) {
        fprintf(stderr, "pthread_spin_init failed: %d\n", err);
        return 1;
    }

    usdf_function_thread_safe(&usd);
    usdf_function_thread_unsafe(&usd);
    pthread_spin_destroy(&usd.lock);
    return 0;
}
00000000004006f0 <usdf_function_thread_safe>:
4006f0: 55 push %rbp
4006f1: 48 89 fd mov %rdi,%rbp
4006f4: 53 push %rbx
4006f5: 48 8d 5f 04 lea 0x4(%rdi),%rbx
4006f9: 48 83 ec 08 sub $0x8,%rsp
4006fd: 48 89 df mov %rbx,%rdi
400700: e8 73 fe ff ff callq 400578 <pthread_spin_lock@plt>
400705: be c2 08 40 00 mov $0x4008c2,%esi
40070a: c7 45 00 64 00 00 00 movl $0x64,0x0(%rbp)
400711: bf 9c 08 40 00 mov $0x40089c,%edi
400716: 31 c0 xor %eax,%eax
400718: e8 0b fe ff ff callq 400528 <printf@plt>
40071d: 48 89 df mov %rbx,%rdi
400720: e8 23 fe ff ff callq 400548 <pthread_spin_unlock@plt>
400725: 48 83 c4 08 add $0x8,%rsp
400729: be 01 00 00 00 mov $0x1,%esi
40072e: bf aa 08 40 00 mov $0x4008aa,%edi
400733: 5b pop %rbx
400734: 5d pop %rbp
400735: 31 c0 xor %eax,%eax
400737: e9 ec fd ff ff jmpq 400528 <printf@plt>
40073c: 0f 1f 40 00 nopl 0x0(%rax)
0000000000400740 <usdf_function_thread_unsafe>:
400740: 48 83 ec 08 sub $0x8,%rsp
400744: be c2 08 40 00 mov $0x4008c2,%esi
400749: bf ae 08 40 00 mov $0x4008ae,%edi
40074e: 31 c0 xor %eax,%eax
400750: e8 d3 fd ff ff callq 400528 <printf@plt>
400755: 31 f6 xor %esi,%esi
400757: bf aa 08 40 00 mov $0x4008aa,%edi
40075c: 31 c0 xor %eax,%eax
40075e: 48 83 c4 08 add $0x8,%rsp
400762: e9 c1 fd ff ff jmpq 400528 <printf@plt>
400767: 90 nop
400768: 90 nop
400769: 90 nop
40076a: 90 nop
40076b: 90 nop
40076c: 90 nop
40076d: 90 nop
40076e: 90 nop
40076f: 90 nop
I am hoping that we can get clean-looking code to provide both thread-safe and non-threadsafe functions by writing like this:
The hope here is that the compiler completely removes the conditional on the constant "use_lock" so that there is no performance penalty in the non-locking case. Otherwise, we may have to resort to nasty tricks with #ifdefs and compiling the same source multiple ways.