rfaucett / libfabric

Open Fabric Interfaces
Other
0 stars 0 forks source link

add optional thread-safety #6

Open rfaucett opened 9 years ago

rfaucett commented 9 years ago

I am hoping that we can get clean-looking code to provide both thread-safe and non-threadsafe functions by writing like this:

/*
 * Sketch of the shared implementation used by both public variants.
 *
 * use_lock is intended to be a compile-time constant at every call site, so
 * that after inlining the optimizer can drop the lock/unlock branches from
 * the non-locking variant.
 *
 * Returns 0.  The wrappers return this function's value, so it must not fall
 * off the end without one; a real implementation would return the status of
 * do_stuff() instead.
 */
static inline int
usdf_function(arg1, arg2, const int use_lock)
{
    if (use_lock) pthread_spin_lock(&arg1->lock);
    do_stuff();
    if (use_lock) pthread_spin_unlock(&arg1->lock);
    return 0;
}

/* Thread-safe entry point: runs the shared implementation with locking on. */
int
usdf_function_thread_safe(arg1, arg2)
{
     const int use_lock = 1;    /* constant, so the lock branches can be folded in */

     return usdf_function(arg1, arg2, use_lock);
}

/* Non-thread-safe entry point: runs the shared implementation with locking off. */
int
usdf_function_thread_unsafe(arg1, arg2)
{
     const int use_lock = 0;    /* constant, so the lock branches can be removed */

     return usdf_function(arg1, arg2, use_lock);
}

The hope here is that the compiler completely removes the conditional on the constant "use_lock" so that there is no performance penalty in the non-locking case. Otherwise, we may have to resort to nasty tricks with #ifdefs and compiling the same source multiple ways.

xuywang commented 9 years ago

So you are saying that the compiler might be smart enough to notice that, at every call site of usdf_function, use_lock is passed as either 0 or 1, both of which are compile-time constants, so that it effectively generates two functions, usdf_function_thread_safe and usdf_function_thread_unsafe, with all of these if statements removed?

I think this is probably a long shot. I tried compiling a source file like this with gcc 4.9.1 and used objdump to look at the assembly. It looks like there is still a usdf_function there, and there are jump instructions based on the parameter value. However, I am not sure the assembly code shown by objdump is exactly the same as what is in the executable.

BTW, the 'const' qualifier on use_lock serves no purpose. Since use_lock is passed by value, the function cannot change the caller's variable that was passed as use_lock anyway.

jsquyres commented 9 years ago

Did you compile with -O[high_level], like -O9?

xuywang commented 9 years ago

Reese is correct. I don't know what went wrong that led me to the wrong conclusion. After recompiling my code and running objdump again, I can see that the compiler actually removes the conditional jump instruction.

C source code

/*
 * Stand-in for the shared implementation: prints which path was taken and
 * returns 1 when use_lock is non-zero, 0 otherwise.
 *
 * The parameter is declared as int explicitly; an untyped K&R-style
 * parameter relies on implicit int, which is not valid in C99 and later.
 */
static inline int usdf_function(int use_lock)
{
        int i = 0;
        if (use_lock) {
                i = 1;
                printf("%s, use lock\n", __func__);
        }
        else {
                printf("%s, don't use lock\n", __func__);
        }
        return i;
}

/*
 * Locking variant: passes the literal 1 as use_lock and prints the value the
 * helper returned.
 * NOTE: declared int but has no return statement; main() ignores the result.
 */
int
usdf_function_thread_safe(void)
{
        int ret = usdf_function(1);

        printf("%d\n", ret);
}

/*
 * Non-locking variant: passes the literal 0 as use_lock and prints the value
 * the helper returned.
 * NOTE: declared int but has no return statement; main() ignores the result.
 */
int
usdf_function_thread_unsafe(void)
{
        int ret;

        ret = usdf_function(0);
        printf("%d\n", ret);
}

/*
 * Driver: calls the locking variant and then the non-locking variant so that
 * both specializations can be inspected in the disassembly.
 */
int main()  
{
        usdf_function_thread_safe();
        usdf_function_thread_unsafe();
} 

Corresponding assembly

0000000000400520 <usdf_function_thread_safe>:
  400520:   48 83 ec 08             sub    $0x8,%rsp
  400524:   be b2 06 40 00          mov    $0x4006b2,%esi
  400529:   bf 8c 06 40 00          mov    $0x40068c,%edi
  40052e:   31 c0                   xor    %eax,%eax
  400530:   e8 6b fe ff ff          callq  4003a0 <printf@plt>
  400535:   be 01 00 00 00          mov    $0x1,%esi
  40053a:   bf 9a 06 40 00          mov    $0x40069a,%edi
  40053f:   31 c0                   xor    %eax,%eax
  400541:   48 83 c4 08             add    $0x8,%rsp
  400545:   e9 56 fe ff ff          jmpq   4003a0 <printf@plt>
  40054a:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

0000000000400550 <usdf_function_thread_unsafe>:
  400550:   48 83 ec 08             sub    $0x8,%rsp
  400554:   be b2 06 40 00          mov    $0x4006b2,%esi
  400559:   bf 9e 06 40 00          mov    $0x40069e,%edi
  40055e:   31 c0                   xor    %eax,%eax
  400560:   e8 3b fe ff ff          callq  4003a0 <printf@plt>
  400565:   31 f6                   xor    %esi,%esi
  400567:   bf 9a 06 40 00          mov    $0x40069a,%edi
  40056c:   31 c0                   xor    %eax,%eax
  40056e:   48 83 c4 08             add    $0x8,%rsp
  400572:   e9 29 fe ff ff          jmpq   4003a0 <printf@plt>
  400577:   90                      nop
  400578:   90                      nop
  400579:   90                      nop
  40057a:   90                      nop
  40057b:   90                      nop
  40057c:   90                      nop
  40057d:   90                      nop
  40057e:   90                      nop
  40057f:   90                      nop

0000000000400580 <main>:
  400580:   48 83 ec 08             sub    $0x8,%rsp
  400584:   e8 97 ff ff ff          callq  400520 <usdf_function_thread_safe>
  400589:   48 83 c4 08             add    $0x8,%rsp
  40058d:   e9 be ff ff ff          jmpq   400550 <usdf_function_thread_unsafe>
xuywang commented 9 years ago

More complete example.

/* Example state: one data field plus the spinlock taken around writes to it. */
struct usdf_data_t {                
        int                     protected;  /* written by usdf_function while `lock` is held */
        pthread_spinlock_t      lock;       /* taken by usdf_function when use_lock is non-zero */
};

/*
 * Shared implementation for the locking and non-locking variants.
 *
 * With use_lock == 0 it only prints a message and returns 0.  Otherwise it
 * takes usdt->lock, stores 100 in usdt->protected, prints a message, releases
 * the lock and returns 1.
 */
static inline int usdf_function(struct usdf_data_t *usdt, int use_lock)
{
        if (!use_lock) {
                printf("%s, don't use lock\n", __func__);
                return 0;
        }

        pthread_spin_lock(&usdt->lock);
        usdt->protected = 100;
        printf("%s, use lock\n", __func__);
        pthread_spin_unlock(&usdt->lock);

        return 1;
}

/*
 * Locking variant: passes the literal 1 as use_lock and prints the value the
 * helper returned.
 * NOTE: declared int but has no return statement; main() ignores the result.
 */
int
usdf_function_thread_safe(struct usdf_data_t *usdt)
{
        int ret = usdf_function(usdt, 1);

        printf("%d\n", ret);
}

/*
 * Non-locking variant: passes the literal 0 as use_lock and prints the value
 * the helper returned.
 * NOTE: declared int but has no return statement; main() ignores the result.
 */
int
usdf_function_thread_unsafe(struct usdf_data_t *usdt)
{
        int ret;

        ret = usdf_function(usdt, 0);
        printf("%d\n", ret);
}

/*
 * Driver: initializes the spinlock, calls the locking and then the
 * non-locking variant on the same object, and destroys the lock.
 */
int main()  
{ 
        /* usd.protected is left uninitialized; this example only ever writes it. */
        struct usdf_data_t usd;     
        /* Return value is not checked in this example. */
        pthread_spin_init(&usd.lock, PTHREAD_PROCESS_PRIVATE);

        usdf_function_thread_safe(&usd);
        usdf_function_thread_unsafe(&usd);

        pthread_spin_destroy(&usd.lock);
} 
00000000004006f0 <usdf_function_thread_safe>:
  4006f0:   55                      push   %rbp
  4006f1:   48 89 fd                mov    %rdi,%rbp
  4006f4:   53                      push   %rbx
  4006f5:   48 8d 5f 04             lea    0x4(%rdi),%rbx
  4006f9:   48 83 ec 08             sub    $0x8,%rsp
  4006fd:   48 89 df                mov    %rbx,%rdi
  400700:   e8 73 fe ff ff          callq  400578 <pthread_spin_lock@plt>
  400705:   be c2 08 40 00          mov    $0x4008c2,%esi
  40070a:   c7 45 00 64 00 00 00    movl   $0x64,0x0(%rbp)
  400711:   bf 9c 08 40 00          mov    $0x40089c,%edi
  400716:   31 c0                   xor    %eax,%eax
  400718:   e8 0b fe ff ff          callq  400528 <printf@plt>
  40071d:   48 89 df                mov    %rbx,%rdi
  400720:   e8 23 fe ff ff          callq  400548 <pthread_spin_unlock@plt>
  400725:   48 83 c4 08             add    $0x8,%rsp
  400729:   be 01 00 00 00          mov    $0x1,%esi
  40072e:   bf aa 08 40 00          mov    $0x4008aa,%edi
  400733:   5b                      pop    %rbx
  400734:   5d                      pop    %rbp
  400735:   31 c0                   xor    %eax,%eax
  400737:   e9 ec fd ff ff          jmpq   400528 <printf@plt>
  40073c:   0f 1f 40 00             nopl   0x0(%rax)

0000000000400740 <usdf_function_thread_unsafe>:
  400740:   48 83 ec 08             sub    $0x8,%rsp
  400744:   be c2 08 40 00          mov    $0x4008c2,%esi
  400749:   bf ae 08 40 00          mov    $0x4008ae,%edi
  40074e:   31 c0                   xor    %eax,%eax
  400750:   e8 d3 fd ff ff          callq  400528 <printf@plt>
  400755:   31 f6                   xor    %esi,%esi
  400757:   bf aa 08 40 00          mov    $0x4008aa,%edi
  40075c:   31 c0                   xor    %eax,%eax
  40075e:   48 83 c4 08             add    $0x8,%rsp
  400762:   e9 c1 fd ff ff          jmpq   400528 <printf@plt>
  400767:   90                      nop
  400768:   90                      nop
  400769:   90                      nop
  40076a:   90                      nop
  40076b:   90                      nop
  40076c:   90                      nop
  40076d:   90                      nop
  40076e:   90                      nop
  40076f:   90                      nop