llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
29.41k stars 12.15k forks source link

[ASan] sem_timedwait hangs with 32-bit binary #58023

Open mpolacek opened 2 years ago

mpolacek commented 2 years ago

The following program hangs with -m32, but not without:

$ clang -m32 -fsanitize=address -lpthread sem_timedwait.c; ./a.out

(and therefore the same happens with gcc)

#include <stdio.h>
#include <errno.h>
#include <pthread.h>
#include <sys/types.h>
#include <unistd.h>
#include <time.h>
#include <semaphore.h>

/*
 * Bookkeeping record made of four unsigned counters.
 * In this program only `count` is ever written, and only `count` and
 * `other` are printed at exit.
 */
typedef struct {
    unsigned start;
    unsigned count;
    unsigned other;
    unsigned iter;
} threadlocal_t;

/* Two zero-initialised records, each placed on its own 64-byte boundary. */
threadlocal_t data_one __attribute__((aligned(64))) = { 0 };
threadlocal_t data_two __attribute__((aligned(64))) = { 0 };

/* Semaphores the two worker threads wait on and post to in alternation. */
sem_t sem_one;
sem_t sem_two;

/*
 * Entry point of the first worker thread.
 *
 * Performs nine rounds. In each round it polls sem_one with sem_timedwait()
 * until the call succeeds, prints the round number, clears data_one.count,
 * sleeps for one second and finally posts sem_two.
 *
 * arg is ignored. Always returns a null pointer.
 */
void *thread_func_one(void *arg)
{
    /*
     * Per POSIX, sem_timedwait() takes an absolute CLOCK_REALTIME deadline.
     * 1000 ns after the epoch is long past, so a wait that cannot succeed
     * immediately is expected to fail at once and the loop below polls.
     */
    const struct timespec timeout   = { .tv_sec = 0, .tv_nsec = 1000 };
    const struct timespec sleeptime = { .tv_sec = 1, .tv_nsec = 0 };
    int round;

    (void)arg;

    for (round = 1; round < 10; round++) {
        while (sem_timedwait(&sem_one, &timeout) != 0) {
            /* retry until the semaphore has been acquired */
        }
        //sem_wait(&sem_one);
        printf("%s loop %i\n", __func__, round);
        data_one.count = 0;
        nanosleep(&sleeptime, NULL);
        sem_post(&sem_two);
    }

    printf("%s done\n", __func__);
    return NULL;
}

void *thread_func_two(void *arg)
{
    int n;
    unsigned int other;
    struct timespec timeout;
    struct timespec sleeptime;

    timeout.tv_sec  = 0;
    timeout.tv_nsec = 1000;

    sleeptime.tv_sec  = 1;
    sleeptime.tv_nsec = 0;

    for ( n = 1; n < 10 ; n++ ) {
        while ( 0 != sem_timedwait(&sem_two,&timeout) );
        //sem_wait(&sem_two);
        printf("%s loop %i\n",__FUNCTION__,n);
        data_one.count = 0;
        nanosleep(&sleeptime,0);
        sem_post(&sem_one);
    }

    printf("%s done\n",__FUNCTION__);

    return 0;
}

/*
 * Start a thread that runs start_routine with a null argument, using a
 * thread-attribute object that carries the requested scheduling settings.
 *
 *   policy         scheduling policy stored in the attribute object
 *   priority       value stored in sched_param.sched_priority
 *   start_routine  thread entry point
 *   tidp           receives the id of the newly created thread
 *
 * Returns 1 when every step succeeded and 0 otherwise. Each failing step is
 * reported through perror() after its error number is stored in errno.
 *
 * The thread is created only if all attribute calls succeeded, so a 0
 * return caused by an attribute error never leaves an unjoined thread
 * behind. An initialised attribute object is destroyed on every path.
 *
 * NOTE(review): the inherit-scheduler attribute is left at its default, so
 * the policy/priority stored here are presumably not applied to the new
 * thread unless PTHREAD_EXPLICIT_SCHED is also requested -- confirm intent.
 */
int create_thread(int policy,int priority,void *(*start_routine)(void*),pthread_t *tidp)
{
    int retval = 1;
    pthread_attr_t attr;
    struct sched_param sched_param;

    if ( 0 != ( errno = pthread_attr_init(&attr)) )
    {
        /* Nothing was initialised, so there is nothing to clean up. */
        perror("Error: pthread_attr_init failed");
        return 0;
    }

    if ( 0 != ( errno = pthread_attr_setschedpolicy(&attr,policy)) )
    {
        perror("Error:pthread_attr_setschedpolicy failed");
        retval = 0;
    }
    else if ( 0 != ( errno = pthread_attr_getschedparam(&attr,&sched_param)) )
    {
        perror("Error:pthread_attr_getschedparam failed");
        retval = 0;
    }
    else
    {
        /* Only the priority is overridden; other fields keep their defaults. */
        sched_param.sched_priority = priority;

        if ( 0 != ( errno = pthread_attr_setschedparam(&attr,&sched_param)) )
        {
            perror("Error:pthread_attr_setschedparam failed");
            retval = 0;
        }
        else if ( 0 != ( errno = pthread_create(tidp,&attr,start_routine,NULL)) )
        {
            perror("Error: pthread_create failed");
            retval = 0;
        }
    }

    /* Always release the attribute object, whatever happened above. */
    if ( 0 != ( errno = pthread_attr_destroy(&attr)) )
    {
        perror("Error:pthread_attr_destroy failed");
        retval = 0;
    }

    return retval;
}

/*
 * Initialise the two semaphores, start both worker threads, wait for them
 * and print the counters of data_one and data_two.
 *
 * sem_one starts at 1 and sem_two at 0, so thread_func_one can take its
 * semaphore first and the two workers then alternate.
 *
 * Returns 0 when both threads were created and joined successfully,
 * 1 otherwise (including a failed sem_init).
 */
int main(int argc, char *argv[])
{
    int retval[2] = { 1,1 };
    pthread_t tid[2];
    void *status;

    (void)argc;
    (void)argv;

    /* sem_init() returns -1 and sets errno on failure. */
    if ( 0 != sem_init(&sem_one,0,1) || 0 != sem_init(&sem_two,0,0) )
    {
        perror("Error: sem_init failed");
        return 1;
    }

    if ( retval[0] )
    {
        retval[0] = create_thread(SCHED_FIFO,84,thread_func_one,&tid[0]);
    }

    if ( retval[1] )
    {
        retval[1] = create_thread(SCHED_FIFO,84,thread_func_two,&tid[1]);
    }

    /* Only join threads that were actually created. */
    if ( retval[0] )
    {
        if ( 0 != ( errno = pthread_join(tid[0],&status)) )
        {
            perror("Error: pthread_join failed");
            retval[0] = 0;
        }
    }

    if ( retval[1] )
    {
        if ( 0 != ( errno = pthread_join(tid[1],&status)) )
        {
            perror("Error: pthread_join failed");
            retval[1] = 0;
        }
    }

    /* The fields are unsigned int, so they are printed with %u. */
    printf ("data_one.count=%u data_one.other=%u\n",data_one.count,data_one.other);
    printf ("data_two.count=%u data_two.other=%u\n",data_two.count,data_two.other);
    return ( retval[0] && retval[1] ) ? 0 : 1;
}
mpolacek commented 2 years ago

The problem seems to be that i686 libasan doesn't intercept some libpthread functions it should, likely these (thanks to Jakub Jelinek):

sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getaffinity_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getguardsize);   \
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getstacksize);   \
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getstack);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getstacksize);   \
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_barrierattr_getpshared);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_condattr_getclock);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_condattr_getpshared);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_getaffinity_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_getcpuclockid);
sanitizer_common_interceptors.inc:#define INIT_PTHREAD_GETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_getname_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprioceiling);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprotocol);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getpshared);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_gettype);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getkind_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getpshared);
sanitizer_common_interceptors.inc:#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
sanitizer_common_interceptors.inc:#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
sanitizer_common_interceptors.inc:#define INIT_PTHREAD_SIGMASK COMMON_INTERCEPT_FUNCTION(pthread_sigmask);
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_destroy);   \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_getvalue);  \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_init);      \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_open);      \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_post);      \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_timedwait); \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_trywait);   \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_unlink);
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_wait);      \
jakubjelinek commented 2 years ago
$ readelf -Ws /lib/libc.so.6 > /tmp/4; for i in `grep '[^@]@[^@]' /tmp/4 | awk '{print $NF}' | sort -u | sed '/GLIBC_PRIVATE/d;s/@.*//;/^__pthread/d' | grep '\(pthread\|sem\)_'`; do grep -q $i@@ /tmp/4 && grep -q COMMON_INTERCEPT_FUNCTION'('$i')' sanitizer_common/* && echo $i; done | sort -u
pthread_attr_getaffinity_np
pthread_attr_getguardsize
pthread_attr_getstack
pthread_attr_getstacksize
pthread_barrierattr_getpshared
pthread_condattr_getclock
pthread_condattr_getpshared
pthread_getaffinity_np
pthread_getcpuclockid
pthread_getname_np
pthread_mutexattr_getprioceiling
pthread_mutexattr_getprotocol
pthread_mutexattr_getpshared
pthread_mutexattr_getrobust
pthread_mutexattr_gettype
pthread_rwlockattr_getkind_np
pthread_rwlockattr_getpshared
pthread_setname_np
pthread_sigmask
sem_destroy
sem_getvalue
sem_init
sem_open
sem_post
sem_timedwait
sem_trywait
sem_unlink
sem_wait

is what I've used. For some symbols, even on x86_64, I can't see how the interception can currently work: for example, pthread_attr_getaffinity_np had just 2 arguments in 2.3.3 and only got 3 in 2.3.4, so when dlsym resolves to the oldest version, it will just crash. Note that on x86_64, running the above command with /lib64/ instead of /lib results in a similar list, but for some symbols it isn't that urgent; e.g., some GLIBC_2.34 symbols are just the same as the 2.3.4 ones, and the newer symver was probably added just because it now handles some new flags or something similar.

GeekOffTheStreet commented 1 year ago

I'm seeing a similar issue on Ubuntu 22.04 with the gcc-12 and clang-17 packages when using ASan on multilib (32-bit builds): sem_timedwait never wakes up. 32-bit builds without ASan are fine, and building natively for x86_64 with ASan also works.

A couple of traces showing a mix of old and new sem_* implementations:

frame #0: 0xf7fc5129 [vdso]`__kernel_vsyscall + 9
frame #1: 0xf7a23366 libc.so.6`__libc_do_syscall at libc-do-syscall.S:41
frame #2: 0xf7990e81 libc.so.6`__futex_abstimed_wait_common at futex-internal.c:40:12
frame #3: 0xf7990e40 libc.so.6`__futex_abstimed_wait_common(futex_word=0xf6a043a4, expected=1, clockid=<unavailable>, abstime=0xf72e8cbc, private=0, cancel=true) at futex-internal.c:99:11
frame #4: 0xf7990fff libc.so.6`__GI___futex_abstimed_wait_cancelable64(futex_word=<unavailable>, expected=<unavailable>, clockid=<unavailable>, abstime=<no summary available>, private=<no summary available>) at futex-internal.c:139:10 [artificial]
frame #5: 0xf799d031 libc.so.6`do_futex_wait(sem=<unavailable>, abstime=<unavailable>, clockid=<unavailable>) at sem_waitcommon.c:116:9
frame #6: 0xf799d0d9 libc.so.6`__new_sem_wait_slow64(sem=0xf6a043a4, abstime=0xf72e8cbc, clockid=<unavailable>) at sem_waitcommon.c:284:14
frame #7: 0xf799d213 libc.so.6`___sem_timedwait [inlined] ___sem_timedwait64(abstime=0xf72e8cbc, sem=0xf6a043a4) at sem_timedwait.c:40:12
frame #8: 0xf799d208 libc.so.6`___sem_timedwait [inlined] ___sem_timedwait64(abstime=0xf72e8cbc, sem=0xf6a043a4) at sem_timedwait.c:26:1
frame #9: 0xf799d208 libc.so.6`___sem_timedwait(sem=0xf6a043a4, abstime=0xf7230890) at sem_timedwait.c:55:10
frame #10: 0x56aee191 ut_orca_mt`__interceptor_sem_timedwait + 145

and:

  * frame #0: 0xf7fc5129 [vdso]`__kernel_vsyscall + 9
    frame #1: 0xf7a8249c libc.so.6`__old_sem_wait(sem=0xf6a04054) at sem_wait.c:65:13
    frame #2: 0x56aee0b2 ut_orca_mt`__interceptor_sem_wait + 50

See also this other report on SO: https://stackoverflow.com/questions/75005217/linux-32bit-compiled-sem-timedwait-example-with-small-mod-fails-on-64-bit-wh