llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
27.75k stars 11.43k forks source link

[ASan] sem_timedwait hangs with 32-bit binary #58023

Open mpolacek opened 1 year ago

mpolacek commented 1 year ago

The following program hangs with -m32, but not without:

$ clang -m32 -fsanitize=address -lpthread sem_timedwait.c; ./a.out

(and therefore the same happens with gcc)

#include <stdio.h>
#include <errno.h>
#include <pthread.h>
#include <sys/types.h>
#include <unistd.h>
#include <time.h>
#include <semaphore.h>

/*
 * Record of four unsigned counters. The file instantiates it twice
 * (data_one and data_two).
 */
typedef struct {
    unsigned start;
    unsigned count;   /* reset to 0 by the worker threads, printed by main */
    unsigned other;   /* printed by main, never written after initialisation */
    unsigned iter;
} threadlocal_t;

/* Two zero-initialised counter records, each aligned to a 64-byte boundary. */
threadlocal_t data_one __attribute__((aligned(64))) = { 0 };
threadlocal_t data_two __attribute__((aligned(64))) = { 0 };

/*
 * Semaphores used by the two worker threads to hand the turn to each other:
 * worker one waits on sem_one and posts sem_two, worker two does the reverse.
 */
sem_t sem_one;
sem_t sem_two;

/*
 * Worker one. For nine rounds: poll sem_one until it is acquired, print the
 * round number, clear data_one.count, sleep for one second, then post sem_two
 * so that the other worker can run.
 *
 * arg is ignored. Always returns a null pointer.
 */
void *thread_func_one(void *arg)
{
    /*
     * NOTE(review): POSIX defines the sem_timedwait() timeout as an absolute
     * time, so {0 s, 1000 ns} is presumably always in the past and the wait
     * below behaves as a poll. This is left untouched because it is the call
     * pattern the report is about.
     */
    struct timespec timeout   = { .tv_sec = 0, .tv_nsec = 1000 };
    struct timespec sleeptime = { .tv_sec = 1, .tv_nsec = 0 };
    int round;

    (void)arg;

    for (round = 1; round < 10; ++round) {
        /* Keep retrying until the semaphore has actually been taken. */
        while (sem_timedwait(&sem_one, &timeout) != 0) {
            /* retry */
        }

        printf("%s loop %i\n", __FUNCTION__, round);
        data_one.count = 0;
        nanosleep(&sleeptime, 0);
        sem_post(&sem_two);
    }

    printf("%s done\n", __FUNCTION__);
    return 0;
}

/*
 * Worker two. For nine rounds: poll sem_two until it is acquired, print the
 * round number, clear data_one.count, sleep for one second, then post sem_one
 * so that the other worker can run.
 *
 * arg is ignored. Always returns a null pointer.
 */
void *thread_func_two(void *arg)
{
    /*
     * NOTE(review): POSIX defines the sem_timedwait() timeout as an absolute
     * time, so {0 s, 1000 ns} is presumably always in the past and the wait
     * below behaves as a poll. This is left untouched because it is the call
     * pattern the report is about.
     */
    struct timespec timeout   = { .tv_sec = 0, .tv_nsec = 1000 };
    struct timespec sleeptime = { .tv_sec = 1, .tv_nsec = 0 };
    int round;

    (void)arg;

    for (round = 1; round < 10; ++round) {
        /* Keep retrying until the semaphore has actually been taken. */
        while (sem_timedwait(&sem_two, &timeout) != 0) {
            /* retry */
        }

        printf("%s loop %i\n", __FUNCTION__, round);
        /*
         * NOTE(review): this clears data_one.count, not data_two.count;
         * data_two is never written by either worker. Possibly a copy-paste
         * slip in the reproducer - kept as is.
         */
        data_one.count = 0;
        nanosleep(&sleeptime, 0);
        sem_post(&sem_one);
    }

    printf("%s done\n", __FUNCTION__);

    return 0;
}

/*
 * Start a thread that runs start_routine with a null argument, using a
 * thread-attribute object configured with the requested scheduling policy
 * and priority.
 *
 *   policy        - value handed to pthread_attr_setschedpolicy()
 *   priority      - value stored in sched_param.sched_priority
 *   start_routine - entry point of the new thread
 *   tidp          - receives the id of the created thread
 *
 * Returns 1 when every step succeeded and 0 otherwise. Each failing step is
 * reported through perror() after its error number has been stored in errno.
 *
 * Every step runs only if all earlier steps succeeded, so no thread is ever
 * created from an uninitialised or partially configured attribute object.
 * The attribute object is destroyed on every path on which it was
 * initialised.
 *
 * NOTE(review): pthread_attr_setinheritsched() is never called here. With the
 * default inherit-scheduler setting the policy/priority stored in the
 * attribute object are presumably not applied to the new thread - confirm
 * whether that is intended before relying on the requested policy.
 */
int create_thread(int policy,int priority,void *(*start_routine)(void*),pthread_t *tidp)
{
    pthread_attr_t attr;
    struct sched_param sched_param;
    int retval = 1;

    if ( 0 != ( errno = pthread_attr_init(&attr)) )
    {
        /* Nothing to clean up: attr was never initialised. */
        perror("Error: pthread_attr_init failed");
        return 0;
    }

    if ( 0 != ( errno = pthread_attr_setschedpolicy(&attr,policy)) )
    {
        perror("Error:pthread_attr_setschedpolicy failed");
        retval = 0;
    }

    if ( retval && 0 != ( errno = pthread_attr_getschedparam(&attr,&sched_param)) )
    {
        perror("Error:pthread_attr_getschedparam failed");
        retval = 0;
    }

    if ( retval )
    {
        sched_param.sched_priority = priority;

        if ( 0 != ( errno = pthread_attr_setschedparam(&attr,&sched_param)) )
        {
            perror("Error:pthread_attr_setschedparam failed");
            retval = 0;
        }
    }

    /* Only start the thread when the attribute object is fully set up. */
    if ( retval && 0 != ( errno = pthread_create(tidp,&attr,start_routine,0)) )
    {
        perror("Error: pthread_create failed");
        retval = 0;
    }

    /* Release the attribute object whether or not the thread was started. */
    if ( 0 != ( errno = pthread_attr_destroy(&attr)) )
    {
        perror("Error:pthread_attr_destroy failed");
        retval = 0;
    }

    return retval;
}

/*
 * Program entry point.
 *
 * Initialises sem_one to 1 and sem_two to 0, starts the two worker threads
 * via create_thread(), joins every worker that was started, and finally
 * prints the count/other fields of data_one and data_two.
 *
 * Returns 0 when both workers were created and joined successfully and 1
 * otherwise, including when a semaphore could not be initialised.
 * argc and argv are unused.
 */
int main(int argc, char *argv[])
{
    int retval[2];
    pthread_t tid[2];
    void *status;
    int i;

    (void)argc;
    (void)argv;

    /* Without working semaphores the workers could never make progress. */
    if ( 0 != sem_init(&sem_one,0,1) || 0 != sem_init(&sem_two,0,0) )
    {
        perror("Error: sem_init failed");
        return 1;
    }

    retval[0] = create_thread(SCHED_FIFO,84,thread_func_one,&tid[0]);
    retval[1] = create_thread(SCHED_FIFO,84,thread_func_two,&tid[1]);

    /* Join only the workers that were actually started, in creation order. */
    for ( i = 0; i < 2; i++ )
    {
        if ( retval[i] && 0 != ( errno = pthread_join(tid[i],&status)) )
        {
            perror("Error: pthread_join failed");
            retval[i] = 0;
        }
    }

    /* The fields are unsigned int, so they are printed with %u. */
    printf ("data_one.count=%u data_one.other=%u\n",data_one.count,data_one.other);
    printf ("data_two.count=%u data_two.other=%u\n",data_two.count,data_two.other);
    return ( retval[0] && retval[1] ) ? 0 : 1;
}
mpolacek commented 1 year ago

The problem seems to be that i686 libasan doesn't intercept some libpthread functions it should, likely these (thanks to Jakub Jelinek):

sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getaffinity_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getguardsize);   \
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getstacksize);   \
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getstack);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_attr_getstacksize);   \
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_barrierattr_getpshared);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_condattr_getclock);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_condattr_getpshared);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_getaffinity_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_getcpuclockid);
sanitizer_common_interceptors.inc:#define INIT_PTHREAD_GETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_getname_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprioceiling);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprotocol);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getpshared);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_gettype);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getkind_np);
sanitizer_common_interceptors.inc:  COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getpshared);
sanitizer_common_interceptors.inc:#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
sanitizer_common_interceptors.inc:#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
sanitizer_common_interceptors.inc:#define INIT_PTHREAD_SIGMASK COMMON_INTERCEPT_FUNCTION(pthread_sigmask);
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_destroy);   \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_getvalue);  \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_init);      \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_open);      \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_post);      \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_timedwait); \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_trywait);   \
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_unlink);
sanitizer_common_interceptors.inc:    COMMON_INTERCEPT_FUNCTION(sem_wait);      \
jakubjelinek commented 1 year ago
$ readelf -Ws /lib/libc.so.6 > /tmp/4; for i in `grep '[^@]@[^@]' /tmp/4 | awk '{print $NF}' | sort -u | sed '/GLIBC_PRIVATE/d;s/@.*//;/^__pthread/d' | grep '\(pthread\|sem\)_'`; do grep -q $i@@ /tmp/4 && grep -q COMMON_INTERCEPT_FUNCTION'('$i')' sanitizer_common/* && echo $i; done | sort -u
pthread_attr_getaffinity_np
pthread_attr_getguardsize
pthread_attr_getstack
pthread_attr_getstacksize
pthread_barrierattr_getpshared
pthread_condattr_getclock
pthread_condattr_getpshared
pthread_getaffinity_np
pthread_getcpuclockid
pthread_getname_np
pthread_mutexattr_getprioceiling
pthread_mutexattr_getprotocol
pthread_mutexattr_getpshared
pthread_mutexattr_getrobust
pthread_mutexattr_gettype
pthread_rwlockattr_getkind_np
pthread_rwlockattr_getpshared
pthread_setname_np
pthread_sigmask
sem_destroy
sem_getvalue
sem_init
sem_open
sem_post
sem_timedwait
sem_trywait
sem_unlink
sem_wait

is what I've used. For some symbols even on x86_64 I can't see how the interception can currently work, say pthread_attr_getaffinity_np had just 2 arguments in 2.3.3 and only got 3 in 2.3.4, so when dlsym resolves to the oldest one, it will just crash. Note, for x86_64 running the above command with /lib64/ instead of /lib results in similar list, but for some symbols it isn't that urgent, e.g. some GLIBC_2.34 symbols are just the same as 2.3.4 ones and the newer symver was probably added just because it now handles some new flags or something similar.

GeekOffTheStreet commented 10 months ago

I'm seeing a similar issue on Ubuntu 22.04 with the gcc-12 and clang-17 packages when using ASan on multilib (32-bit) builds: sem_timedwait never wakes up. 32-bit builds without ASan are fine, and native x86_64 builds with ASan also work.

Here are a couple of traces showing the mix of old and new symbol versions:

frame #0: 0xf7fc5129 [vdso]`__kernel_vsyscall + 9
frame #1: 0xf7a23366 libc.so.6`__libc_do_syscall at libc-do-syscall.S:41
frame #2: 0xf7990e81 libc.so.6`__futex_abstimed_wait_common at futex-internal.c:40:12
frame #3: 0xf7990e40 libc.so.6`__futex_abstimed_wait_common(futex_word=0xf6a043a4, expected=1, clockid=<unavailable>, abstime=0xf72e8cbc, private=0, cancel=true) at futex-internal.c:99:11
frame #4: 0xf7990fff libc.so.6`__GI___futex_abstimed_wait_cancelable64(futex_word=<unavailable>, expected=<unavailable>, clockid=<unavailable>, abstime=<no summary available>, private=<no summary available>) at futex-internal.c:139:10 [artificial]
frame #5: 0xf799d031 libc.so.6`do_futex_wait(sem=<unavailable>, abstime=<unavailable>, clockid=<unavailable>) at sem_waitcommon.c:116:9
frame #6: 0xf799d0d9 libc.so.6`__new_sem_wait_slow64(sem=0xf6a043a4, abstime=0xf72e8cbc, clockid=<unavailable>) at sem_waitcommon.c:284:14
frame #7: 0xf799d213 libc.so.6`___sem_timedwait [inlined] ___sem_timedwait64(abstime=0xf72e8cbc, sem=0xf6a043a4) at sem_timedwait.c:40:12
frame #8: 0xf799d208 libc.so.6`___sem_timedwait [inlined] ___sem_timedwait64(abstime=0xf72e8cbc, sem=0xf6a043a4) at sem_timedwait.c:26:1
frame #9: 0xf799d208 libc.so.6`___sem_timedwait(sem=0xf6a043a4, abstime=0xf7230890) at sem_timedwait.c:55:10
frame #10: 0x56aee191 ut_orca_mt`__interceptor_sem_timedwait + 145

and:

  * frame #0: 0xf7fc5129 [vdso]`__kernel_vsyscall + 9
    frame #1: 0xf7a8249c libc.so.6`__old_sem_wait(sem=0xf6a04054) at sem_wait.c:65:13
    frame #2: 0x56aee0b2 ut_orca_mt`__interceptor_sem_wait + 50

See also this other report on SO: https://stackoverflow.com/questions/75005217/linux-32bit-compiled-sem-timedwait-example-with-small-mod-fails-on-64-bit-wh