namhyung / uftrace

Function graph tracer for C/C++/Rust/Python
https://uftrace.github.io/slide/
GNU General Public License v2.0
3.06k stars 474 forks source link

segfault when dealing with libunwind in ARM #722

Open honggyukim opened 5 years ago

honggyukim commented 5 years ago

Here is the code example.

$ cat backtrace.cc
#define __STDC_FORMAT_MACROS
#include <inttypes.h>

#define UNW_LOCAL_ONLY
#include <libunwind.h>

#include <cxxabi.h>

#include <stdio.h>
#include <stdlib.h>

void backtrace()
{
  unw_cursor_t cursor;
  unw_context_t context;

  unw_getcontext(&context);
  unw_init_local(&cursor, &context);

  int n = 0;
  while ( unw_step(&cursor) ) {
    unw_word_t ip, sp, off;

    unw_get_reg(&cursor, UNW_REG_IP, &ip);
    unw_get_reg(&cursor, UNW_REG_SP, &sp);

    char symbol[256] = {"<unknown>"};
    char *name = symbol;

    if ( !unw_get_proc_name(&cursor, symbol, sizeof(symbol), &off) ) {
      int status;
      if ( (name = abi::__cxa_demangle(symbol, NULL, NULL, &status)) == 0 )
        name = symbol;
    }

    printf("#%-2d 0x%016" PRIxPTR " sp=0x%016" PRIxPTR " %s + 0x%" PRIxPTR "\n",
        ++n,
        static_cast<uintptr_t>(ip),
        static_cast<uintptr_t>(sp),
        name,
        static_cast<uintptr_t>(off));

    if ( name != symbol )
      free(name);
  }
}

int fac(int n)
{
  if ( n == 0 ) {
    backtrace();
    return 1;
  } else {
    return n * fac(n - 1);
  }
}

int main()
{
  fac(10);
  return 0;
}

This code can be traced properly as follows in x86_64:

$ g++ -pg -g backtrace.cc -lunwind

$ uftrace record ./a.out 
#1  0x00007fe9c3f51358 sp=0x00007ffe60c1f978  + 0x0
#2  0x0000000000400ca9 sp=0x00007ffe60c1f988 fac(int) + 0x1b
#3  0x0000000000400cbd sp=0x00007ffe60c20290 fac(int) + 0x2f
#4  0x0000000000400cbd sp=0x00007ffe60c202b0 fac(int) + 0x2f
#5  0x0000000000400cbd sp=0x00007ffe60c202d0 fac(int) + 0x2f
#6  0x0000000000400cbd sp=0x00007ffe60c202f0 fac(int) + 0x2f
#7  0x0000000000400cbd sp=0x00007ffe60c20310 fac(int) + 0x2f
#8  0x0000000000400cbd sp=0x00007ffe60c20330 fac(int) + 0x2f
#9  0x0000000000400cbd sp=0x00007ffe60c20350 fac(int) + 0x2f
#10 0x0000000000400cbd sp=0x00007ffe60c20370 fac(int) + 0x2f
#11 0x0000000000400cbd sp=0x00007ffe60c20390 fac(int) + 0x2f
#12 0x0000000000400cbd sp=0x00007ffe60c203b0 fac(int) + 0x2f
#13 0x0000000000400cd6 sp=0x00007ffe60c203d0 main + 0x13
#14 0x00007fe9c31af830 sp=0x00007ffe60c203e0 __libc_start_main + 0xf0
#15 0x0000000000400999 sp=0x00007ffe60c204a0 _start + 0x29
#16 0x0000000000000000 sp=0x00007ffe60c204a8  + 0x29

$ uftrace replay
# DURATION     TID     FUNCTION
            [ 11162] | main() {
            [ 11162] |   fac() {
            [ 11162] |     fac() {
            [ 11162] |       fac() {
            [ 11162] |         fac() {
            [ 11162] |           fac() {
            [ 11162] |             fac() {
            [ 11162] |               fac() {
            [ 11162] |                 fac() {
            [ 11162] |                   fac() {
            [ 11162] |                     fac() {
            [ 11162] |                       fac() {
            [ 11162] |                         backtrace() {
   0.979 us [ 11162] |                           _Ux86_64_getcontext();
  41.242 us [ 11162] |                           _ULx86_64_init_local();
  10.963 us [ 11162] |                           _ULx86_64_step();
   0.476 us [ 11162] |                           _ULx86_64_get_reg();
   0.194 us [ 11162] |                           _ULx86_64_get_reg();
 149.538 us [ 11162] |                           _ULx86_64_get_proc_name();
  12.200 us [ 11162] |                           printf();
   5.160 us [ 11162] |                           _ULx86_64_step();
   0.105 us [ 11162] |                           _ULx86_64_get_reg();
   0.114 us [ 11162] |                           _ULx86_64_get_reg();
 113.358 us [ 11162] |                           _ULx86_64_get_proc_name();
  16.554 us [ 11162] |                           __cxa_demangle();
...
   5.806 us [ 11162] |                           printf();
   6.242 us [ 11162] |                           _ULx86_64_step();
   0.097 us [ 11162] |                           _ULx86_64_get_reg();
   0.092 us [ 11162] |                           _ULx86_64_get_reg();
 148.340 us [ 11162] |                           _ULx86_64_get_proc_name();
   5.833 us [ 11162] |                           printf();
   5.638 us [ 11162] |                           _ULx86_64_step();
   1.921 ms [ 11162] |                         } /* backtrace */
   1.921 ms [ 11162] |                       } /* fac */
   1.921 ms [ 11162] |                     } /* fac */
   1.922 ms [ 11162] |                   } /* fac */
   1.922 ms [ 11162] |                 } /* fac */
   1.922 ms [ 11162] |               } /* fac */
   1.922 ms [ 11162] |             } /* fac */
   1.922 ms [ 11162] |           } /* fac */
   1.922 ms [ 11162] |         } /* fac */
   1.922 ms [ 11162] |       } /* fac */
   1.923 ms [ 11162] |     } /* fac */
   1.923 ms [ 11162] |   } /* fac */
   1.923 ms [ 11162] | } /* main */

But it gets crashed when tracing in ARM.

root@raspberrypi3:~# uftrace record a.out 
#1  0x0000000076ed18e4 sp=0x000000007ef2f788 __gnu_mcount_nc + 0x34
child terminated by signal: 11: Segmentation fault

root@raspberrypi3:~# uftrace replay
# DURATION     TID     FUNCTION
            [  7272] | main() {
            [  7272] |   fac() {
            [  7272] |     fac() {
            [  7272] |       fac() {
            [  7272] |         fac() {
            [  7272] |           fac() {
            [  7272] |             fac() {
            [  7272] |               fac() {
            [  7272] |                 fac() {
            [  7272] |                   fac() {
            [  7272] |                     fac() {
            [  7272] |                       fac() {
            [  7272] |                         backtrace() {
 504.834 us [  7272] |                           _ULarm_init_local();
 858.993 us [  7272] |                           _ULarm_step();
   5.469 us [  7272] |                           _ULarm_get_reg();
   0.833 us [  7272] |                           _ULarm_get_reg();
   6.354 us [  7272] |                           memset();
 810.765 us [  7272] |                           _ULarm_get_proc_name();
  76.613 us [  7272] |                           __cxa_demangle();
  77.134 us [  7272] |                           printf();
 669.258 us [  7272] |                           /* linux:schedule */
  14.101 ms [  7272] |                           /* linux:schedule */
            [  7272] |                           /* linux:task-exit */

uftrace stopped tracing with remaining functions
================================================
task: 7272
[12] backtrace
[11] fac
[10] fac
[9] fac
[8] fac
[7] fac
[6] fac
[5] fac
[4] fac
[3] fac
[2] fac
[1] fac
[0] main
honggyukim commented 5 years ago

Here is the debug output.

root@raspberrypi3:~# uftrace record --debug-domain mcount:3 a.out
mcount: initializing mcount library
mcount: mcount setup done
mcount: new session started: 943a81e175472a8f: a.out
mcount: preparing shmem buffers: tid = 7316
mcount: <0> enter 10c03                               
mcount:  tr->flags: 0, filter mode, count: [0] 0/0
mcount: <1> enter 10bcd       
    ...
mcount: <14> exit  10808                   
mcount: task 7316 recorded 32 bytes (record count = 2)
mcount: rstack[13] ENTRY 10808                    
mcount: rstack[13] EXIT  10808
mcount: <13> enter 10844                          
mcount:  tr->flags: 0, filter mode, count: [0] 0/0
mcount: <14> exit  10844                          
mcount: task 7316 recorded 32 bytes (record count = 2)
mcount: rstack[13] ENTRY 10844                    
mcount: rstack[13] EXIT  10844
mcount: <13> enter 10814                          
mcount:  tr->flags: 0, filter mode, count: [0] 0/0
#1  0x0000000076f2c2a4 sp=0x000000007ea13768 __gnu_mcount_nc + 0x34
mcount: <14> exit  10814
mcount: task 7316 recorded 32 bytes (record count = 2)
mcount: rstack[13] ENTRY 10814
mcount: rstack[13] EXIT  10814                    
mcount: <13> enter 10820
mcount:  tr->flags: 0, filter mode, count: [0] 0/0
child terminated by signal: 11: Segmentation fault
honggyukim commented 5 years ago

After applying recover trigger, it goes one more step, but also crashes. It needs to find where to apply recover trigger more.

root@raspberrypi3:~# uftrace record -T _ULarm_step@recover ./backtrace 
#1  0x0000000000010bd9 sp=0x000000007efab658 fac(int) + 0x18
#2  0x0000000076fbc2a4 sp=0x000000007efab668 __gnu_mcount_nc + 0x34
child terminated by signal: 11: Segmentation fault

root@raspberrypi3:~# uftrace replay
# DURATION     TID     FUNCTION
            [  9054] | main() {
            [  9054] |   fac() {
            [  9054] |     fac() {
            [  9054] |       fac() {
            [  9054] |         fac() {
            [  9054] |           fac() {
            [  9054] |             fac() {
            [  9054] |               fac() {
            [  9054] |                 fac() {
            [  9054] |                   fac() {
            [  9054] |                     fac() {
            [  9054] |                       fac() {
            [  9054] |                         backtrace() {
 288.905 us [  9054] |                           _ULarm_init_local();
 484.998 us [  9054] |                           _ULarm_step();
   3.542 us [  9054] |                           _ULarm_get_reg();
   0.521 us [  9054] |                           _ULarm_get_reg();
   3.958 us [  9054] |                           memset();
 331.092 us [  9054] |                           _ULarm_get_proc_name();
  87.968 us [  9054] |                           __cxa_demangle();
            [  9054] |                           printf() {
 304.270 us [  9054] |                             /* linux:schedule */
  76.770 us [  9054] |                             /* linux:schedule */
 488.071 us [  9054] |                           } /* printf */
   5.677 us [  9054] |                           free();
   0.625 us [  9054] |                           _ULarm_get_reg();
   0.364 us [  9054] |                           _ULarm_get_reg();
   0.521 us [  9054] |                           memset();
 580.467 us [  9054] |                           _ULarm_get_proc_name();
  12.916 us [  9054] |                           __cxa_demangle();
            [  9054] |                           printf() {
 139.375 us [  9054] |                             /* linux:schedule */
  68.854 us [  9054] |                             /* linux:schedule */
 270.780 us [  9054] |                           } /* printf */
  75.208 us [  9054] |                           /* linux:schedule */
 323.957 us [  9054] |                           /* linux:schedule */
   8.463 ms [  9054] |                           /* linux:schedule */
            [  9054] |                           /* linux:task-exit */

uftrace stopped tracing with remaining functions
================================================
...
honggyukim commented 5 years ago

In #721, the first prebuilt binary trace backtrace log was as follows:

task: 3632
[10] _Uarm_step
[9] blink::ThreadHeap::enterSafePoint
[8] blink::ThreadState::enterSafePoint
[7] blink::ThreadState::addInterruptor
[6] blink::Platform::initialize
[5] blink::initialize
[4] content::RenderThreadImpl::InitializeWebKit
[3] content::RenderThreadImpl::Init
[2] content::RenderThreadImpl::RenderThreadImpl
[1] content::RenderThreadImpl::Create
[0] content::ContentMain

It also shows _Uarm_step and it seems to be related to fail when trying to get some backtrace.

void ThreadHeap::enterSafePoint(ThreadState* threadState)
{
    m_safePointBarrier->enterSafePoint(threadState);
}

void SafePointBarrier::enterSafePoint(ThreadState* state)
{
    ASSERT(!state->sweepForbidden());
    pushAllRegisters(this, state, enterSafePointAfterPushRegisters);
}
$ cat third_party/WebKit/Source/platform/heap/asm/SaveRegisters_arm.S
/*
 * typedef void (*PushAllRegistersCallback)(SafePointBarrier*, ThreadState*, intptr_t*);
 * extern "C" void pushAllRegisters(SafePointBarrier*, ThreadState*,
 *                                  PushAllRegistersCallback)
 */

.type pushAllRegisters, %function
.global pushAllRegisters
.hidden pushAllRegisters
#ifdef __thumb__
/* In THUMB Mode jump to ARM stub via bx to ensure CPU mode switch.
 * FIXME: This trampoline is provided to workaround bugs in
 * the THUMB/ARM interworking that appear in the component build.
 * When these issues are resolved this stub can be removed.
 */
.align 2
.code 16
.thumb_func
pushAllRegisters:
        adr r3, pushAllRegistersARM
        bx r3

.type pushAllRegistersARM, %function
.hidden pushAllRegistersARM
.align 4
.code 32
pushAllRegistersARM:
#else
/* ARM Mode */
.align 4
.code 32
pushAllRegisters:
#endif
        /* Push all callee-saved registers and save return address. */
        push {r4-r11, lr}
        /* Pass the two first arguments unchanged (r0, r1)
         * and pass the stack pointer after pushing callee-saved
         * registers to the callback function.
         */
        mov r3, r2
        mov r2, sp .       /* stackEnd */
        blx r3
        /* Discard all the registers, and pop lr into pc which returns
         * and switches mode if needed.
         */
        add sp, sp, #32
        pop {pc}
class SafePointBarrier final {
        ...
    static void enterSafePointAfterPushRegisters(SafePointBarrier* barrier,
                                                 ThreadState* state,
                                                 intptr_t* stackEnd)    // r2: stackEnd
    {
        barrier->doEnterSafePoint(state, stackEnd);
    }
        ...
};

void SafePointBarrier::doEnterSafePoint(ThreadState* state, intptr_t* stackEnd)
{
    state->recordStackEnd(stackEnd);
    state->copyStackUntilSafePointScope();
    if (!atomicDecrement(&m_unparkedThreadCount)) {
        MutexLocker locker(m_mutex);
        m_parked.signal(); // Safe point reached.
    }
}
honggyukim commented 5 years ago

Hi @namhyung, do you think it's tricky to apply auto recover to ARM?

honggyukim commented 5 years ago

After simply applying the following change, backtrace example works fine in ARM.

diff --git a/arch/arm/mcount-arch.h b/arch/arm/mcount-arch.h
index 3f48a89a..7269d962 100644
--- a/arch/arm/mcount-arch.h
+++ b/arch/arm/mcount-arch.h
@@ -32,4 +32,7 @@ unsigned long * mcount_arch_parent_location(struct symtabs *symtabs,
 #define ARCH_PLT0_SIZE  20
 #define ARCH_PLTHOOK_ADDR_OFFSET  0

+#define ARCH_SUPPORT_AUTO_RECOVER  1
+#define ARCH_CAN_RESTORE_PLTHOOK   1
+
 #endif /* MCOUNT_ARCH_H */
root@raspberrypi3:~# uftrace record ./backtrace
#1  0x0000000000010bd9 sp=0x000000007e899678 fac(int) + 0x18
#2  0x0000000000010be7 sp=0x000000007e899688 fac(int) + 0x26
#3  0x0000000000010be7 sp=0x000000007e899698 fac(int) + 0x26
#4  0x0000000000010be7 sp=0x000000007e8996a8 fac(int) + 0x26
#5  0x0000000000010be7 sp=0x000000007e8996b8 fac(int) + 0x26
#6  0x0000000000010be7 sp=0x000000007e8996c8 fac(int) + 0x26
#7  0x0000000000010be7 sp=0x000000007e8996d8 fac(int) + 0x26
#8  0x0000000000010be7 sp=0x000000007e8996e8 fac(int) + 0x26
#9  0x0000000000010be7 sp=0x000000007e8996f8 fac(int) + 0x26
#10 0x0000000000010be7 sp=0x000000007e899708 fac(int) + 0x26
#11 0x0000000000010be7 sp=0x000000007e899718 fac(int) + 0x26
#12 0x0000000000010c09 sp=0x000000007e899728 main + 0x10
#13 0x0000000076cdf680 sp=0x000000007e899730 __libc_start_main + 0x118
#14 0x00000000000108cd sp=0x000000007e899878 _start + 0x34

root@raspberrypi3:~# uftrace replay
# DURATION     TID     FUNCTION
            [  9951] | main() {
            [  9951] |   fac() {
            [  9951] |     fac() {
            [  9951] |       fac() {
            [  9951] |         fac() {
            [  9951] |           fac() {
            [  9951] |             fac() {
            [  9951] |               fac() {
            [  9951] |                 fac() {
            [  9951] |                   fac() {
            [  9951] |                     fac() {
            [  9951] |                       fac() {
            [  9951] |                         backtrace() {
 445.050 us [  9951] |                           _ULarm_init_local();
 835.829 us [  9951] |                           _ULarm_step();
   5.781 us [  9951] |                           _ULarm_get_reg();
   1.042 us [  9951] |                           _ULarm_get_reg();
   8.489 us [  9951] |                           memset();
 594.997 us [  9951] |                           _ULarm_get_proc_name();
  70.937 us [  9951] |                           __cxa_demangle();
 803.851 us [  9951] |                           printf();
  11.250 us [  9951] |                           free();
  36.770 us [  9951] |                           _ULarm_step();
   0.885 us [  9951] |                           _ULarm_get_reg();
   0.833 us [  9951] |                           _ULarm_get_reg();
   1.093 us [  9951] |                           memset();
 664.007 us [  9951] |                           _ULarm_get_proc_name();
  10.624 us [  9951] |                           __cxa_demangle();
 497.394 us [  9951] |                           printf();
...
 811.402 us [  9951] |                           _ULarm_get_proc_name();
   2.865 us [  9951] |                           __cxa_demangle();
 437.706 us [  9951] |                           printf();
 426.196 us [  9951] |                           _ULarm_step();
   1.094 us [  9951] |                           _ULarm_get_reg();
   0.989 us [  9951] |                           _ULarm_get_reg();
   1.771 us [  9951] |                           memset();
 667.914 us [  9951] |                           _ULarm_get_proc_name();
   3.125 us [  9951] |                           __cxa_demangle();
 528.748 us [  9951] |                           printf();
  32.344 us [  9951] |                           _ULarm_step();
  18.304 ms [  9951] |                         } /* backtrace */
  18.326 ms [  9951] |                       } /* fac */
  18.329 ms [  9951] |                     } /* fac */
  18.331 ms [  9951] |                   } /* fac */
  18.333 ms [  9951] |                 } /* fac */
  18.335 ms [  9951] |               } /* fac */
  18.337 ms [  9951] |             } /* fac */
  18.339 ms [  9951] |           } /* fac */
  18.341 ms [  9951] |         } /* fac */
  18.343 ms [  9951] |       } /* fac */
  18.346 ms [  9951] |     } /* fac */
  18.348 ms [  9951] |   } /* fac */
  18.353 ms [  9951] | } /* main */

However, it gets crashed when tracing other bigger programs.

honggyukim commented 5 years ago

@namhyung I don't know if you agree to put critical tag to here but it's not possible to apply uftrace to chrome without fixing or avoiding this problem. The solution could be an auto recover or just a workaround. I have to find a way to avoid this to move forward.

honggyukim commented 5 years ago

Here is a related commit.

commit 0ba73d0b324d82946095d898c9fadb977eeb7853 Author: Namhyung Kim namhyung@gmail.com CommitDate: Wed Jan 16 19:54:44 2019 +0900

    mcount: Add ARCH_CAN_RESTORE_PLTHOOK logic

    Some arch like ARM/AArch64 call PLT functions before setting return
    address in the stack as they have the linker register to save it.  So
    plt_hooker() sees an intermediate address it sets up during the hooking
    and the address cannot be used to restore real return address after
    plt_hooker() returns.

    That means it cannot restore all return stacks (rstack) from the finish
    trigger and needs to keep up the rstack for PLT functions to be returned
    to the real callers after plthook_exit().

    Signed-off-by: Namhyung Kim <namhyung@gmail.com>
diff --git a/libmcount/misc.c b/libmcount/misc.c
--- a/libmcount/misc.c
+++ b/libmcount/misc.c
@@ -130,16 +141,21 @@
 /* restore saved original return address */
 void mcount_rstack_restore(struct mcount_thread_data *mtdp)
 {
        int idx;
+       struct mcount_ret_stack *rstack;

        /* reverse order due to tail calls */
        for (idx = mtdp->idx - 1; idx >= 0; idx--) {
-               unsigned long parent_ip = mtdp->rstack[idx].parent_ip;
+               rstack = &mtdp->rstack[idx];

-               if (parent_ip == (unsigned long)mcount_return ||
-                   parent_ip == (unsigned long)plthook_return)
+               if (rstack->parent_ip == (unsigned long)mcount_return ||
+                   rstack->parent_ip == (unsigned long)plthook_return)
+                       continue;
+
+               if (!ARCH_CAN_RESTORE_PLTHOOK &&
+                   rstack->dyn_idx != MCOUNT_INVALID_DYNIDX)
                        continue;

-               *mtdp->rstack[idx].parent_loc = parent_ip;
+               *rstack->parent_loc = rstack->parent_ip;
        }
 }
honggyukim commented 5 years ago

@namhyung According to the description above, we cannot restore return address only for PLT functions. Is it possible to use auto recover when --no-libcall is given in ARM/AArch64?

honggyukim commented 5 years ago

In addition to this, it seems that we can get the return address for known problematic functions such as backtrace, and exception handling functions.

Is it possible to handle them in a limited way after getting the return address? It looks as follows:

diff --git a/libmcount/wrap.c b/libmcount/wrap.c
index 291c7a66..3ef62a44 100644
--- a/libmcount/wrap.c
+++ b/libmcount/wrap.c
@@ -260,6 +260,10 @@ __visible_default int backtrace(void **buffer, int sz)
 {
        int ret;
        struct mcount_thread_data *mtdp;
+#if defined(__arm__) || defined(__thumb__)
+       register unsigned long *lr asm("lr");
+       /* "lr" register is to be used as a return address */
+#endif

        if (unlikely(real_backtrace == NULL))
                mcount_hook_functions();
namhyung commented 5 years ago

Due to the link register in ARM it's a diiferent situation to x86 which always use stack for saving return address. I need to think about what we can do safely..

honggyukim commented 5 years ago

It seems that the case you're worrying about is only for library function calls because ARM pushes lr register to stack before calling general mcount functions. Is it right?

namhyung commented 5 years ago

I also worry about the mcount case but not sure.. Currently I don't have a box to test unfortunately.

honggyukim commented 5 years ago

It doesn't crash on AArch64 machine but it didn't work as expected.

$ file a.out 
a.out: ELF 64-bit LSB shared object, ARM aarch64, version 1 (SYSV), dynamically linked, interpreter /lib/ld-linux-aarch64.so.1, for GNU/Linux 3.7.0, BuildID[sha1]=a09e61748782844a670e10df2d2497cbf7f8027c, with debug_info, not stripped

$ ./a.out
#1  0x0000aaaad098ddc0 sp=0x0000ffffd6c2aca0 fac(int) + 0x28
#2  0x0000aaaad098ddd4 sp=0x0000ffffd6c2acc0 fac(int) + 0x3c
#3  0x0000aaaad098ddd4 sp=0x0000ffffd6c2ace0 fac(int) + 0x3c
#4  0x0000aaaad098ddd4 sp=0x0000ffffd6c2ad00 fac(int) + 0x3c
#5  0x0000aaaad098ddd4 sp=0x0000ffffd6c2ad20 fac(int) + 0x3c
#6  0x0000aaaad098ddd4 sp=0x0000ffffd6c2ad40 fac(int) + 0x3c
#7  0x0000aaaad098ddd4 sp=0x0000ffffd6c2ad60 fac(int) + 0x3c
#8  0x0000aaaad098ddd4 sp=0x0000ffffd6c2ad80 fac(int) + 0x3c
#9  0x0000aaaad098ddd4 sp=0x0000ffffd6c2ada0 fac(int) + 0x3c
#10 0x0000aaaad098ddd4 sp=0x0000ffffd6c2adc0 fac(int) + 0x3c
#11 0x0000aaaad098ddd4 sp=0x0000ffffd6c2ade0 fac(int) + 0x3c
#12 0x0000aaaad098de00 sp=0x0000ffffd6c2ae00 main + 0x18
#13 0x0000ffff8a412d24 sp=0x0000ffffd6c2ae10 __libc_start_main + 0xe4
#14 0x0000aaaad098da74 sp=0x0000ffffd6c2af50 _start + 0x34

$ uftrace record a.out
#1  0x0000ffff805a3784 sp=0x0000ffffc548e6c0 _mcount + 0x44

$ uftrace replay
# DURATION     TID     FUNCTION
            [ 28388] | main() {
            [ 28388] |   fac() {
            [ 28388] |     fac() {
            [ 28388] |       fac() {
            [ 28388] |         fac() {
            [ 28388] |           fac() {
            [ 28388] |             fac() {
            [ 28388] |               fac() {
            [ 28388] |                 fac() {
            [ 28388] |                   fac() {
            [ 28388] |                     fac() {
            [ 28388] |                       fac() {
            [ 28388] |                         backtrace() {
 880.363 us [ 28388] |                           _ULaarch64_init_local();
 802.081 us [ 28388] |                           _ULaarch64_step();
   3.645 us [ 28388] |                           _ULaarch64_get_reg();
   1.823 us [ 28388] |                           _ULaarch64_get_reg();
   4.219 us [ 28388] |                           memset();
   1.221 ms [ 28388] |                           _ULaarch64_get_proc_name();
  67.708 us [ 28388] |                           __cxa_demangle();
  79.791 us [ 28388] |                           printf();
 129.375 us [ 28388] |                           _ULaarch64_step();
   3.230 ms [ 28388] |                         } /* backtrace */
   3.244 ms [ 28388] |                       } /* fac */
   3.246 ms [ 28388] |                     } /* fac */
   3.247 ms [ 28388] |                   } /* fac */
   3.248 ms [ 28388] |                 } /* fac */
   3.249 ms [ 28388] |               } /* fac */
   3.250 ms [ 28388] |             } /* fac */
   3.251 ms [ 28388] |           } /* fac */
   3.252 ms [ 28388] |         } /* fac */
   3.253 ms [ 28388] |       } /* fac */
   3.256 ms [ 28388] |     } /* fac */
   3.258 ms [ 28388] |   } /* fac */
   3.260 ms [ 28388] | } /* main */
honggyukim commented 3 years ago

Same program in C version.

#define UNW_LOCAL_ONLY
#include <libunwind.h>

#include <stdio.h>
#include <stdlib.h>

void backtrace()
{
  unw_cursor_t cursor;
  unw_context_t context;

  unw_getcontext(&context);
  unw_init_local(&cursor, &context);

  int n = 0;
  while ( unw_step(&cursor) ) {
    unw_word_t ip, sp, off;

    unw_get_reg(&cursor, UNW_REG_IP, &ip);
    unw_get_reg(&cursor, UNW_REG_SP, &sp);

    char symbol[256] = {"<unknown>"};
    char *name = symbol;

    if ( !unw_get_proc_name(&cursor, symbol, sizeof(symbol), &off) ) {
      name = symbol;
    }

    printf("#%-2d 0x%016" PRIxPTR " sp=0x%016" PRIxPTR " %s + 0x%" PRIxPTR "\n",
        ++n,
        (uintptr_t)(ip),
        (uintptr_t)(sp),
        name,
        (uintptr_t)(off));

    if ( name != symbol )
      free(name);
  }
}

int fac(int n)
{
  if ( n == 0 ) {
    backtrace();
    return 1;
  } else {
    return n * fac(n - 1);
  }
}

int main()
{
  fac(10);
  return 0;
}
namhyung commented 3 years ago

How about using --estimate-return?

honggyukim commented 3 years ago

Sorry, I don't have an ARM 32-bit board to test it.