namhyung / uftrace

Function graph tracer for C/C++/Rust/Python
https://uftrace.github.io/slide/
GNU General Public License v2.0
3.04k stars 473 forks source link

clang compiled uftrace cannot record gcc compiled `t-dwaf1` with args #767

Open honggyukim opened 5 years ago

honggyukim commented 5 years ago

Here is the build sequence.

$ CC=clang ./configure

$ make

$ clang -pg -g tests/s-dwarf1.c -o t-dwarf1.clang
$ gcc   -pg -g tests/s-dwarf1.c -o t-dwarf1.gcc

clang compiled uftrace can record t-dwarf1.clang with a problem.

$ ./uftrace record -L. -a t-dwarf1.clang

$ ./uftrace replay
# DURATION     TID     FUNCTION
   2.068 us [ 15269] | __monstartup();
   0.588 us [ 15269] | __cxa_atexit();
            [ 15269] | main() {
   2.370 us [ 15269] |   foo(-1, 32768) = 32767;
            [ 15269] |   bar("string argument", 'c', 0.000000, &null) {
   0.398 us [ 15269] |     null("NULL");
 209.638 us [ 15269] |   } = 0.000000; /* bar */
            [ 15269] |   baz(ONE) {
   0.229 us [ 15269] |     foo(1, -1) = 0;
   0.983 us [ 15269] |   } /* baz */
 215.485 us [ 15269] | } = 0; /* main */

However, it gets segfault when recording t-dwarf1.gcc with args.

$ ./uftrace record -L. -a t-dwarf1.gcc
child terminated by signal: 11: Segmentation fault

$ ./uftrace replay
# DURATION     TID     FUNCTION
   1.591 us [ 15310] | __monstartup();
   0.661 us [ 15310] | __cxa_atexit();
            [ 15310] | main() {
   1.797 us [ 15310] |   foo(-1, 32768) = 32767;
            [ 15310] |   bar("string argument", 'c', 0.000000, &null) {
   0.367 us [ 15310] |     null("NULL");
 216.886 us [ 15310] |   } = 0.000000; /* bar */
            [ 15310] |   baz(ONE) {
   0.202 us [ 15310] |     foo(1, -1);
 326.641 us [ 15310] |     /* linux:schedule */
 173.762 ms [ 15310] |     /* linux:schedule */
            [ 15310] |     /* linux:task-exit */

uftrace stopped tracing with remaining functions
================================================
task: 15310
[1] baz
[0] main

Here is a backtrace from gdb.

(gdb) bt
#0  save_retval (rstack=0x63ca00, mtdp=<optimized out>, retval=<optimized out>) at /home/honggyu/work/uftrace/libmcount/record.c:527
#1  record_trace_data (mtdp=<optimized out>, mrstack=<optimized out>, retval=<optimized out>) at /home/honggyu/work/uftrace/libmcount/record.c:1054
#2  0x00007ffff7ba6677 in mcount_exit_filter_record (mtdp=0x7ffff7fc6728, rstack=0x63ca00, retval=<optimized out>) at /home/honggyu/work/uftrace/libmcount/mcount.c:1121
#3  0x00007ffff7ba6aa7 in mcount_exit (retval=0x7fffffffdf08) at /home/honggyu/work/uftrace/libmcount/mcount.c:1285
#4  0x00007ffff7bc474a in mcount_return () at mcount.S:98
#5  0x00007ffff7bc472f in mcount () at mcount.S:80
#6  0x0000000100000000 in ?? ()
#7  0x00007fffffffdf60 in ?? ()
#8  0x0000000000400710 in main () at tests/s-dwarf1.c:29
Backtrace stopped: frame did not save the PC
 521│ void save_retval(struct mcount_thread_data *mtdp,
 522│                  struct mcount_ret_stack *rstack, long *retval)
 523│ {
 524│         struct list_head *args_spec = rstack->pargs;
 525│         void *argbuf = get_argbuf(mtdp, rstack);
 526│         unsigned size;
 527├───────> struct mcount_arg_context ctx = {
 528│                 .retval = retval,
 529│                 .regions = &mtdp->mem_regions,
 530│                 .arch = &mtdp->arch,
 531│         };
 532│
 533│         size = save_to_argbuf(argbuf, args_spec, &ctx);
 534│         if (size == -1U) {
 535│                 pr_warn("retval data is too big\n");
 536│                 rstack->flags &= ~MCOUNT_FL_RETVAL;
 537│                 return;
 538│         }
 539│
 540│         *(uint32_t *)argbuf = size;
 541│ }

uftrace/libmcount/record.c
honggyukim commented 5 years ago

gcc compiled uftrace can record both binaries properly.

$ ./uftrace record -L. -a t-dwarf1.gcc

$ ./uftrace record -L. -a t-dwarf1.clang

$ ./uftrace replay
# DURATION     TID     FUNCTION
   0.921 us [ 16125] | __monstartup();
   0.567 us [ 16125] | __cxa_atexit();
            [ 16125] | main() {
   0.691 us [ 16125] |   foo(-1, 32768) = 32767;
            [ 16125] |   bar("string argument", 'c', 0.000010, &null) {
   0.303 us [ 16125] |     null("NULL");
 207.820 us [ 16125] |   } = -1.000000; /* bar */
            [ 16125] |   baz(ONE) {
   0.218 us [ 16125] |     foo(1, -1) = 0;
   0.953 us [ 16125] |   } /* baz */
 211.331 us [ 16125] | } = 0; /* main */
honggyukim commented 5 years ago

Hmm.. I found one more problem on clang compiled uftrace.

$ ./uftrace record -L. -a t-dwarf1.clang

$ ./uftrace replay
# DURATION     TID     FUNCTION
   2.068 us [ 15269] | __monstartup();
   0.588 us [ 15269] | __cxa_atexit();
            [ 15269] | main() {
   2.370 us [ 15269] |   foo(-1, 32768) = 32767;
            [ 15269] |   bar("string argument", 'c', 0.000000, &null) {
   0.398 us [ 15269] |     null("NULL");
 209.638 us [ 15269] |   } = 0.000000; /* bar */
            [ 15269] |   baz(ONE) {
   0.229 us [ 15269] |     foo(1, -1) = 0;
   0.983 us [ 15269] |   } /* baz */
 215.485 us [ 15269] | } = 0; /* main */

The return value of bar should be -1.000000 but it shows 0.000000, which is incorrect.

$ cat tests/s-dwarf1.c 
    ...
float bar(char *s, char c, double d, void (*fp)(char *s))
{
        fp(NULL);
        return -1.0;
}
    ...
honggyukim commented 5 years ago

We can just ignore clang compiled uftrace, but I think there may be some problems of address range handling that doesn't show the problem.

honggyukim commented 5 years ago

Here is the disassembled output for both binaries. t-dwarf1.gcc

$ objdump -d t-dwarf1.gcc
    ...
000000000040066b <bar>:
  40066b:       55                      push   %rbp
  40066c:       48 89 e5                mov    %rsp,%rbp
  40066f:       48 83 ec 20             sub    $0x20,%rsp
  400673:       e8 58 fe ff ff          callq  4004d0 <mcount@plt>
  400678:       48 89 7d f8             mov    %rdi,-0x8(%rbp)
  40067c:       89 f0                   mov    %esi,%eax
  40067e:       f2 0f 11 45 e8          movsd  %xmm0,-0x18(%rbp)
  400683:       48 89 55 e0             mov    %rdx,-0x20(%rbp)
  400687:       88 45 f4                mov    %al,-0xc(%rbp)
  40068a:       48 8b 45 e0             mov    -0x20(%rbp),%rax
  40068e:       bf 00 00 00 00          mov    $0x0,%edi
  400693:       ff d0                   callq  *%rax
  400695:       f3 0f 10 05 4f 01 00    movss  0x14f(%rip),%xmm0        # 4007ec <_IO_stdin_used+0x14>
  40069c:       00 
  40069d:       c9                      leaveq 
  40069e:       c3                      retq 
    ...

t-dwarf1.clang

$ objdump -d t-dwarf1.clang
    ...
00000000004006a0 <bar>:
  4006a0:       55                      push   %rbp
  4006a1:       48 89 e5                mov    %rsp,%rbp
  4006a4:       48 83 ec 40             sub    $0x40,%rsp
  4006a8:       40 88 f0                mov    %sil,%al
  4006ab:       88 45 df                mov    %al,-0x21(%rbp)
  4006ae:       48 89 55 d0             mov    %rdx,-0x30(%rbp)
  4006b2:       f2 0f 11 45 c8          movsd  %xmm0,-0x38(%rbp)
  4006b7:       48 89 7d c0             mov    %rdi,-0x40(%rbp)
  4006bb:       e8 10 fe ff ff          callq  4004d0 <mcount@plt>
  4006c0:       31 f6                   xor    %esi,%esi
  4006c2:       89 f7                   mov    %esi,%edi
  4006c4:       48 8b 55 c0             mov    -0x40(%rbp),%rdx
  4006c8:       48 89 55 f8             mov    %rdx,-0x8(%rbp)
  4006cc:       8a 45 df                mov    -0x21(%rbp),%al
  4006cf:       88 45 f7                mov    %al,-0x9(%rbp)
  4006d2:       f2 0f 10 45 c8          movsd  -0x38(%rbp),%xmm0
  4006d7:       f2 0f 11 45 e8          movsd  %xmm0,-0x18(%rbp)
  4006dc:       48 8b 4d d0             mov    -0x30(%rbp),%rcx
  4006e0:       48 89 4d e0             mov    %rcx,-0x20(%rbp)
  4006e4:       ff 55 e0                callq  *-0x20(%rbp)
  4006e7:       f3 0f 10 04 25 5c 08    movss  0x40085c,%xmm0
  4006ee:       40 00 
  4006f0:       48 83 c4 40             add    $0x40,%rsp
  4006f4:       5d                      pop    %rbp
  4006f5:       c3                      retq   
  4006f6:       66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  4006fd:       00 00 00
    ...
honggyukim commented 5 years ago

I just compared the t-dwarf1.clang.dbg and t-dwarf1.gcc.dbg, but there's no difference at all except for the address location.