pszostek / cp

C. profiler
1 stars 0 forks source link

BLOCKER: LBR: streams from the kernel pass through blocks with calls (>30% of streams are wrong!) #18

Open xs-exp opened 8 years ago

xs-exp commented 8 years ago

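The execution stream proceeds from pair 15 to pair 0. The target of pair 10 is ffffffff81163b84 and the source of pair 9 is ffffffff81163c4b. The blocks in between contain calls (see the callq instructions in the objdump below), but these calls are not in the LBR stack.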

LBR dump straight from perf -D:

.....  0: ffffffff81172eb4 -> ffffffff81172e53
.....  1: ffffffff81172e51 -> ffffffff81172e90
.....  2: ffffffff8115a544 -> ffffffff81172e40
.....  3: ffffffff8115a41a -> ffffffff8115a543
.....  4: ffffffff8115a53e -> ffffffff8115a3f0
.....  5: ffffffff81172e3b -> ffffffff8115a510
.....  6: ffffffff81163c85 -> ffffffff81172e00
.....  7: ffffffff815fc3eb -> ffffffff81163c50
.....  8: ffffffff815fc3da -> ffffffff815fc3ea
.....  9: ffffffff81163c4b -> ffffffff815fc3c0
..... 10: ffffffff811a1edb -> ffffffff81163b84
..... 11: ffffffff811a1e9f -> ffffffff811a1eda
..... 12: ffffffff8119ed19 -> ffffffff811a1e90
..... 13: ffffffff8119e2c2 -> ffffffff8119ed04
..... 14: ffffffff8119ecff -> ffffffff8119e270
..... 15: ffffffff8119a69d -> ffffffff8119ecf4

objdump:

ffffffff81163b84:       85 c0                   test   %eax,%eax
ffffffff81163b86:       4c 8b 45 c0             mov    -0x40(%rbp),%r8
ffffffff81163b8a:       0f 85 58 01 00 00       jne    0xffffffff81163ce8
ffffffff81163b90:       48 8b 43 48             mov    0x48(%rbx),%rax
ffffffff81163b94:       49 b9 00 00 00 00 00    mov    $0x160000000000,%r9
ffffffff81163b9b:       16 00 00
ffffffff81163b9e:       48 ba b7 6d db b6 6d    mov    $0x6db6db6db6db6db7,%rdx
ffffffff81163ba5:       db b6 6d
ffffffff81163ba8:       48 89 c7                mov    %rax,%rdi
ffffffff81163bab:       48 23 3d 6e d2 ba 00    and    0xbad26e(%rip),%rdi        # 0xffffffff81d10e20
ffffffff81163bb2:       a8 01                   test   $0x1,%al
ffffffff81163bb4:       48 0f 44 f8             cmove  %rax,%rdi
ffffffff81163bb8:       4b 8d 04 08             lea    (%r8,%r9,1),%rax
ffffffff81163bbc:       48 c1 f8 03             sar    $0x3,%rax
ffffffff81163bc0:       48 0f af c2             imul   %rdx,%rax
ffffffff81163bc4:       48 c1 e0 0c             shl    $0xc,%rax
ffffffff81163bc8:       48 09 c7                or     %rax,%rdi
ffffffff81163bcb:       ff 14 25 70 40 c2 81    callq  *0xffffffff81c24070
ffffffff81163bd2:       49 89 c7                mov    %rax,%r15
ffffffff81163bd5:       48 83 c8 42             or     $0x42,%rax
ffffffff81163bd9:       f6 43 50 02             testb  $0x2,0x50(%rbx)
ffffffff81163bdd:       49 8b 3c 24             mov    (%r12),%rdi
ffffffff81163be1:       4c 0f 45 f8             cmovne %rax,%r15
ffffffff81163be5:       ff 14 25 90 40 c2 81    callq  *0xffffffff81c24090
ffffffff81163bec:       48 b9 00 f0 ff ff ff    mov    $0x3ffffffff000,%rcx
ffffffff81163bf3:       3f 00 00
ffffffff81163bf6:       48 21 c8                and    %rcx,%rax
ffffffff81163bf9:       48 c1 e8 0c             shr    $0xc,%rax
ffffffff81163bfd:       48 8d 14 c5 00 00 00    lea    0x0(,%rax,8),%rdx
ffffffff81163c04:       00
ffffffff81163c05:       48 c1 e0 06             shl    $0x6,%rax
ffffffff81163c09:       48 29 d0                sub    %rdx,%rax
ffffffff81163c0c:       4c 29 c8                sub    %r9,%rax
ffffffff81163c0f:       48 83 c0 30             add    $0x30,%rax
ffffffff81163c13:       48 89 45 c8             mov    %rax,-0x38(%rbp)
ffffffff81163c17:       49 8b 3c 24             mov    (%r12),%rdi
ffffffff81163c1b:       ff 14 25 90 40 c2 81    callq  *0xffffffff81c24090
ffffffff81163c22:       4c 89 ea                mov    %r13,%rdx
ffffffff81163c25:       49 bc 00 00 00 00 00    mov    $0xffff880000000000,%r12
ffffffff81163c2c:       88 ff ff
ffffffff81163c2f:       48 21 c8                and    %rcx,%rax
ffffffff81163c32:       48 c1 ea 09             shr    $0x9,%rdx
ffffffff81163c36:       48 8b 7d c8             mov    -0x38(%rbp),%rdi
ffffffff81163c3a:       4c 89 45 c0             mov    %r8,-0x40(%rbp)
ffffffff81163c3e:       81 e2 f8 0f 00 00       and    $0xff8,%edx
ffffffff81163c44:       4c 01 e2                add    %r12,%rdx
ffffffff81163c47:       4c 8d 24 02             lea    (%rdx,%rax,1),%r12
ffffffff81163c4b:       e8 70 87 49 00          callq  0xffffffff815fc3c0

xs-exp commented 8 years ago

Workaround: the analyzer looks for unconditional branches within a stream and stops accounting when it encounters one (a warning is also dumped).

xs-exp commented 8 years ago

Today: out of 1.47 million streams, 430k (about 29%) were incorrect.

xs-exp commented 8 years ago

Debugging with AY, DL, SE as of 5 Sep 2016