Vector35 / binaryninja-api

Public API, examples, documentation and issues for Binary Ninja
https://binary.ninja/
MIT License
936 stars 213 forks source link

Find in HLIL misses results if the addresses of the HLIL instructions are not monotonic #6144

Open bb33bb opened 4 days ago

bb33bb commented 4 days ago

Binary Ninja Version: 4.2.6444-dev Personal (f171d6c4) OS: > cat /proc/version Linux version 6.10.13-3-MANJARO (builduser@fv-az1246-770) (gcc (GCC) 14.2.1 20240910, GNU ld (GNU Binutils) 2.43.0) #1 SMP PREEMPT_DYNAMIC Tue Oct 8 03:24:49 UTC 2024 CPU Architecture: x64 Bug Description:

https://github.com/user-attachments/assets/a5c955a1-e54c-4207-9216-fe38a9fd7c18

xusheng6 commented 1 day ago

I did some debugging and found that these are the only lines of text that are actually being searched:

0xffffffc008625b00 : 
0xffffffc008625b00 :   int32_t* do_coredump(int32_t* arg1, int64_t* arg2 @ x18)
0xffffffc008625b00 : 
0xffffffc008625b00 :       SystemHintOp_BTI()
0xffffffc008625b04 :       int64_t x30
0xffffffc008625b04 :       *arg2 = x30
0xffffffc008625b28 :       int64_t __saved_x29
0xffffffc008625b28 :       int64_t* x29 = &__saved_x29
0xffffffc008625b2c :       int0_t sp_el0
0xffffffc008625b2c :       uint64_t x8 = _ReadStatusReg(sp_el0)
0xffffffc008625b30 :       uint64_t x19 = _ReadStatusReg(sp_el0)
0xffffffc008625b34 :       int64_t x8_1 = *(x8 + 0x5e0)
0xffffffc008625b50 :       int64_t var_d0 = 0
0xffffffc008625b54 :       void* x28 = *(x19 + 0x520)
0xffffffc008625b70 :       int64_t var_100 = 0
0xffffffc008625b78 :       int32_t* var_110 = arg1
0xffffffc008625b78 :       int64_t var_108 = *(x19 + 0x30) + 0x3eb0
0xffffffc008625b7c :       int64_t x8_4 = *(*(x19 + 0x7f0) + 0x2e8)
0xffffffc008625b80 :       int64_t s
0xffffffc008625b80 :       __builtin_memset(&s, c: 0, n: 0x1c)
0xffffffc008625b84 :       int64_t x20 = *(x28 + 0x328)
0xffffffc008625b94 :       int64_t s_1
0xffffffc008625b94 :       __builtin_memset(s: &s_1, c: 0, n: 0x58)
0xffffffc008625b9c :       int32_t* result
0xffffffc008625b9c :       int64_t* x18_1
0xffffffc008625b9c :       result, x18_1 = audit_core_dumps(sx.q(*arg1), &arg2[1])
0xffffffc008625ba0 :       void* x26 = *(x28 + 0x2f8)
0xffffffc008625ba0 :       
0xffffffc008625bb4 :       if (x26 != 0 && *(x26 + 0x28) != 0 && (x20 & 3) != 0)
0xffffffc008625bb8 :           result, x18_1 = prepare_creds(x18_1)
0xffffffc008625bb8 :           
0xffffffc008625bbc :           if (result != 0)
0xffffffc008625bc8 :               int64_t x8_8 = x20 & 3
0xffffffc008625bc8 :               
0xffffffc008625bd4 :               if (x8_8 == 2)
0xffffffc008625bd8 :                   result[7] = 0
0xffffffc008625bd8 :               
0xffffffc008625be0 :               int32_t x20_1 = *arg1
0xffffffc008625be8 :               void* x21_1 = *(x19 + 0x520)
0xffffffc008625bec :               int64_t var_90
0xffffffc008625bec :               var_90.d = 0
0xffffffc008625bf0 :               int64_t var_88
0xffffffc008625bf0 :               var_88.d = 0
0xffffffc008625bf4 :               int64_t* var_80 = &var_80
0xffffffc008625bf8 :               int64_t** var_78 = &var_80
0xffffffc008625bfc :               uint64_t var_a0 = x19
0xffffffc008625bfc :               int64_t var_98 = 0
0xffffffc008625c0c :               int32_t x0_2
0xffffffc008625c0c :               int64_t* x18_2
0xffffffc008625c0c :               x0_2, x18_2 = rwsem_write_trylock(x21_1 + 0x68, x18_1)
0xffffffc008625c10 :               uint64_t x0_58
0xffffffc008625c10 :               
0xffffffc008625c10 :               if ((x0_2 & 1) == 0)
0xffffffc008626870 :                   x0_58, x18_2 = rwsem_down_write_slowpath(x21_1 + 0x68, 0x102, x18_2)
0xffffffc008626870 :               
0xffffffc008626878 :               int32_t x25
0xffffffc008626878 :               
0xffffffc008626878 :               if ((x0_2 & 1) != 0 || x0_58 == -0x1000 || x0_58 u< -0x1000)
0xffffffc008626878 :               else
0xffffffc00862687c :                   x25 = 0
0xffffffc00862687c :               
0xffffffc00862692c :                           x0_19, x18_4 = freezing_slow_path(arg2, x18_4)
0xffffffc00862692c :                       
0xffffffc008626930 :                       if ((((*arg2).d & 1) != 0 && (zx.d(*(arg2 + 0x829)) & 1) != 0) || (system_freezing_cnt != 0 && (x0_19 & 1) != 0))
0xffffffc008626944 :                           label_ffffffc008626944:
0xffffffc008626944 :                           int32_t x8_13
0xffffffc008626944 :                           
0xffffffc008626944 :                           if (core_pipe_limit == 0)
0xffffffc008626944 :                               x8_13 = 0
0xffffffc008626944 :                           else
0xffffffc008626944 :                               x8_13 = 1
0xffffffc008626944 :                           
0xffffffc00862694c :                           if (x8_13 == 1)
0xffffffc008626958 :                               x18_4 = wait_for_dump_helpers(*(arg10 + 0xd8), x18_4)
0xffffffc008626930 :                       else if (arg10 == 0)
0xffffffc0086267f8 :                           *(arg6 - 0x58)
0xffffffc008626804 :                           x18_4 = _printk(&data_ffffffc009faf8fc, x18_4)
0xffffffc008626c18 :                                   __cfi_slowpath_diag(-0x3c83bcfaf6bb43b4, x23_2, &data_ffffffc00afb2af0, x18_5)
0xffffffc008626c18 :                               
0xffffffc008626bf4 :                               __cfi_slowpath_diag(-0x30547d604e32781e, x24_2, &data_ffffffc00afa93e0, x18_4)
0xffffffc008626bf4 :                           
0xffffffc008626d08 :                       SystemHintOp_BTI()
0xffffffc008626d14 :                       __prefetch(*x8_12)
0xffffffc008626d28 :                       int32_t temp0_3
0xffffffc008626d28 :                       int32_t i
0xffffffc008626d28 :                       
0xffffffc008626d28 :                       do
0xffffffc008626d18 :                           temp0_3 = __ldaxr(x8_12)
0xffffffc008626d18 :                           
0xffffffc008626d20 :                           if (temp0_3 != 0)
0xffffffc008626d20 :                               break
0xffffffc008626d20 :                           
0xffffffc008626d24 :                           i = __stxr(1, x8_12)
0xffffffc008626d28 :                       while (i != 0)
0xffffffc008626d28 :                       
0xffffffc008626d2c :                       if (temp0_3 != 0)
0xffffffc008626d34 :                           queued_spin_lock_slowpath(x8_12, temp0_3)
0xffffffc008626d34 :                       

To start with, there is obviously a bug, since the function do_coredump is huge and it contains a lot more lines of code than those being searched.

Interestingly, I noticed these lines:

0xffffffc008626878 :               if ((x0_2 & 1) != 0 || x0_58 == -0x1000 || x0_58 u< -0x1000)
0xffffffc008626878 :               else
0xffffffc00862687c :                   x25 = 0

Compare that to what we see in the linear view:

ffffffc008626878                if ((x0_2 & 1) != 0 || x0_58 == -0x1000 || x0_58 u< -0x1000)
ffffffc008625c14                    x25 = 1
ffffffc008626878                else
ffffffc00862687c                    x25 = 0

The line x25 = 1 is not included in the search, and its address, ending in 5c14, is out of order relative to the lines next to it (both at an address ending in 6878). That might be causing the issue.

xusheng6 commented 1 day ago

This happens when we have some code like this:

0x10000: ....
0x500: ...
0x10002: ....

All lines between 0x500 and 0x10002 will NOT be searched at all

xusheng6 commented 1 day ago

@bb33bb nice catch, thx for the bug report!

xusheng6 commented 1 day ago

Relevant to: https://github.com/Vector35/binaryninja-api/issues/6071

bb33bb commented 1 day ago

@bb33bb nice catch, thx for the bug report!

谢谢,每天用这个太多了。

xusheng6 commented 1 day ago

@bb33bb nice catch, thx for the bug report!

谢谢,每天用这个太多了。

If you do not mind my asking -- do you use range based search more, or function based search more?

bb33bb commented 1 day ago

@bb33bb nice catch, thx for the bug report!

谢谢,每天用这个太多了。

If you do not mind my asking -- do you use range based search more, or function based search more?

I think the "search more" feature is useful. But I rarely use global search because it takes too long. When I do need global search, I have to use Python code - having AI write it makes it way faster than using the software's built-in global search. There's another thing - when doing reverse engineering, sometimes if a function between two other functions isn't referenced externally, it seems like it won't get decompiled. This is actually really important in practice. Like when reverse engineering kernels, some kernel functions aren't referenced by other functions. Thanks!! Really appreciate your patient answers!