bitdefender / bddisasm

bddisasm is a fast, lightweight, x86/x64 instruction decoder. The project also features a fast, basic, x86/x64 instruction emulator, designed specifically to detect shellcode-like behavior.
Apache License 2.0
888 stars 115 forks source link

PUNPCKLBW // #72

Closed icyfox168168 closed 2 years ago

icyfox168168 commented 2 years ago

I'm not sure whether this is a bug: I observed that when this instruction is present, the emulated function returns a wrong result, whereas the same code returns the correct result on a real machine.

ND_INS_PUNPCKLBW

000000014001EC66 PUNPCKLBW xmm4, xmm2 start XMM4 (HI_32) = 0x0000000000000000000000040000000000000000000000000000000000000000 start XMM4 (LO_32) = 0x00000000000000000000000400000000000000000000007a0000000000000045 start XMM2 (HI_32) = 0x0000000000000000000000040000000000000000000000000000000000000000 start XMM2 (LO_32) = 0x00000000000000000000000400000000000000000000007a0000000000000045 ture XMM4 (HI_32) = 0x0000000000000000000000040000000000000000000000000000000000000000 ture XMM4 (LO_32) = 0x00000000000000000000000400000000000000000000007a0000000000000045 bemu XMM4 (HI_32) = 0x000000000014fc60000000000014fa60000000000014f860000000000014f660 bemu XMM4 (LO_32) = 0x000000000014f460000000000014f260000000000014f060000000000014ee60

aesenc fail

vlutas commented 2 years ago

Hello,

I don't fully understand what the problem is. Is the PUNPCKLBW supplying the wrong output? Do you have an assembly snippet I can test?

As for the AESENC fail - that instruction is not supported at all currently.

icyfox168168 commented 2 years ago

"aesenc fail" is a message printed by my own test code:

/*
 * Cross-checks the software aesenc()/aesenclast() routines against the AES-NI
 * intrinsics and prints "ok"/"fail" for each.
 *
 * Both the round key and the state are initialised to the bytes 0..15.
 * NOTE(review): aesenc()/aesenclast() are defined elsewhere; they presumably
 * update `s` in place, since `s` is what gets compared with the intrinsic
 * result -- confirm against their definitions.
 */
void aaes() {
    uint8_t rk[16], s[16], s0[16];
    __m128i rk128, s128;

    for (int i = 0; i < 16; ++i) {
        rk[i] = (uint8_t)i;
    }
    for (int i = 0; i < 16; ++i) {
        s[i] = (uint8_t)i;
    }

    /*
     * rk and s are plain byte arrays with no 16-byte alignment guarantee, so
     * use the unaligned load; _mm_load_si128 may fault on such addresses.
     */
    rk128 = _mm_loadu_si128((const __m128i *)rk);
    s128 = _mm_loadu_si128((const __m128i *)s);

    /* One AES round: software version vs. hardware instruction. */
    aesenc(s, rk);
    s128 = _mm_aesenc_si128(s128, rk128);
    _mm_storeu_si128((__m128i *)s0, s128);

    if (!memcmp(s, s0, 16)) {
        printf("aesenc ok\n");
    } else {
        printf("aesenc fail\n");
    }

    /* Reload the inputs; `s` now holds whatever aesenc() left in it. */
    rk128 = _mm_loadu_si128((const __m128i *)rk);
    s128 = _mm_loadu_si128((const __m128i *)s);

    /* Final AES round: software version vs. hardware instruction. */
    aesenclast(s, rk);
    s128 = _mm_aesenclast_si128(s128, rk128);
    _mm_storeu_si128((__m128i *)s0, s128);

    if (!memcmp(s, s0, 16)) {
        printf("aesenclast ok\n");
    } else {
        printf("aesenclast fail\n");
    }
}

icyfox168168 commented 2 years ago

Let's see where the error is tonight

icyfox168168 commented 2 years ago

aesenc ok aesenclast ok 0000000000000000 0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ aesenc ok aesenclast ok Shemu returned: 00000002 RunCount 1 start 0000000140000000 end 0000000000000000 rax 0000000000000000 RETN id 602 0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ time 0 .............ok1............... .............ok2...............

I commented out the emulation case below so that the instruction is handled by my own emulation function instead, and with that I get the correct result:

/*
case ND_INS_PUNPCKLBW:
    GET_OP(Context, 0, &dst);
    GET_OP(Context, 1, &src);
    if (dst.Size == 8)
    {
        // Operating on MMX register.
        dst.Value.Bytes[7] = src.Value.Bytes[3];
        dst.Value.Bytes[6] = dst.Value.Bytes[3];
        dst.Value.Bytes[5] = src.Value.Bytes[2];
        dst.Value.Bytes[4] = dst.Value.Bytes[2];
        dst.Value.Bytes[3] = src.Value.Bytes[1];
        dst.Value.Bytes[2] = dst.Value.Bytes[1];
        dst.Value.Bytes[1] = src.Value.Bytes[0];
    }
    else
    {
        // Operating on XMM register.
        dst.Value.Bytes[15] = src.Value.Bytes[7];
        dst.Value.Bytes[14] = dst.Value.Bytes[7];
        dst.Value.Bytes[13] = src.Value.Bytes[6];
        dst.Value.Bytes[12] = dst.Value.Bytes[6];
        dst.Value.Bytes[11] = src.Value.Bytes[5];
        dst.Value.Bytes[10] = dst.Value.Bytes[5];
        dst.Value.Bytes[9] = src.Value.Bytes[4];
        dst.Value.Bytes[8] = src.Value.Bytes[4];
        dst.Value.Bytes[7] = src.Value.Bytes[3];
        dst.Value.Bytes[6] = dst.Value.Bytes[3];
        dst.Value.Bytes[5] = src.Value.Bytes[2];
        dst.Value.Bytes[4] = dst.Value.Bytes[2];
        dst.Value.Bytes[3] = src.Value.Bytes[1];
        dst.Value.Bytes[2] = dst.Value.Bytes[1];
        dst.Value.Bytes[1] = src.Value.Bytes[0];
    }
    SET_OP(Context, 0, &dst);
    break;
*/

icyfox168168 commented 2 years ago

If you don't comment, you will get wrong results aesenc ok aesenclast ok 0000000000000000 0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 000000014001A406 PUNPCKLBW xmm4, xmm2 start XMM4 (HI_32) = 0x000000000000000000000004000000007261506c000000000000000000000000 start XMM4 (LO_32) = 0x000000000000000000000004000000007261506c0000007a0000000000000045 start XMM2 (HI_32) = 0x000000000000000000000004000000007261506c000000000000000000000000 start XMM2 (LO_32) = 0x000000000000000000000004000000007261506c0000007a0000000000000045 ture XMM4 (HI_32) = 0x000000000000000000000004000000007261506c000000000000000000000000 ture XMM4 (LO_32) = 0x000000000000000000000004000000007261506c0000007a0000000000000045 bemu XMM4 (HI_32) = 0x000000000014fc80000000000014fa80000000000014f880000000000014f680 bemu XMM4 (LO_32) = 0x000000000014f480000000000014f280000000000014f080000000000014ee80

aesenc fail aesenclast ok Shemu returned: 00000002 RunCount 1 start 0000000140000000 end 0000000000000000 rax 0000000000000000 RETN id 602 0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ time 1 .............ok1............... .............ok2...............

icyfox168168 commented 2 years ago

"\x48\x83\xEC\x28\x48\xB8\x0F\x0E\x0D\x0C\x0B\x0A\x09\x08\x48\x89\x44\x24\x10\x48\xB8\x07\x06\x05\x04\x03\x02\x01\x00\x48\x89\x44\x24\x18\x48\xC7\x04\x24\x00\x00\x00\x00\x48\xC7\x44\x24\x08\x00\x00\x00\x00\x66\x0F\x6F\x24\x24\x66\x0F\x6F\x54\x24\x10\x66\x0F\x60\xE2\x66\x0F\x7F\x24\x24\x66\x0F\x7F\x54\x24\x10\x48\x8B\x04\x24\x48\x03\x44\x24\x08\x48\x03\x44\x24\x10\x48\x03\x44\x24\x18\x48\x83\xC4\x28\xC3"

1C0A220E28122E16 0000000140049BD0 Shemu returned: 00000002 RunCount 1 start 0000000140000000 end 0000000000000000 rax 1C0A220E28122E21 RETN id 602 0000000140049C12 PUNPCKLBW xmm4, xmm2 start XMM4 (HI_32) = 0x0000000000000000000000000000000000007ffe000000000000000000000000 start XMM4 (LO_32) = 0x0000000000000000000000000000000000007ffe000000000000000000000000 start XMM2 (HI_32) = 0x0000000000000000000000000000000000007ffe000000000000000000000000 start XMM2 (LO_32) = 0x0000000000000000000000000000000000007ffe000000000000000000000000 ture XMM4 (HI_32) = 0x0000000000000000000000000000000000007ffe000000000000000000000000 ture XMM4 (LO_32) = 0x0000000000000000000000000000000000007ffe000000000000000000000000 bemu XMM4 (HI_32) = 0x000000000014fc80000000000014fa80000000000014f880000000000014f680 bemu XMM4 (LO_32) = 0x000000000014f480000000000014f280000000000014f080000000000014ee80

Shemu returned: 00000002 RunCount 1 start 0000000140000000 end 0000000000000000 rax 1C0A220E28122E21 RETN id 602

_Pragma("clang optimize off")
/*
 * Minimal reproducer for PUNPCKLBW xmm4, xmm2.
 *
 * Loads xmm4 with zero and xmm2 with a known byte pattern, executes
 * PUNPCKLBW, stores both registers back and returns the sum of all four
 * 64-bit halves so the outcome fits in a single return value.
 * A correct PUNPCKLBW yields 0x1C0A220E28122E16.
 *
 * Optimisation is disabled around the function so the compiler emits the
 * instruction sequence as written.
 */
uint64_t asmadc() {
    /*
     * MOVDQA requires 16-byte-aligned memory operands; a bare uint64_t[2]
     * only guarantees 8, so request the alignment explicitly.
     */
    uint64_t r_xmm2[2] __attribute__((aligned(16)));
    r_xmm2[0] = 0x08090a0b0c0d0e0f;
    r_xmm2[1] = 0x0001020304050607;

    uint64_t r_xmm4[2] __attribute__((aligned(16)));
    r_xmm4[0] = 0;
    r_xmm4[1] = 0;

    __asm
    {
        MOVDQA xmm4, r_xmm4
        MOVDQA xmm2, r_xmm2
        PUNPCKLBW xmm4, xmm2
        MOVDQA r_xmm4, xmm4
        MOVDQA r_xmm2, xmm2
    }

    return r_xmm4[0] + r_xmm4[1] + r_xmm2[0] + r_xmm2[1];
}
_Pragma("clang optimize on")

vlutas commented 2 years ago

Ah, yes, indeed, it seems that there was a typo in PUNPCKLBW emulation. Fixed in https://github.com/bitdefender/bddisasm/commit/47da322ea5c01c625c5c833e20f2716cd0a392c5, together with some other small tweaks & fixes.