nodejs / nbytes

A library of byte handling functions extracted from Node.js core
MIT License
6 stars 1 forks source link

Unnecessary MSVC code #4

Open lemire opened 1 month ago

lemire commented 1 month ago

The current code uses special casing for MSVC, but this code is likely unneeded. It seems to seek to avoid memcpy calls. But Microsoft's compiler is smart.

Let us consider this example:

#include <cstdint>
#include <cstring>

#define BSWAP_4(x)                                                       \
  (((x) & 0xFF) << 24) | (((x) & 0xFF00) << 8) | (((x) >> 8) & 0xFF00) | \
      (((x) >> 24) & 0xFF)

bool SwapBytes32(char *data, size_t nbytes) {
   if (nbytes % sizeof(uint32_t) != 0) return false;
   uint32_t temp;
   for (size_t i = 0; i < nbytes; i += sizeof(temp)) {
     memcpy(&temp, &data[i], sizeof(temp));
     temp = BSWAP_4(temp);
     memcpy(&data[i], &temp, sizeof(temp));
   }

   return true;
 }

With /O2, MSVC compiles it down to

bool SwapBytes32(char *,unsigned __int64) PROC               ; SwapBytes32, COMDAT
        test    dl, 3
        je      SHORT $LN5@SwapBytes3
        xor     al, al
        ret     0
$LN5@SwapBytes3:
        test    rdx, rdx
        je      SHORT $LN3@SwapBytes3
        dec     rdx
        shr     rdx, 2
        inc     rdx
        npad    9
$LL4@SwapBytes3:
        mov     eax, DWORD PTR [rcx]
        lea     rcx, QWORD PTR [rcx+4]
        bswap   eax
        mov     DWORD PTR [rcx-4], eax
        sub     rdx, 1
        jne     SHORT $LL4@SwapBytes3
$LN3@SwapBytes3:
        mov     al, 1
        ret     0

I am sure one can do better, but it is just generally fine code.

lemire commented 1 month ago

cc @anonrig

lemire commented 1 month ago

(@anonrig not having special case handing for MSVC is likely better long term.)