darrachequesne / notepack

A fast Node.js implementation of the latest MessagePack spec
MIT License
75 stars 19 forks source link

[Help] Differences parsing from Uint8Array[] and Buffer #17

Closed endel closed 6 years ago

endel commented 6 years ago

Hey there,

I'm having issues decoding from a byte array. Let me show you an example:

> var msgpack = require('notepack.io')
undefined

> msgpack.decode(msgpack.encode([ 10, 'HkgpZyO4lQ', 1 ]))
[ 10, 'HkgpZyO4lQ', 1 ]

> Array.from(msgpack.encode([ 10, 'HkgpZyO4lQ', 1 ]))
[ 147, 10, 170, 72, 107, 103, 112, 90, 121, 79, 52, 108, 81, 1 ]

> msgpack.decode(Array.from(msgpack.encode([ 10, 'HkgpZyO4lQ', 1 ])))
[ 10, '147,10,170,72,107,103,112,90,121,79,52,108,81,1', 1 ]

This looks like an index/recursion issue when decoding from an Array. Note the second value: it should be "HkgpZyO4lQ", but it is actually a string representation of the whole encoded byte array.

Any help would be appreciated!

endel commented 6 years ago

This is not as elegant as the current solution, but I've managed to fix it like this:

// decoder.js

/**
 * Decodes a sequence of UTF-8 bytes into a JavaScript string.
 *
 * Only indexed access and `length` are used, so `data` may be a plain Array
 * of byte values as well as a typed array.
 *
 * @param {ArrayLike<number>} data - The UTF-8 encoded bytes.
 * @returns {?string} The decoded string, or `null` when the input is
 *   malformed: a stray continuation byte, an invalid lead byte (0xF8-0xFF),
 *   a sequence truncated by the end of the data, a non-continuation byte
 *   inside a sequence, or a decoded value above U+10FFFF.
 */
function stringFromUTF8Array(data) {
  // Indexed by bits 3-5 of a lead byte; gives the number of continuation
  // bytes that follow: 110xxxxx -> 1, 1110xxxx -> 2, 11110xxx -> 3,
  // 11111xxx -> 0 (not a valid lead byte).
  const extraByteMap = [ 1, 1, 1, 1, 2, 2, 3, 0 ];
  const count = data.length;
  let str = "";

  for (let index = 0; index < count;) {
    let ch = data[index++];

    if (ch & 0x80) {
      let extra = extraByteMap[(ch >> 3) & 0x07];
      // Reject continuation bytes in lead position (bit 6 clear), invalid
      // lead bytes, and sequences that would run past the end of the data.
      if (!(ch & 0x40) || !extra || ((index + extra) > count)) {
        return null;
      }

      // Keep only the payload bits of the lead byte (5, 4 or 3 of them).
      ch = ch & (0x3F >> extra);
      for (; extra > 0; extra -= 1) {
        const chx = data[index++];
        if ((chx & 0xC0) !== 0x80) {
          return null;
        }

        ch = (ch << 6) | (chx & 0x3F);
      }

      // A 4-byte sequence can carry up to 21 bits; anything beyond the
      // Unicode range is malformed (and would make fromCodePoint throw).
      if (ch > 0x10FFFF) {
        return null;
      }

      // fromCodePoint emits a surrogate pair for code points above U+FFFF,
      // whereas fromCharCode would silently truncate them to 16 bits.
      str += String.fromCodePoint(ch);
    } else {
      str += String.fromCharCode(ch);
    }
  }

  return str;
}

/**
 * Reads `length` bytes starting at the decoder's current offset, decodes
 * them as UTF-8 and moves the offset past them.
 *
 * @param {number} length - Number of bytes occupied by the string.
 * @returns {?string} Whatever stringFromUTF8Array produces for those bytes.
 */
Decoder.prototype.str = function (length) {
  var start = this.offset;
  var end = start + length;
  // Decoding goes through stringFromUTF8Array rather than
  // this.buffer.toString('utf8', start, end) so that a plain Array of bytes
  // is handled as well as a Buffer.
  var text = stringFromUTF8Array(this.buffer.slice(start, end));
  this.offset = end;
  return text;
};

Then, it's possible to decode UTF8 strings using both Array and Buffer:

// Decode the encoder's output directly.
msgpack.decode(msgpack.encode([ 10, 'HkgpZyO4lQ', 1 ]))
// =>  [ 10, 'HkgpZyO4lQ', 1 ]

// Decode the same bytes after copying them into a plain Array.
msgpack.decode(Array.from(msgpack.encode([ 10, 'HkgpZyO4lQ', 1 ])))
// =>  [ 10, 'HkgpZyO4lQ', 1 ]
endel commented 6 years ago

Maybe this is a "won't fix" kind of problem.

This problem happens only inside Electron, because my library was expecting the message to be in a format only the browser version of the decoder expects. I've fixed this in my library by explicitly requiring the browser version.

Electron will always load the main module specified in package.json.