Open timotheecour opened 3 years ago
`cstrToNimstr` differs from this implementation? In particular, this part:
// Surrogate Pair
c = 0x10000 + ((c & 0x03FF) << 10) + (str.charCodeAt(++i) & 0x03FF);
seems different?
https://github.com/google/closure-library/blob/master/closure/goog/crypt/crypt.js#L110
/**
 * Encodes a JavaScript (UTF-16) string as an array of UTF-8 byte values.
 *
 * A high surrogate that is immediately followed by a low surrogate is combined
 * into one code point and written as four bytes. Every other code unit at or
 * above 0x800, an unpaired surrogate included, is written as three bytes.
 *
 * @param {string} str 16-bit unicode string.
 * @return {!Array<number>} UTF-8 byte array.
 */
goog.crypt.stringToUtf8ByteArray = function(str) {
  'use strict';
  // TODO(user): Use native implementations if/when available
  var bytes = [];
  var idx = 0;
  while (idx < str.length) {
    var unit = str.charCodeAt(idx);
    // From here on `idx` points at the code unit after `unit`.
    idx += 1;

    // One byte: 0xxxxxxx
    if (unit < 0x80) {
      bytes.push(unit);
      continue;
    }

    // Two bytes: 110xxxxx 10xxxxxx
    if (unit < 0x800) {
      bytes.push((unit >> 6) | 0xC0, (unit & 0x3F) | 0x80);
      continue;
    }

    // A pair needs a high surrogate here and a low surrogate right after it.
    var isPair = (unit & 0xFC00) === 0xD800 && idx < str.length &&
        (str.charCodeAt(idx) & 0xFC00) === 0xDC00;

    if (isPair) {
      // Consume the low surrogate and rebuild the supplementary code point.
      var low = str.charCodeAt(idx);
      idx += 1;
      var codePoint = 0x10000 + ((unit & 0x03FF) << 10) + (low & 0x03FF);
      // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      bytes.push(
          (codePoint >> 18) | 0xF0,
          ((codePoint >> 12) & 0x3F) | 0x80,
          ((codePoint >> 6) & 0x3F) | 0x80,
          (codePoint & 0x3F) | 0x80);
    } else {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      bytes.push(
          (unit >> 12) | 0xE0,
          ((unit >> 6) & 0x3F) | 0x80,
          (unit & 0x3F) | 0x80);
    }
  }
  return bytes;
};
edit: see this other implementation: https://stackoverflow.com/a/64277403/1426932 (From emscripten)
I agree. I think `cstring` should use `Uint8Array`.
Will this simplify `makeNimstrLit`, `cstrToNimstr` and `makeJSStr`?
links
note
In some benchmarks I did, though, TextEncoder.encode seems slow.
Ref https://github.com/timotheecour/Nim/issues/156