Closed Uzlopak closed 3 months ago
Hi @Uzlopak,
Thanks for reporting this.
Do you know of a URL to a pre-built Node.js binary without ICU support? So we can test this without having to build our own Node.js binary. I have checked on https://nodejs.org/dist/ and could not find one. Node.js has a way to specify alternative ICU data at runtime, but I cannot find a way to disable ICU at runtime.
Also, do you know whether, when using a Node.js binary without ICU support, the `u` RegExp flag still works, provided `\p` is not used? That flag has a few additional benefits beyond enabling `\p`.
@sindresorhus This could be solved using the following code (I will share the code I used to generate that massive RegExp):
const getSpecialCharRegExp = () => {
try {
return /\p{Separator}|\p{Other}/gu;
} catch {
// Equivalent to the above RegExp, but works even when Node.js has been built without ICU support
// See https://github.com/sindresorhus/execa/issues/1143
return /[\u0000-\u0020\u007f-\u00a0\u00ad\u0378\u0379\u0380-\u0383\u038b\u038d\u03a2\u0530\u0557\u0558\u058b\u058c\u0590\u05c8-\u05cf\u05eb-\u05ee\u05f5-\u0605\u061c\u06dd\u070e\u070f\u074b\u074c\u07b2-\u07bf\u07fb\u07fc\u082e\u082f\u083f\u085c\u085d\u085f\u086b-\u086f\u088f-\u0897\u08e2\u0984\u098d\u098e\u0991\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba\u09bb\u09c5\u09c6\u09c9\u09ca\u09cf-\u09d6\u09d8-\u09db\u09de\u09e4\u09e5\u09ff\u0a00\u0a04\u0a0b-\u0a0e\u0a11\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a\u0a3b\u0a3d\u0a43-\u0a46\u0a49\u0a4a\u0a4e-\u0a50\u0a52-\u0a58\u0a5d\u0a5f-\u0a65\u0a77-\u0a80\u0a84\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba\u0abb\u0ac6\u0aca\u0ace\u0acf\u0ad1-\u0adf\u0ae4\u0ae5\u0af2-\u0af8\u0b00\u0b04\u0b0d\u0b0e\u0b11\u0b12\u0b29\u0b31\u0b34\u0b3a\u0b3b\u0b45\u0b46\u0b49\u0b4a\u0b4e-\u0b54\u0b58-\u0b5b\u0b5e\u0b64\u0b65\u0b78-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce\u0bcf\u0bd1-\u0bd6\u0bd8-\u0be5\u0bfb-\u0bff\u0c0d\u0c11\u0c29\u0c3a\u0c3b\u0c45\u0c49\u0c4e-\u0c54\u0c57\u0c5b\u0c5c\u0c5e\u0c5f\u0c64\u0c65\u0c70-\u0c76\u0c8d\u0c91\u0ca9\u0cb4\u0cba\u0cbb\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdc\u0cdf\u0ce4\u0ce5\u0cf0\u0cf4-\u0cff\u0d0d\u0d11\u0d45\u0d49\u0d50-\u0d53\u0d64\u0d65\u0d80\u0d84\u0d97-\u0d99\u0db2\u0dbc\u0dbe\u0dbf\u0dc7-\u0dc9\u0dcb-\u0dce\u0dd5\u0dd7\u0de0-\u0de5\u0df0\u0df1\u0df5-\u0e00\u0e3b-\u0e3e\u0e5c-\u0e80\u0e83\u0e85\u0e8b\u0ea4\u0ea6\u0ebe\u0ebf\u0ec5\u0ec7\u0ecf\u0eda\u0edb\u0ee0-\u0eff\u0f48\u0f6d-\u0f70\u0f98\u0fbd\u0fcd\u0fdb-\u0fff\u10c6\u10c8-\u10cc\u10ce\u10cf\u1249\u124e\u124f\u1257\u1259\u125e\u125f\u1289\u128e\u128f\u12b1\u12b6\u12b7\u12bf\u12c1\u12c6\u12c7\u12d7\u1311\u1316\u1317\u135b\u135c\u137d-\u137f\u139a-\u139f\u13f6\u13f7\u13fe\u13ff\u1680\u169d-\u169f\u16f9-\u16ff\u1716-\u171e\u1737-\u173f\u1754-\u175f\u176d\u1771\u1774-\u177f\u17de\u17df\u17ea-\u17ef\u17fa-\u17ff\u180e\u181a-\u181f\u1879-\u187f\u18ab-\u18af\u18f6-\u18ff\u191f\u192c-
\u192f\u193c-\u193f\u1941-\u1943\u196e\u196f\u1975-\u197f\u19ac-\u19af\u19ca-\u19cf\u19db-\u19dd\u1a1c\u1a1d\u1a5f\u1a7d\u1a7e\u1a8a-\u1a8f\u1a9a-\u1a9f\u1aae\u1aaf\u1acf-\u1aff\u1b4d-\u1b4f\u1b7f\u1bf4-\u1bfb\u1c38-\u1c3a\u1c4a-\u1c4c\u1c89-\u1c8f\u1cbb\u1cbc\u1cc8-\u1ccf\u1cfb-\u1cff\u1f16\u1f17\u1f1e\u1f1f\u1f46\u1f47\u1f4e\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e\u1f7f\u1fb5\u1fc5\u1fd4\u1fd5\u1fdc\u1ff0\u1ff1\u1ff5\u1fff-\u200f\u2028-\u202f\u205f-\u206f\u2072\u2073\u208f\u209d-\u209f\u20c1-\u20cf\u20f1-\u20ff\u218c-\u218f\u2427-\u243f\u244b-\u245f\u2b74\u2b75\u2b96\u2cf4-\u2cf8\u2d26\u2d28-\u2d2c\u2d2e\u2d2f\u2d68-\u2d6e\u2d71-\u2d7e\u2d97-\u2d9f\u2da7\u2daf\u2db7\u2dbf\u2dc7\u2dcf\u2dd7\u2ddf\u2e5e-\u2e7f\u2e9a\u2ef4-\u2eff\u2fd6-\u2fef\u3000\u3040\u3097\u3098\u3100-\u3104\u3130\u318f\u31e4-\u31ee\u321f\ua48d-\ua48f\ua4c7-\ua4cf\ua62c-\ua63f\ua6f8-\ua6ff\ua7cb-\ua7cf\ua7d2\ua7d4\ua7da-\ua7f1\ua82d-\ua82f\ua83a-\ua83f\ua878-\ua87f\ua8c6-\ua8cd\ua8da-\ua8df\ua954-\ua95e\ua97d-\ua97f\ua9ce\ua9da-\ua9dd\ua9ff\uaa37-\uaa3f\uaa4e\uaa4f\uaa5a\uaa5b\uaac3-\uaada\uaaf7-\uab00\uab07\uab08\uab0f\uab10\uab17-\uab1f\uab27\uab2f\uab6c-\uab6f\uabee\uabef\uabfa-\uabff\ud7a4-\ud7af\ud7c7-\ud7ca\ud7fc-\uf8ff\ufa6e\ufa6f\ufada-\ufaff\ufb07-\ufb12\ufb18-\ufb1c\ufb37\ufb3d\ufb3f\ufb42\ufb45\ufbc3-\ufbd2\ufd90\ufd91\ufdc8-\ufdce\ufdd0-\ufdef\ufe1a-\ufe1f\ufe53\ufe67\ufe6c-\ufe6f\ufe75\ufefd-\uff00\uffbf-\uffc1\uffc8\uffc9\uffd0\uffd1\uffd8\uffd9\uffdd-\uffdf\uffe7\uffef-\ufffb\ufffe\uffff\u{1000c}\u{10027}\u{1003b}\u{1003e}\u{1004e}\u{1004f}\u{1005e}-\u{1007f}\u{100fb}-\u{100ff}\u{10103}-\u{10106}\u{10134}-\u{10136}\u{1018f}\u{1019d}-\u{1019f}\u{101a1}-\u{101cf}\u{101fe}-\u{1027f}\u{1029d}-\u{1029f}\u{102d1}-\u{102df}\u{102fc}-\u{102ff}\u{10324}-\u{1032c}\u{1034b}-\u{1034f}\u{1037b}-\u{1037f}\u{1039e}\u{103c4}-\u{103c7}\u{103d6}-\u{103ff}\u{1049e}\u{1049f}\u{104aa}-\u{104af}\u{104d4}-\u{104d7}\u{104fc}-\u{104ff}\u{10528}-\u{1052f}\u{10564}-\u{1056e}\u{1057b}\u{1058b}\u{10593}\u{10596}\u
{105a2}\u{105b2}\u{105ba}\u{105bd}-\u{105ff}\u{10737}-\u{1073f}\u{10756}-\u{1075f}\u{10768}-\u{1077f}\u{10786}\u{107b1}\u{107bb}-\u{107ff}\u{10806}\u{10807}\u{10809}\u{10836}\u{10839}-\u{1083b}\u{1083d}\u{1083e}\u{10856}\u{1089f}-\u{108a6}\u{108b0}-\u{108df}\u{108f3}\u{108f6}-\u{108fa}\u{1091c}-\u{1091e}\u{1093a}-\u{1093e}\u{10940}-\u{1097f}\u{109b8}-\u{109bb}\u{109d0}\u{109d1}\u{10a04}\u{10a07}-\u{10a0b}\u{10a14}\u{10a18}\u{10a36}\u{10a37}\u{10a3b}-\u{10a3e}\u{10a49}-\u{10a4f}\u{10a59}-\u{10a5f}\u{10aa0}-\u{10abf}\u{10ae7}-\u{10aea}\u{10af7}-\u{10aff}\u{10b36}-\u{10b38}\u{10b56}\u{10b57}\u{10b73}-\u{10b77}\u{10b92}-\u{10b98}\u{10b9d}-\u{10ba8}\u{10bb0}-\u{10bff}\u{10c49}-\u{10c7f}\u{10cb3}-\u{10cbf}\u{10cf3}-\u{10cf9}\u{10d28}-\u{10d2f}\u{10d3a}-\u{10e5f}\u{10e7f}\u{10eaa}\u{10eae}\u{10eaf}\u{10eb2}-\u{10efc}\u{10f28}-\u{10f2f}\u{10f5a}-\u{10f6f}\u{10f8a}-\u{10faf}\u{10fcc}-\u{10fdf}\u{10ff7}-\u{10fff}\u{1104e}-\u{11051}\u{11076}-\u{1107e}\u{110bd}\u{110c3}-\u{110cf}\u{110e9}-\u{110ef}\u{110fa}-\u{110ff}\u{11135}\u{11148}-\u{1114f}\u{11177}-\u{1117f}\u{111e0}\u{111f5}-\u{111ff}\u{11212}\u{11242}-\u{1127f}\u{11287}\u{11289}\u{1128e}\u{1129e}\u{112aa}-\u{112af}\u{112eb}-\u{112ef}\u{112fa}-\u{112ff}\u{11304}\u{1130d}\u{1130e}\u{11311}\u{11312}\u{11329}\u{11331}\u{11334}\u{1133a}\u{11345}\u{11346}\u{11349}\u{1134a}\u{1134e}\u{1134f}\u{11351}-\u{11356}\u{11358}-\u{1135c}\u{11364}\u{11365}\u{1136d}-\u{1136f}\u{11375}-\u{113ff}\u{1145c}\u{11462}-\u{1147f}\u{114c8}-\u{114cf}\u{114da}-\u{1157f}\u{115b6}\u{115b7}\u{115de}-\u{115ff}\u{11645}-\u{1164f}\u{1165a}-\u{1165f}\u{1166d}-\u{1167f}\u{116ba}-\u{116bf}\u{116ca}-\u{116ff}\u{1171b}\u{1171c}\u{1172c}-\u{1172f}\u{11747}-\u{117ff}\u{1183c}-\u{1189f}\u{118f3}-\u{118fe}\u{11907}\u{11908}\u{1190a}\u{1190b}\u{11914}\u{11917}\u{11936}\u{11939}\u{1193a}\u{11947}-\u{1194f}\u{1195a}-\u{1199f}\u{119a8}\u{119a9}\u{119d8}\u{119d9}\u{119e5}-\u{119ff}\u{11a48}-\u{11a4f}\u{11aa3}-\u{11aaf}\u{11af9}-\u{11aff}\u{11b0a}-\u{11bff}\u{11c09}\u{1
1c37}\u{11c46}-\u{11c4f}\u{11c6d}-\u{11c6f}\u{11c90}\u{11c91}\u{11ca8}\u{11cb7}-\u{11cff}\u{11d07}\u{11d0a}\u{11d37}-\u{11d39}\u{11d3b}\u{11d3e}\u{11d48}-\u{11d4f}\u{11d5a}-\u{11d5f}\u{11d66}\u{11d69}\u{11d8f}\u{11d92}\u{11d99}-\u{11d9f}\u{11daa}-\u{11edf}\u{11ef9}-\u{11eff}\u{11f11}\u{11f3b}-\u{11f3d}\u{11f5a}-\u{11faf}\u{11fb1}-\u{11fbf}\u{11ff2}-\u{11ffe}\u{1239a}-\u{123ff}\u{1246f}\u{12475}-\u{1247f}\u{12544}-\u{12f8f}\u{12ff3}-\u{12fff}\u{13430}-\u{1343f}\u{13456}-\u{143ff}\u{14647}-\u{167ff}\u{16a39}-\u{16a3f}\u{16a5f}\u{16a6a}-\u{16a6d}\u{16abf}\u{16aca}-\u{16acf}\u{16aee}\u{16aef}\u{16af6}-\u{16aff}\u{16b46}-\u{16b4f}\u{16b5a}\u{16b62}\u{16b78}-\u{16b7c}\u{16b90}-\u{16e3f}\u{16e9b}-\u{16eff}\u{16f4b}-\u{16f4e}\u{16f88}-\u{16f8e}\u{16fa0}-\u{16fdf}\u{16fe5}-\u{16fef}\u{16ff2}-\u{16fff}\u{187f8}-\u{187ff}\u{18cd6}-\u{18cff}\u{18d09}-\u{1afef}\u{1aff4}\u{1affc}\u{1afff}\u{1b123}-\u{1b131}\u{1b133}-\u{1b14f}\u{1b153}\u{1b154}\u{1b156}-\u{1b163}\u{1b168}-\u{1b16f}\u{1b2fc}-\u{1bbff}\u{1bc6b}-\u{1bc6f}\u{1bc7d}-\u{1bc7f}\u{1bc89}-\u{1bc8f}\u{1bc9a}\u{1bc9b}\u{1bca0}-\u{1ceff}\u{1cf2e}\u{1cf2f}\u{1cf47}-\u{1cf4f}\u{1cfc4}-\u{1cfff}\u{1d0f6}-\u{1d0ff}\u{1d127}\u{1d128}\u{1d173}-\u{1d17a}\u{1d1eb}-\u{1d1ff}\u{1d246}-\u{1d2bf}\u{1d2d4}-\u{1d2df}\u{1d2f4}-\u{1d2ff}\u{1d357}-\u{1d35f}\u{1d379}-\u{1d3ff}\u{1d455}\u{1d49d}\u{1d4a0}\u{1d4a1}\u{1d4a3}\u{1d4a4}\u{1d4a7}\u{1d4a8}\u{1d4ad}\u{1d4ba}\u{1d4bc}\u{1d4c4}\u{1d506}\u{1d50b}\u{1d50c}\u{1d515}\u{1d51d}\u{1d53a}\u{1d53f}\u{1d545}\u{1d547}-\u{1d549}\u{1d551}\u{1d6a6}\u{1d6a7}\u{1d7cc}\u{1d7cd}\u{1da8c}-\u{1da9a}\u{1daa0}\u{1dab0}-\u{1deff}\u{1df1f}-\u{1df24}\u{1df2b}-\u{1dfff}\u{1e007}\u{1e019}\u{1e01a}\u{1e022}\u{1e025}\u{1e02b}-\u{1e02f}\u{1e06e}-\u{1e08e}\u{1e090}-\u{1e0ff}\u{1e12d}-\u{1e12f}\u{1e13e}\u{1e13f}\u{1e14a}-\u{1e14d}\u{1e150}-\u{1e28f}\u{1e2af}-\u{1e2bf}\u{1e2fa}-\u{1e2fe}\u{1e300}-\u{1e4cf}\u{1e4fa}-\u{1e7df}\u{1e7e7}\u{1e7ec}\u{1e7ef}\u{1e7ff}\u{1e8c5}\u{1e8c6}\u{1e8d7}-\u{1e8ff}\u{1e94c}-\u{1e94f}\u{1e9
5a}-\u{1e95d}\u{1e960}-\u{1ec70}\u{1ecb5}-\u{1ed00}\u{1ed3e}-\u{1edff}\u{1ee04}\u{1ee20}\u{1ee23}\u{1ee25}\u{1ee26}\u{1ee28}\u{1ee33}\u{1ee38}\u{1ee3a}\u{1ee3c}-\u{1ee41}\u{1ee43}-\u{1ee46}\u{1ee48}\u{1ee4a}\u{1ee4c}\u{1ee50}\u{1ee53}\u{1ee55}\u{1ee56}\u{1ee58}\u{1ee5a}\u{1ee5c}\u{1ee5e}\u{1ee60}\u{1ee63}\u{1ee65}\u{1ee66}\u{1ee6b}\u{1ee73}\u{1ee78}\u{1ee7d}\u{1ee7f}\u{1ee8a}\u{1ee9c}-\u{1eea0}\u{1eea4}\u{1eeaa}\u{1eebc}-\u{1eeef}\u{1eef2}-\u{1efff}\u{1f02c}-\u{1f02f}\u{1f094}-\u{1f09f}\u{1f0af}\u{1f0b0}\u{1f0c0}\u{1f0d0}\u{1f0f6}-\u{1f0ff}\u{1f1ae}-\u{1f1e5}\u{1f203}-\u{1f20f}\u{1f23c}-\u{1f23f}\u{1f249}-\u{1f24f}\u{1f252}-\u{1f25f}\u{1f266}-\u{1f2ff}\u{1f6d8}-\u{1f6db}\u{1f6ed}-\u{1f6ef}\u{1f6fd}-\u{1f6ff}\u{1f777}-\u{1f77a}\u{1f7da}-\u{1f7df}\u{1f7ec}-\u{1f7ef}\u{1f7f1}-\u{1f7ff}\u{1f80c}-\u{1f80f}\u{1f848}-\u{1f84f}\u{1f85a}-\u{1f85f}\u{1f888}-\u{1f88f}\u{1f8ae}\u{1f8af}\u{1f8b2}-\u{1f8ff}\u{1fa54}-\u{1fa5f}\u{1fa6e}\u{1fa6f}\u{1fa7d}-\u{1fa7f}\u{1fa89}-\u{1fa8f}\u{1fabe}\u{1fac6}-\u{1facd}\u{1fadc}-\u{1fadf}\u{1fae9}-\u{1faef}\u{1faf9}-\u{1faff}\u{1fb93}\u{1fbcb}-\u{1fbef}\u{1fbfa}-\u{1ffff}\u{2a6e0}-\u{2a6ff}\u{2b73a}-\u{2b73f}\u{2b81e}\u{2b81f}\u{2cea2}-\u{2ceaf}\u{2ebe1}-\u{2ebef}\u{2ee5e}-\u{2f7ff}\u{2fa1e}-\u{2ffff}\u{3134b}-\u{3134f}\u{323b0}-\u{e00ff}\u{e01f0}-\u{10ffff}]/gu;
}
};
What are your thoughts about this solution, and about the problem in general?
It doesn't make sense to include such a large regex for such an edge case. I would just go with `\s` and maybe a few more characters, and note the limitation in the readme.
Yes, I was about to suggest that alternative too. I'm going to put up a PR to fix this.
@Uzlopak If you don't know of a way to run Node.js without ICU support (except for building a Node.js binary from scratch), could you please run the following code using your setup which does not have ICU support? To make sure it will work before I create the PR. Thanks!
const getSpecialCharRegExp = () => {
try {
return /\p{Separator}|\p{Other}/u;
} catch {
// Intended fallback when `\p{...}` property escapes are rejected:
// whitespace (\s), C0 controls, DEL plus C1 controls, and the soft hyphen (U+00AD).
// NOTE(review): a regex literal is compiled when the file is parsed, so an
// unsupported `\p{...}` raises a SyntaxError before this `catch` can run.
return /[\s\u0000-\u001F\u007F-\u009F\u00AD]/;
}
};
// Build the RegExp once and print it to show which branch was taken.
const SPECIAL_CHAR_REGEXP = getSpecialCharRegExp();
console.log(SPECIAL_CHAR_REGEXP);
// Sample code points: NUL, space, ".", DEL, U+0ECF and U+10FFFF.
const codepoints = [0, 0x20, 0x2e, 0x7f, 0x0ecf, 0x10ffff];
// Print each code point in hex together with whether the RegExp matches that character.
for (const codepoint of codepoints) {
console.log(codepoint.toString(16), SPECIAL_CHAR_REGEXP.test(String.fromCodePoint(codepoint)));
}
will check now, currently compiling nodejs without intl.
nope. Seems like the Regex gets parsed in an early stage, resulting in a SyntaxError, so basically breaking the whole execution of the file.
Output below: first with a normal Node.js build, then with the one compiled without Intl.
aras@aras-Lenovo-Legion-5-17ARH05H:~/workspace/node$ node intl.js
/\p{Separator}|\p{Other}/u
0 true
20 true
2e false
7f true
ecf true
10ffff true
aras@aras-Lenovo-Legion-5-17ARH05H:~/workspace/node$ ./node intl.js
/home/aras/workspace/node/intl.js:3
return /\p{Separator}|\p{Other}/u;
^^^^^^^^^^^^^^^^^^^^^^^^^^
SyntaxError: Invalid regular expression: /\p{Separator}|\p{Other}/u: Invalid property name
at wrapSafe (node:internal/modules/cjs/loader:1469:18)
at Module._compile (node:internal/modules/cjs/loader:1491:20)
at Module._extensions..js (node:internal/modules/cjs/loader:1691:10)
at Module.load (node:internal/modules/cjs/loader:1317:32)
at Module._load (node:internal/modules/cjs/loader:1127:12)
at TracingChannel.traceSync (node:diagnostics_channel:315:14)
at wrapModuleLoad (node:internal/modules/cjs/loader:217:24)
at Function.executeUserEntryPoint [as runMain] (node:internal/modules/run_main:166:5)
at node:internal/main/run_main_module:30:49
Node.js v23.0.0-pre
But this works
/**
 * Creates the RegExp used to detect separator and "other" (control, format,
 * unassigned, ...) characters.
 *
 * The Unicode-property pattern goes through the `RegExp` constructor so that
 * an engine rejecting `\p{...}` throws at run time, where it can be caught,
 * instead of failing while the file is being parsed.
 *
 * @returns {RegExp} The property-escape RegExp, or an ASCII/Latin-1 fallback.
 */
const getSpecialCharRegExp = () => {
  let specialCharRegExp;
  try {
    specialCharRegExp = new RegExp('\\p{Separator}|\\p{Other}', 'u');
  } catch {
    // Whitespace, C0 controls, DEL plus C1 controls, and the soft hyphen.
    specialCharRegExp = /[\s\u0000-\u001F\u007F-\u009F\u00AD]/u;
  }

  return specialCharRegExp;
};

const SPECIAL_CHAR_REGEXP = getSpecialCharRegExp();
// Printing the RegExp shows which of the two branches was taken.
console.log(SPECIAL_CHAR_REGEXP);

// NUL, space, ".", DEL, U+0ECF and U+10FFFF.
const codepoints = [0, 0x20, 0x2e, 0x7f, 0x0ecf, 0x10ffff];
codepoints.forEach((codePoint) => {
  const character = String.fromCodePoint(codePoint);
  console.log(codePoint.toString(16), SPECIAL_CHAR_REGEXP.test(character));
});
aras@aras-Lenovo-Legion-5-17ARH05H:~/workspace/node$ node intl.js
/\p{Separator}|\p{Other}/u
0 true
20 true
2e false
7f true
ecf true
10ffff true
aras@aras-Lenovo-Legion-5-17ARH05H:~/workspace/node$ ./node intl.js
/[\s\u0000-\u001F\u007F-\u009F\u00AD]/u
0 true
20 true
2e false
7f true
ecf false
10ffff false
I don't know if string-width is a dependency or a dev dependency, but it has a similar issue:
Yep, I only needed to patch execa in node_modules accordingly and now the tests run in undici with borp.
My patch was:
// Global, Unicode-aware RegExp matching separator and "other" characters.
// The property-escape pattern is compiled through the `RegExp` constructor so
// that an engine rejecting `\p{...}` throws at run time and the fallback applies.
const SPECIAL_CHAR_REGEXP = (function createSpecialCharRegExp() {
  let specialCharRegExp;
  try {
    specialCharRegExp = new RegExp('\\p{Separator}|\\p{Other}', 'gu');
  } catch {
    // Whitespace, C0 controls, DEL plus C1 controls, and the soft hyphen.
    specialCharRegExp = /[\s\u0000-\u001F\u007F-\u009F\u00AD]/gu;
  }

  return specialCharRegExp;
})();
Hope this helps. I'm going back to my baby shift :D
This was very helpful, thanks @Uzlopak for going the extra mile in terms of testing. :clap:
`string-width` is a dev dependency.
node builds without intl are failing due to this line
https://github.com/sindresorhus/execa/blob/c0b6efc9cbf8fa88398ccc863a8eb554d3a76f93/lib/arguments/escape.js#L41
execa is used by borp (@mcollina), and borp is used in undici, so this breaks undici's unit tests on Node.js builds without Intl.