Open EdwardZZZ opened 5 years ago
Node.js 的 Buffer 是 Uint8Array 的子类，每个字节都是 0–255 的无符号整数，因此不需要把大于 127 的字节换算为负数。
// Sample input for the demo calls in this file: a single emoji that JavaScript
// stores as two UTF-16 code units (so `str.length` is 2).
const str = '😁';
/**
 * Decodes a one-character string to a numeric code value.
 *
 * When `str` is exactly one surrogate pair (two UTF-16 code units), the pair
 * is combined into the supplementary-plane code point it represents.
 * In every other case the first UTF-16 code unit is returned unchanged.
 *
 * @param {string} str - String to decode.
 * @returns {number} The decoded code point, or the first code unit of `str`
 *   (`NaN` for an empty string, as returned by `charCodeAt`).
 */
function UTF16(str) {
  const high = str.charCodeAt(0);
  if (str.length !== 2) return high;

  const low = str.charCodeAt(1);
  const isSurrogatePair =
    high >= 0xd800 && high <= 0xdbff && low >= 0xdc00 && low <= 0xdfff;
  // Two ordinary BMP characters must not be merged as if they were surrogates.
  if (!isSurrogatePair) return high;

  // Each surrogate carries 10 payload bits; the pair encodes (codePoint - 0x10000).
  const payload = ((high & 0x3ff) << 10) | (low & 0x3ff);
  return payload + 0x10000;
}
// Decodes the sample string to a code value with UTF16() and turns it back
// into a string; the resulting value is neither stored nor printed.
String.fromCodePoint(UTF16(str));
/**
 * Splits a code point into its UTF-16 surrogate pair.
 *
 * Despite the name, no UTF-8 bytes are produced: the result is expressed in
 * UTF-16 code units.
 *
 * @param {number} code - Code point to split.
 * @returns {number|number[]} `code` itself when it is below 0x10000,
 *   otherwise the two-element array `[highSurrogate, lowSurrogate]`.
 */
function UTF8(code) {
  const SUPPLEMENTARY_START = 0x10000;
  if (code < SUPPLEMENTARY_START) {
    return code;
  }

  // Offset into the supplementary planes: upper 10 bits go to the high
  // surrogate, lower 10 bits to the low surrogate.
  const offset = code - SUPPLEMENTARY_START;
  const highSurrogate = 0xd800 | (offset >> 10);
  const lowSurrogate = 0xdc00 | (offset & 0x3ff);
  return [highSurrogate, lowSurrogate];
}
// Round trip: decode the sample with UTF16(), split the result again with
// UTF8(), then rebuild the string from those values and print it.
// The spread below needs `codes` to be an array, which UTF8() returns only
// for values of 0x10000 and above.
const codes = UTF8(UTF16(str));
console.log(String.fromCodePoint(...codes))
泰语字符串长度
/**
 * Counts the characters of a string while ignoring Thai marks that do not
 * start a new visible character.
 *
 * The string is walked by code point. The code points U+0E31, U+0E33–U+0E3A
 * and U+0E47–U+0E4E (Thai vowel signs, tone marks and other diacritics) add
 * nothing to the count; every other code point adds one.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Number of counted code points.
 */
function strThaiLen(str) {
  let len = 0;
  for (const ch of str) {
    const codePoint = ch.codePointAt(0);
    const isIgnoredMark =
      codePoint === 0x0e31 ||
      (codePoint >= 0x0e33 && codePoint <= 0x0e3a) ||
      (codePoint >= 0x0e47 && codePoint <= 0x0e4e);
    if (!isIgnoredMark) {
      len++;
    }
  }
  return len;
}
https://unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table https://github.com/orling/grapheme-splitter