Open sindresorhus opened 2 years ago
Is there a formal specification of how this feature works?
If implemented simply, I would expect slice-ansi
to behave like this:
const segmenter = new Intl.Segmenter('en');
const text = 'Slice a string with ANSI escape codes';
console.log(sliceAnsi(text, 0, 10, { segmenter })); // 'Slice a'
console.log(sliceAnsi(text, 0, 11, { segmenter })); // 'Slice a'
console.log(sliceAnsi(text, 0, 12, { segmenter })); // 'Slice a'
console.log(sliceAnsi(text, 0, 13, { segmenter })); // 'Slice a'
console.log(sliceAnsi(text, 0, 14, { segmenter })); // 'Slice a string'
So far, the output is as expected. But what about the following?
console.log(sliceAnsi(text, 1, 10, { segmenter })); // 'lice a'
console.log(sliceAnsi(text, 1, 11, { segmenter })); // 'lice a'
console.log(sliceAnsi(text, 1, 12, { segmenter })); // 'lice a'
console.log(sliceAnsi(text, 1, 13, { segmenter })); // 'lice a string'
console.log(sliceAnsi(text, 1, 14, { segmenter })); // 'lice a string'
The second argument of sliceAnsi
can be used to forcefully divide a segment. This breaks the Intl.Segmenter
convention.
My gut feeling is that this behavior is confusing to the user, and that this feature should not be implemented in slice-ansi.
If I've misunderstood something, please let me know.
Thanks for a nice library!
@mizdra I think you are confused. sliceAnsi
slices on indices, not on word boundaries. The change the segmenter would bring is that it would make sliceAnsi
slice on character boundaries instead of code point boundaries.
@sindresorhus yeah, default granularity
is 'grapheme'
Adopting Intl.Segmenter would prevent country flags from being split apart.
Here is an excerpt from version 5, which ended up in my repository via https://www.npmjs.com/package/cli-truncate (I checked that the latest version behaves the same way, though likely in a more performant way):
const characters = [...'👪'] // ['👪']
— sliceAnsi works fine on emojis like this one,
but
const characters = [...'🇮🇱'] // ['🇮','🇱']
— sliceAnsi will cut a flag consisting of two code points into a kinda-incorrect single code point if the country flag is on the slice edge.
However, for Intl.Segmenter a flag is a single unsplittable grapheme:
[...new Intl.Segmenter('en', {granularity: 'grapheme'}).segment('🇮🇱')] // [ { segment: '🇮🇱', index: 0, input: '🇮🇱' } ]
Here is the spike for slice-ansi@5 that solved my problem:
diff --git a/node_modules/slice-ansi/index.js b/node_modules/slice-ansi/index.js
index e10af34..f6bbf20 100755
--- a/node_modules/slice-ansi/index.js
+++ b/node_modules/slice-ansi/index.js
@@ -50,8 +50,26 @@ const checkAnsi = (ansiCodes, isEscapes, endAnsiCode) => {
return output.join('');
};
+function isRegionalIndicator(string) {
+ for (const point of string) {
+ let number = point.codePointAt(0)
+ // U+1F1E6 🇦 REGIONAL INDICATOR SYMBOL LETTER A
+ // U+1F1FF 🇿 REGIONAL INDICATOR SYMBOL LETTER Z
+ if (!(0x1F1E6 <= number && number <= 0x1F1FF)) {
+ return false
+ }
+ }
+ return true
+}
+
export default function sliceAnsi(string, begin, end) {
- const characters = [...string];
+ const characters = Array.from(
+ (function* () {
+ for (let grapheme of new Intl.Segmenter('en', { granularity: 'grapheme' }).segment(string)) {
+ yield grapheme.segment
+ }
+ })()
+ )
const ansiCodes = [];
let stringEnd = typeof end === 'number' ? end : characters.length;
@@ -83,7 +101,7 @@ export default function sliceAnsi(string, begin, end) {
visible++;
}
- if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt())) {
+ if (!astralRegex.test(character) && isFullwidthCodePoint(character.codePointAt()) || isRegionalIndicator(character)) {
visible++;
if (typeof end !== 'number') {
Related issue: https://github.com/sindresorhus/string-length/issues/14