coolaj86 / TextEncoderLite_tmp

Polyfill for the Encoding Living Standard's API
Apache License 2.0
30 stars 19 forks source link

Suggestion: Make it a polyfill that replaces .TextEncoder/.TextDecoder for UTF-8 if no native implementation is found #5

Open Ruffio opened 7 years ago

Ruffio commented 7 years ago

Would it be an idea to make it a polyfill, so that the native TextEncoder and TextDecoder are used when they are available, and this library takes over when they are not (e.g. IE/Edge)?

I have modified the code to accomplish this, but I don't know if this is the best/smartest way to do it. Maybe this is too much of a change, or not within the scope of the project?

The code below has been tested successfully in Chrome and IE 11: `(function () { 'use strict';

// Taken from https://github.com/feross/buffer/blob/master/index.js
// Thanks Feross et al! :-)

/**
 * Constructor for the lightweight text encoder.
 *
 * Takes no arguments and stores no state on the instance; all behaviour
 * lives on the prototype (see TextEncoderLite.prototype.encode).
 */
function TextEncoderLite() {}

/**
 * Constructor for the lightweight text decoder.
 *
 * Takes no arguments and stores no state on the instance; all behaviour
 * lives on the prototype (see TextDecoderLite.prototype.decode).
 */
function TextDecoderLite() {}

/**
 * Encode a JavaScript (UTF-16) string as a plain array of UTF-8 byte values.
 *
 * Every unpaired surrogate (a trail without a lead, or a lead that is not
 * followed by a trail) is encoded as U+FFFD, i.e. the bytes EF BF BD.
 *
 * @param {string} string - Text to encode.
 * @param {number} [units] - Maximum number of bytes to emit. Any falsy value
 *     (including 0) means "no limit". Encoding stops at the first character
 *     that no longer fits.
 * @returns {number[]} The UTF-8 bytes, one array element per byte.
 */
function utf8ToBytes(string, units) {
    units = units || Infinity;
    var codePoint;
    var length = string.length;
    var leadSurrogate = null;
    var bytes = [];
    var i = 0;

    for (; i < length; i++) {
        codePoint = string.charCodeAt(i);

        // is surrogate component
        if (codePoint > 0xD7FF && codePoint < 0xE000) {
            if (!leadSurrogate) {
                if (codePoint > 0xDBFF) {
                    // unexpected trail
                    if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD);
                    continue;
                }

                // lead: hold it until we see what follows (a lead that ends
                // the string is flushed after the loop)
                leadSurrogate = codePoint;
                continue;
            }

            if (codePoint < 0xDC00) {
                // 2 leads in a row: the earlier one is unpaired, the new one
                // may still be completed by the next code unit
                if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD);
                leadSurrogate = codePoint;
                continue;
            }

            // valid surrogate pair. The 0x10000 offset must be ADDED: OR-ing it
            // in loses the carry whenever bit 16 of the 20-bit payload is
            // already set (e.g. U+20000 would come out as U+10000).
            codePoint = ((leadSurrogate - 0xD800) << 10 | (codePoint - 0xDC00)) + 0x10000;
            leadSurrogate = null;
        } else if (leadSurrogate) {
            // valid bmp char, but last char was a lead
            if ((units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD);
            leadSurrogate = null;
        }

        // encode utf8
        if (codePoint < 0x80) {
            if ((units -= 1) < 0) break;
            bytes.push(codePoint);
        } else if (codePoint < 0x800) {
            if ((units -= 2) < 0) break;
            bytes.push(
                codePoint >> 0x6 | 0xC0,
                codePoint & 0x3F | 0x80
            );
        } else if (codePoint < 0x10000) {
            if ((units -= 3) < 0) break;
            bytes.push(
                codePoint >> 0xC | 0xE0,
                codePoint >> 0x6 & 0x3F | 0x80,
                codePoint & 0x3F | 0x80
            );
        } else {
            // A combined surrogate pair is at most U+10FFFF, so everything
            // that reaches this branch fits in four bytes.
            if ((units -= 4) < 0) break;
            bytes.push(
                codePoint >> 0x12 | 0xF0,
                codePoint >> 0xC & 0x3F | 0x80,
                codePoint >> 0x6 & 0x3F | 0x80,
                codePoint & 0x3F | 0x80
            );
        }
    }

    // The string ended while a lead surrogate was still waiting for its trail.
    if (leadSurrogate && (units -= 3) > -1) bytes.push(0xEF, 0xBF, 0xBD);

    return bytes;
}

/**
 * Decode the UTF-8 bytes in buf[start..end) into a string.
 *
 * Each UTF-8 sequence (a lead byte plus the continuation bytes it announces)
 * is decoded on its own through decodeUtf8Char, so a malformed sequence
 * becomes a single U+FFFD without affecting the valid characters around it.
 *
 * @param {ArrayLike<number>} buf - Byte values; each element is masked to
 *     its low 8 bits, so signed byte arrays decode the same as unsigned ones.
 * @param {number} [start] - Index of the first byte to decode (default 0).
 * @param {number} [end] - Index one past the last byte to decode; omitted
 *     (undefined/null) means "to the end of buf". An explicit 0 yields ''.
 * @returns {string} The decoded text.
 */
function utf8Slice(buf, start, end) {
    var res = '';
    // Only a missing `end` means "everything"; `end || Infinity` would also
    // have turned an explicit 0 into "everything".
    var limit = Math.min(buf.length, end === undefined || end === null ? Infinity : end);
    var i = start || 0;
    var byte;
    var pending;
    var seq;

    while (i < limit) {
        byte = buf[i] & 0xFF;
        i++;

        if (byte <= 0x7F) {
            res += String.fromCharCode(byte);
            continue;
        }

        // Number of continuation bytes the lead byte announces. Bytes that
        // cannot start a sequence (0x80-0xBF) announce none and are rejected
        // by the decoder on their own.
        pending = byte >= 0xF0 ? 3 : byte >= 0xE0 ? 2 : byte >= 0xC0 ? 1 : 0;
        seq = '%' + byte.toString(16);

        // Collect only genuine continuation bytes (10xxxxxx) so that the next
        // lead byte or ASCII byte starts a fresh sequence.
        while (pending > 0 && i < limit && (buf[i] & 0xC0) === 0x80) {
            seq += '%' + (buf[i] & 0xFF).toString(16);
            i++;
            pending--;
        }

        res += decodeUtf8Char(seq);
    }

    return res;
}

/**
 * Decode a percent-encoded UTF-8 byte sequence (e.g. "%c3%a9").
 *
 * @param {string} str - Percent-encoded bytes; may be empty.
 * @returns {string} The decoded text, or the replacement character U+FFFD
 *     when decodeURIComponent rejects the input.
 */
function decodeUtf8Char(str) {
    var decoded;

    try {
        decoded = decodeURIComponent(str);
    } catch (err) {
        // Malformed input: fall back to the Unicode replacement character.
        decoded = '\uFFFD';
    }

    return decoded;
}

/**
 * Encode text as UTF-8.
 *
 * A missing argument is encoded as the empty string and any other
 * non-string value is converted with String() first, which is how the
 * native TextEncoder treats its input.
 *
 * @param {string} [str] - Text to encode.
 * @returns {Uint8Array|number[]} The UTF-8 bytes; a plain array of numbers
 *     on platforms that have no Uint8Array.
 */
TextEncoderLite.prototype.encode = function (str) {
    // utf8ToBytes reads .length / .charCodeAt, so it needs a real string.
    var input = str === undefined ? '' : String(str);
    var bytes = utf8ToBytes(input);

    return typeof Uint8Array === 'undefined' ? bytes : new Uint8Array(bytes);
};

/**
 * Decode UTF-8 bytes into a string.
 *
 * @param {ArrayLike<number>|ArrayBuffer|ArrayBufferView} [bytes] - The bytes
 *     to decode: an array or Uint8Array of byte values, a raw ArrayBuffer,
 *     or any other view (DataView, Uint16Array, ...) whose underlying bytes
 *     should be decoded. Omitting the argument yields ''.
 * @returns {string} The decoded text.
 */
TextDecoderLite.prototype.decode = function (bytes) {
    var view = bytes;

    if (bytes === undefined) {
        return '';
    }

    if (typeof ArrayBuffer !== 'undefined') {
        if (bytes instanceof ArrayBuffer) {
            // An ArrayBuffer has no .length / index access of its own.
            view = new Uint8Array(bytes);
        } else if (typeof ArrayBuffer.isView === 'function' &&
                ArrayBuffer.isView(bytes) &&
                !(bytes instanceof Uint8Array)) {
            // Re-wrap other views so that we walk their raw bytes rather
            // than their (possibly multi-byte or absent) elements.
            view = new Uint8Array(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        }
    }

    return utf8Slice(view, 0, view.length);
};

// Only use the polyfill where a native implementation is not available.

/**
 * Install the lite encoder/decoder on `scope` for whichever of
 * TextEncoder / TextDecoder it does not already provide. The two are checked
 * independently, so an environment that has only one of them still gets the
 * other one filled in.
 *
 * @param {Object} scope - Object to install onto (normally the global object).
 */
function installPolyfill(scope) {
    if (typeof scope.TextEncoder === 'undefined') {
        scope.TextEncoder = TextEncoderLite;
    }
    if (typeof scope.TextDecoder === 'undefined') {
        scope.TextDecoder = TextDecoderLite;
    }
}

// Locate the global object without ever touching an undeclared identifier:
// browsers and workers expose `self`/`window`, Node exposes `global` (the
// legacy `GLOBAL` alias no longer exists in current Node releases), and
// modern engines expose `globalThis` everywhere.
var g = typeof globalThis !== 'undefined' ? globalThis
    : typeof self !== 'undefined' ? self
    : typeof window !== 'undefined' ? window
    : typeof global !== 'undefined' ? global
    : typeof GLOBAL !== 'undefined' ? GLOBAL
    : {};

installPolyfill(g);

}()); `

Ruffio commented 7 years ago

So the two functions are made local and are only assigned to .TextEncoder and .TextDecoder if no native implementations are found.

coolaj86 commented 7 years ago

You may be right that it may be best to have it in a separate fork. I don't care too much one way or the other.

I would be happy to have you make the changes here, with just a few constraints:

  1. Bump the major version number
  2. It should still work in node and the browser
  3. Expose it in such a way that it also keeps backwards compatibility, if reasonable.