kymr / daily-study

2 stars 0 forks source link

utf-8-validation #72

Closed kymr closed 5 years ago

kymr commented 5 years ago

    // Bit mask with the low eight bits set (255 == 0xFF); extOneByte ANDs
    // values with it so that only the lowest byte of an int is examined.
    private static final int ONE_BYTE_EXT = 255;

    /**
     * Checks whether {@code data} is made up entirely of well-formed UTF-8
     * byte sequences, judged only by the lead-byte / continuation-byte bit
     * patterns.
     *
     * <p>Only the low eight bits of each element are inspected. The lead byte
     * of each character determines its length (1 to 4 bytes); every following
     * byte of that character must start with the bits {@code 10}.
     *
     * @param data the values to validate, one byte per element
     * @return {@code true} if every character is complete and correctly
     *         formed (an empty array is valid); {@code false} if a lead byte
     *         matches no known pattern, a character runs past the end of the
     *         array, or a continuation byte does not start with {@code 10}
     */
    public boolean validUtf8(int[] data) {
        int pos = 0;

        while (pos < data.length) {
            final int width = extBytes(data[pos]);

            // Unrecognised lead byte.
            if (width == 0) {
                return false;
            }
            // The character would extend beyond the end of the array.
            if (width > data.length - pos) {
                return false;
            }

            // Every trailing byte must look like 10xxxxxx.
            for (int offset = 1; offset < width; offset++) {
                final int continuation = extOneByte(data[pos + offset]);
                if ((continuation >> 6) != 2) {
                    return false;
                }
            }

            // Jump to the lead byte of the next character.
            pos += width;
        }

        return true;
    }

    /**
     * Determines how many bytes a UTF-8 character occupies from the high bits
     * of its lead byte. Only the low eight bits of {@code val} are examined.
     *
     * @param val the value holding the lead byte
     * @return 1, 2, 3 or 4 for the prefixes {@code 0}, {@code 110},
     *         {@code 1110} and {@code 11110} respectively; 0 when the byte
     *         matches none of them
     */
    private int extBytes(int val) {
        final int lead = extOneByte(val);

        // 0xxxxxxx
        if ((lead >> 7) == 0) {
            return 1;
        }
        // 110xxxxx
        if ((lead >> 5) == 0b110) {
            return 2;
        }
        // 1110xxxx
        if ((lead >> 4) == 0b1110) {
            return 3;
        }
        // 11110xxx
        if ((lead >> 3) == 0b11110) {
            return 4;
        }

        // Not a valid lead-byte pattern (e.g. a continuation byte).
        return 0;
    }

    /**
     * Extracts the lowest byte of {@code val} by masking it with
     * {@link #ONE_BYTE_EXT}.
     *
     * @param val the value to mask
     * @return {@code val} with every bit above the low eight cleared
     */
    private int extOneByte(int val) {
        final int lowByte = ONE_BYTE_EXT & val;
        return lowByte;
    }