tc39 / proposal-json-parse-with-source

Proposal for extending JSON.parse to expose input source text.
https://tc39.github.io/proposal-json-parse-with-source
MIT License
214 stars 9 forks source link

cookbook scenario to JSON.parse bigint in schema-less data #6

Closed kaizhu256 closed 4 years ago

kaizhu256 commented 4 years ago

say i'm working on an agile web-project with bigint where either:

  1. the schema changes so frequently with each iteration that the schema-based reviver function (key, val, src, keys) {...} becomes tech-debt

  2. or it has schema-less dictionaries with arbitrary key/val pairs.

in both cases i need an idiot-proof, schema-less JSON.parse solution that will preserve integer-precision by up-coercing integers to bigint as needed. would a cookbook solution look as follows?

require("http").createServer(async function (req, res) {
    let result;
    let reviver;

    reviver = function (ignore, val, src) {
    /*
     * this [schema-less] reviver will preserve integer-precision
     * by returning a bigint if precision-loss is detected
     *
     * reviver is not responsible for enforcing explicit, number/bigint schemas.
     * that is left to user after JSON.parse has done its job.
     */
        let bigint;
        // ignore non-number case
        if (typeof val !== "number") {
            return val;
        }
        // secure against malicious, 1000-digit numbers
        if (src.length > 1000) {
            throw new Error("encountered number with >1000 digits");
        }
        // TODO - handle bigdecimal
        if (src.indexOf(".") >= 0) {
            ...
        }
        try {
            bigint = BigInt(src);
        // ignore non-integer case
        } catch (err) {
            return val;
        }
        // integer precision-loss detected - return bigint
        if (BigInt(val) !== bigint) {
            return bigint;
        }
        // return val
        return val;
    };

    result = await ... // read body from http-request-stream
    // result = "{\
    //     \"dict\": {\
    //         \"bigdecimal\": 12345678901234567890.1234,\
    //         \"bigint\": 12345678901234567890,\
    //         \"float\": 1234.5678,\
    //         \"int\": 1234,\
    //     },\
    //     \"list\": [\
    //         12345678901234567890.1234,\
    //         12345678901234567890,\
    //         1234.5678,\
    //         1234\
    //     ]\
    // }"

    result = JSON.parse(result, reviver);
    // result = {
    //     "dict": {
    //         "bigdecimal": ???,
    //         "bigint": 12345678901234567890n,
    //         "float": 1234.5678,
    //         "int": 1234,
    //     },
    //     "list": [
    //         ???,
    //         12345678901234567890n,
    //         1234.5678,
    //         1234
    //     ]
    // }

    /*
     * reviver is not responsible for enforcing explicit, number/bigint schemas.
     * that is left to user after JSON.parse has done its job.
     */
    result = ...
}).listen(8080);
kaizhu256 commented 4 years ago

similarly, would this be an ok cookbook solution for the schema-less JSON.stringify roundtrip?

// cookbook sketch: http-server whose handler serializes bigint/bigdecimal
// values as bare JSON numbers via a two-pass stringify.
// NOTE(review): the `d`-suffixed literals and typeof "bigdecimal" are not
// valid in current javascript - presumably they anticipate a bigdecimal type.
require("http").createServer(function (req, res) {
    let replacer;
    let result;

    replacer = function (ignore, val) {
    /*
     * this [schema-less] replacer will stringify and annotate
     * bigint/bigdecimal <val> with unique prefix "bignum_j9234v9d_"
     *
     * ignore - key of the entry being serialized (unused)
     * val - value of the entry being serialized
     *
     * returns the prefixed decimal string for bigint/bigdecimal <val>,
     * otherwise <val> unchanged
     */
        if (typeof val === "bigdecimal" || typeof val === "bigint") {
            return "bignum_j9234v9d_" + val.toString();
        }
        return val;
    };

    result = {
        "dict": {
            "bigdecimal": 12345678901234567890.1234d,
            "bigint": 12345678901234567890n,
            "float": 1234.5678,
            "int": 1234
        },
        "list": [
            12345678901234567890.1234d,
            12345678901234567890n,
            1234.5678,
            1234
        ]
    };

    // 1st-pass to annotate bigint/bigdecimal
    // (every bignum, in dict and list alike, becomes a prefixed string)
    result = JSON.stringify(result, replacer, 4);
    // result = "{\
    //     \"dict\": {\
    //         \"bigdecimal\": \"bignum_j9234v9d_12345678901234567890.1234\",\
    //         \"bigint\": \"bignum_j9234v9d_12345678901234567890\",\
    //         \"float\": 1234.5678,\
    //         \"int\": 1234\
    //     },\
    //     \"list\": [\
    //         \"bignum_j9234v9d_12345678901234567890.1234\",\
    //         \"bignum_j9234v9d_12345678901234567890\",\
    //         1234.5678,\
    //         1234\
    //     ]\
    // }"

    // 2nd-pass to remove bigint/bigdecimal annotation
    // (drops the surrounding quotes and the prefix, leaving a bare number;
    // any other string in the output that starts with the prefix
    // would be unquoted as well)
    result = result.replace((
        /"bignum_j9234v9d_(.*?)"/g
    ), "$1");
    // result = "{\
    //     \"dict\": {\
    //         \"bigdecimal\": 12345678901234567890.1234,\
    //         \"bigint\": 12345678901234567890,\
    //         \"float\": 1234.5678,\
    //         \"int\": 1234\
    //     },\
    //     \"list\": [\
    //         12345678901234567890.1234,\
    //         12345678901234567890,\
    //         1234.5678,\
    //         1234\
    //     ]\
    // }"

    res.end(result);
}).listen(8080);
gibson042 commented 4 years ago

Your reviver looks mostly good, although you should keep in mind that JSON numbers with decimal points can still be mathematical integers (e.g., 4.0) and that there are values above Number.MAX_SAFE_INTEGER for which the corresponding BigInt value is non-lossy (e.g., BigInt(Number.MAX_SAFE_INTEGER) + 3n === BigInt(Number.MAX_SAFE_INTEGER + 3)).

Likewise for your replacer, subject to nonexistence of strings that start with "bignum_j9234v9d_" elsewhere in the output.

kaizhu256 commented 4 years ago

if the json-form has a decimal-point, e.g. 12345678901234567890.0, it's probably intended for bigdecimal (and the replacer-function should be updated to reflect that). yeah, i forgot about the even-number case; comparing against Number.MIN_SAFE_INTEGER and Number.MAX_SAFE_INTEGER might be a better test for integer precision-loss.

at risk of beating a dead-horse, are you open to revisiting issue #5 to improve usability (and maybe performance) for this common, schemaless scenario?

// sketch of a hypothetical options-based api: an options-object is passed to
// JSON.parse / JSON.stringify in place of a reviver / replacer function.
// NOTE(review): neither the options-object nor BigDecimal exists in current
// javascript - this illustrates the requested usability, it is not runnable.
require("https").request({
    "url": "https://third-party.api.com/arbitrary_data"
}, async function (clientResponse) {
    let result;
    let jsonOptions;

    jsonOptions = {
        // upscale-as-needed to bigint/bigdecimal to preserve numeric precision.
        // user can ad-hoc-enforce bignum-schema
        // after JSON.parse has done its job.
        "bignumPreservePrecision": true,
        // security - presumably caps the digits accepted for a single number
        "bignumMaxDigits": 1000
    };

    result = await ... // read responseText from http-client
    // result = "[\
    //     12345678901234567890.0,\
    //     12345678901234567890,\
    //     1234.5678,\
    //     1234\
    // ]"

    result = JSON.parse(result, jsonOptions);
    // result = [
    //     12345678901234567890.0d,
    //     12345678901234567890n,
    //     1234.5678,
    //     1234
    // ]

    // user can ad-hoc-enforce bignum-schema
    // after JSON.parse has done its job.
    result = result.map(function (elem) {
        return BigDecimal(elem);
    });
    // result = [
    //     12345678901234567890.0d,
    //     12345678901234567890.0d,
    //     1234.5678d,
    //     1234.0d
    // ]

    // serialize with the same options, so bigdecimals are written as numbers
    result = JSON.stringify(result, jsonOptions, 4);
    // result = "[\
    //     12345678901234567890.0,\
    //     12345678901234567890.0,\
    //     1234.5678,\
    //     1234.0\
    // ]"

    require("fs").writeFileSync(
        "/Downloads/data.json",
        result
    );
}).end();
gibson042 commented 4 years ago

> at risk of beating a dead-horse, are you open to revisiting issue #5 to improve usability (and maybe performance) for this common, schemaless scenario?

I'm not opposed to introducing an options parameter for JSON.parse, but as I said in #5, that's out of scope for this proposal (which is intended to provide reviver functions more information).

kaizhu256 commented 4 years ago

k, closing this issue, though i might reopen to revisit bigdecimal