ziglang / zig

General-purpose programming language and toolchain for maintaining robust, optimal, and reusable software.
https://ziglang.org
MIT License

std/json: add config for enabling trailing commas #19668

Open silbinarywolf opened 2 months ago

silbinarywolf commented 2 months ago

Zig Version

0.12.0-dev.3653+e45bdc6bd

Wanted Feature

I was hoping that I'd be able to just enable having trailing commas in the JSON parser by doing something like:

try std.json.parseFromSlice(std.json.Value, allocator, json_data, .{
    .allow_trailing_comma = true,
});

Use cases

There are cases where it'd be nice to just be able to parse JSON that contains trailing commas (for example, hand-edited config files or files written by tools that emit them) without having to strip the commas first.
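
For instance, input like the following (a minimal illustration with made-up contents) is rejected by the strict parser today with error.SyntaxError, and there is currently no option to relax that:

const std = @import("std");

test "std.json rejects trailing commas" {
    // Minimal illustration: trailing commas after the last array element and
    // after the last object field.
    const json_with_trailing_commas =
        \\{
        \\    "name": "example",
        \\    "values": [1, 2, 3,],
        \\}
    ;
    const result = std.json.parseFromSlice(std.json.Value, std.testing.allocator, json_with_trailing_commas, .{});
    try std.testing.expectError(error.SyntaxError, result);
}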

Future enhancements

xdBronch commented 2 months ago

I don't think it's planned to support any kind of JSON extensions, see https://github.com/ziglang/zig/issues/17684

silbinarywolf commented 2 months ago

hmm okay.

If folks want this behaviour in the meantime, here's a wrapper I hacked together in an hour, in case anyone else wants to pinch it. It has only been exercised against my own use cases so far, so it isn't thoroughly tested.

const std = @import("std");

const Token = std.json.Token;
const Scanner = std.json.Scanner;
const Error = std.json.Error;
const Allocator = std.mem.Allocator;
const AllocWhen = std.json.AllocWhen;

const OBJECT_MODE = 0;
const ARRAY_MODE = 1;

// Example use:
//
// const parsed = blk: {
//     var custom_scanner: ScannerIgnoreTrailingComma = .{
//         .underlying = std.json.Scanner.initCompleteInput(allocator, yyp_data),
//     };
//     // Deinit the scanner copy the wrapper actually uses, so its internal
//     // stack allocation is freed rather than leaked.
//     defer custom_scanner.underlying.deinit();
//     break :blk try std.json.parseFromTokenSource(std.json.Value, allocator, &custom_scanner, .{});
// };
// defer parsed.deinit();
pub const ScannerIgnoreTrailingComma = struct {
    pub const NextError = Scanner.NextError;
    pub const AllocError = Scanner.AllocError;
    pub const PeekError = Scanner.PeekError;

    underlying: Scanner,

    /// See `std.json.Token` for documentation of this function.
    /// Note: this plain passthrough does not apply the trailing-comma workaround;
    /// only the `nextAlloc*` functions below do.
    pub fn next(self: *@This()) NextError!std.json.Token {
        return self.underlying.next();
    }

    /// Equivalent to `nextAllocMax(allocator, when, default_max_value_len);`
    /// This function is only available after `endInput()` (or `initCompleteInput()`) has been called.
    /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
    pub fn nextAlloc(self: *@This(), allocator: Allocator, when: AllocWhen) AllocError!Token {
        return self.nextAllocMax(allocator, when, std.json.default_max_value_len);
    }

    /// See also `std.json.Token` for documentation of `nextAlloc*()` function behavior.
    pub fn nextAllocMax(self: *@This(), allocator: Allocator, when: AllocWhen, max_value_len: usize) AllocError!Token {
        state_loop: while (true) {
            const tok = self.underlying.nextAllocMax(allocator, when, max_value_len) catch |err| switch (err) {
                error.SyntaxError => {
                    // If the "syntax error" is just a ',' immediately followed (ignoring
                    // whitespace) by a closing '}' or ']', treat it as a trailing comma:
                    // rewind the scanner state to .post_value and retry the read.
                    const prev_byte = self.expectPrevByte();
                    const curr_byte = self.expectByte();
                    switch (self.underlying.stack.peek()) {
                        OBJECT_MODE => {
                            if (prev_byte == ',' and curr_byte == '}') {
                                self.underlying.state = .post_value;
                                continue :state_loop;
                            }
                        },
                        ARRAY_MODE => {
                            if (prev_byte == ',' and curr_byte == ']') {
                                self.underlying.state = .post_value;
                                continue :state_loop;
                            }
                        },
                    }
                    return err;
                },
                else => return err,
            };
            return tok;
        }
    }

    fn expectPrevByte(self: *const @This()) u8 {
        // Walk backwards from the cursor, skipping whitespace, to find the last
        // significant byte before it. Decrement inside the loop body so the
        // unsigned index can never underflow when the cursor reaches 0.
        var cursor = self.underlying.cursor;
        while (cursor > 0) {
            cursor -= 1;
            const c = self.underlying.input[cursor];
            switch (c) {
                // Whitespace
                ' ', '\t', '\r', '\n' => continue,
                else => return c,
            }
        }
        return 0;
    }

    fn expectByte(self: *const @This()) u8 {
        if (self.underlying.cursor < self.underlying.input.len) {
            return self.underlying.input[self.underlying.cursor];
        }
        return 0;
    }
};
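
A quick smoke test of the wrapper might look like this (an untested sketch; it assumes the wrapper above lives in the same file, so `std` is already imported):

test "ScannerIgnoreTrailingComma accepts a trailing comma" {
    const allocator = std.testing.allocator;
    var custom_scanner: ScannerIgnoreTrailingComma = .{
        .underlying = std.json.Scanner.initCompleteInput(allocator, "[1, 2, 3,]"),
    };
    defer custom_scanner.underlying.deinit();

    const parsed = try std.json.parseFromTokenSource(std.json.Value, allocator, &custom_scanner, .{});
    defer parsed.deinit();
    try std.testing.expectEqual(@as(usize, 3), parsed.value.array.items.len);
}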

If we were to patch this upstream, it might look something like this, where allow_trailing_comma is a bool field on the Scanner struct (defaulting to false so existing strict behaviour is unchanged).

pub fn peekNextTokenType(self: *@This()) PeekError!TokenType {
    // ... (this prong lives inside the scanner's `state_loop: while (true) switch (self.state)`)
    .post_value => {
        // ...
        switch (c) {
            '}' => return .object_end,
            ']' => return .array_end,
            ',' => {
                const peek = self.stack.peek();
                self.cursor += 1;
                switch (peek) {
                    OBJECT_MODE => {
                        if (self.allow_trailing_comma and try self.skipWhitespaceExpectByte() == '}') {
                            return .object_end;
                        }
                        self.state = .object_post_comma;
                    },
                    ARRAY_MODE => {
                        if (self.allow_trailing_comma and try self.skipWhitespaceExpectByte() == ']') {
                            return .array_end;
                        }
                        self.state = .value;
                    },
                }
                continue :state_loop;
            },
            else => return error.SyntaxError,
        }
    },
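
With that patch, opting in might look roughly like this (hypothetical usage; allow_trailing_comma is not a field on std.json.Scanner today, and an allocator plus a json_data slice are assumed to be in scope):

var scanner = std.json.Scanner.initCompleteInput(allocator, json_data);
defer scanner.deinit();
// Hypothetical field from the sketch above; strict parsing would stay the default.
scanner.allow_trailing_comma = true;
const parsed = try std.json.parseFromTokenSource(std.json.Value, allocator, &scanner, .{});
defer parsed.deinit();
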
VisenDev commented 2 months ago

I don't think it's planned to support any kind of JSON extensions, see #17684

Was gonna say this

I think rather than allowing informal JSON extensions like this, it would be better to directly support JSON5, as it is more well-defined. Though I doubt that would get approval to be in the stdlib.

nektro commented 3 weeks ago

btw I spent the weekend implementing https://github.com/nektro/zig-json so that I could have this feature