getty-zig / getty

A (de)serialization framework for Zig
https://getty.so
MIT License
189 stars 13 forks source link

Large types with many fields hit the backwards branches limit #103

Closed ion232 closed 1 year ago

ion232 commented 1 year ago

Description

I've encountered the following error message when attempting to deserialize into a large type auto-generated from a large JSON file.

/Users/ion/.cache/zig/p/122054359cb91a29ab5b91ee8c3b347a110cdc3aa93fc4a60f28e0236d6d729bb343/src/attributes.zig:102:32: error: evaluation exceeded 1000 backwards branches
                if (std.mem.eql(u8, field.name, f.name)) {
                    ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
/Users/ion/.cache/zig/p/122054359cb91a29ab5b91ee8c3b347a110cdc3aa93fc4a60f28e0236d6d729bb343/src/attributes.zig:102:32: note: use @setEvalBranchQuota() to raise the branch limit from 1000
/Users/ion/.cache/zig/p/122054359cb91a29ab5b91ee8c3b347a110cdc3aa93fc4a60f28e0236d6d729bb343/src/ser/traits/attributes.zig:27:29: note: called from here
        const A = Attributes(T, attributes);

It also looks like there is an error in src/de/impls/visitor/struct.zig:29.

How to Reproduce the Bug

Attempt to deserialize the given JSON into the given Zig type:

Zig type:


/// Root type of the reproduction: mirrors the top-level JSON object in this
/// report, which has exactly the keys "data", "pagination" and "meta".
pub const TopLevel = struct {
    // JSON array of result objects; the sample payload carries one element.
    data: []Datum,
    pagination: Pagination,
    meta: Meta,
};

/// One element of `TopLevel.data`. Field names match the JSON keys of the
/// sample object one-to-one, so no rename attributes are declared here.
pub const Datum = struct {
    type: Type,
    id: []u8,
    slug: []u8,
    url: []u8,
    bitly_gif_url: []u8,
    bitly_url: []u8,
    embed_url: []u8,
    // Enum-typed; the sample payload supplies "" here, which maps to the
    // renamed `Username.empty` variant.
    username: Username,
    source: []u8,
    rating: Rating,
    content_url: []u8,
    source_tld: []u8,
    source_post_url: []u8,
    // Encoded as a JSON number in the sample payload (1).
    is_indexable: i64,
    import_datetime: []u8,
    trending_datetime: []u8,
    images: Images,
    // Optional: the sample object in this report has no "user" key.
    user: ?User,
};

/// The "images" object of a `Datum`: 23 named renditions, each stored as one
/// of the helper structs `The480_WStill`, `FixedHeight`, `DownsizedSmall` or
/// `Looping`. This is the struct with the most fields in the reproduction.
pub const Images = struct {
    fixed_height_still: The480_WStill,
    original_still: The480_WStill,
    fixed_width: FixedHeight,
    fixed_height_small_still: The480_WStill,
    fixed_height_downsampled: FixedHeight,
    preview: DownsizedSmall,
    fixed_height_small: FixedHeight,
    downsized_still: The480_WStill,
    downsized: The480_WStill,
    downsized_large: The480_WStill,
    fixed_width_small_still: The480_WStill,
    preview_webp: The480_WStill,
    fixed_width_still: The480_WStill,
    fixed_width_small: FixedHeight,
    downsized_small: DownsizedSmall,
    fixed_width_downsampled: FixedHeight,
    downsized_medium: The480_WStill,
    original: FixedHeight,
    fixed_height: FixedHeight,
    looping: Looping,
    original_mp4: DownsizedSmall,
    preview_gif: The480_WStill,
    // Optional: the sample payload has no "480w_still" entry. The field is
    // mapped to that key through the rename attributes below.
    the_480_w_still: ?The480_WStill,

    // Getty attribute declarations. NOTE(review): `getty.db` is presumably
    // Getty's deserialization block and `getty.sb` its serialization block --
    // confirm against the Getty docs. Both apply the same rename.
    pub const @"getty.db" = struct {
        pub const attributes = .{
            .the_480_w_still = .{
                .rename = "480w_still",
            },
        };
    };

    pub const @"getty.sb" = struct {
        pub const attributes = .{
            .the_480_w_still = .{
                .rename = "480w_still",
            },
        };
    };
};

/// Still/GIF rendition entry used by many `Images` fields.
/// `width`, `height` and `size` are slices because the sample payload encodes
/// them as JSON strings (e.g. "238"), not numbers.
pub const The480_WStill = struct {
    url: []u8,
    width: []u8,
    height: []u8,
    // Optional: entries such as "fixed_height_still" in the sample omit "size".
    size: ?[]u8,
};

/// MP4-only rendition entry; used for `Images.preview`,
/// `Images.downsized_small` and `Images.original_mp4`. All four keys are
/// present in every such entry of the sample payload.
pub const DownsizedSmall = struct {
    width: []u8,
    height: []u8,
    mp4: []u8,
    mp4_size: []u8,
};

/// Rendition entry that carries GIF, WebP and (sometimes) MP4 variants.
/// The optional fields are the keys that are absent from at least one
/// matching entry in the sample payload.
pub const FixedHeight = struct {
    url: []u8,
    width: []u8,
    height: []u8,
    size: []u8,
    // "mp4"/"mp4_size" are missing from the "*_downsampled" sample entries.
    mp4: ?[]u8,
    mp4_size: ?[]u8,
    webp: []u8,
    webp_size: []u8,
    // Only the "original" sample entry has "frames".
    frames: ?[]u8,
    // No entry in the sample payload has a "hash" key.
    hash: ?[]u8,
};

/// Type of `Images.looping`: an MP4 URL and its size (a JSON string in the
/// sample payload).
pub const Looping = struct {
    mp4: []u8,
    mp4_size: []u8,
};

/// Type of the optional `Datum.user` field. The sample payload in this report
/// contains no "user" object, so its JSON shape is not shown here.
pub const User = struct {
    avatar_url: []u8,
    banner_url: []u8,
    profile_url: []u8,
    username: Username,
    display_name: []u8,
    twitter: []u8,
};

/// Type of `TopLevel.meta`. In the sample payload "status" is a JSON number
/// (200) while "msg" and "response_id" are strings.
pub const Meta = struct {
    status: i64,
    msg: []u8,
    response_id: []u8,
};

/// Type of `TopLevel.pagination`; all three values are JSON numbers in the
/// sample payload.
pub const Pagination = struct {
    total_count: i64,
    count: i64,
    offset: i64,
};

/// Type of `Datum.rating`; the sample payload uses "g".
pub const Rating = enum {
    g,
    pg,
    // Mapped to the string "pg-13" by the rename attributes below.
    pg_13,
    y,

    // Getty attribute declarations. NOTE(review): `getty.db` is presumably
    // Getty's deserialization block and `getty.sb` its serialization block --
    // confirm against the Getty docs. Both apply the same rename.
    pub const @"getty.db" = struct {
        pub const attributes = .{
            .pg_13 = .{
                .rename = "pg-13",
            },
        };
    };

    pub const @"getty.sb" = struct {
        pub const attributes = .{
            .pg_13 = .{
                .rename = "pg-13",
            },
        };
    };
};

/// Type of `Datum.type`; a single-variant enum matching the "gif" value in
/// the sample payload.
pub const Type = enum {
    gif,
};

/// Enum used for `Datum.username` and `User.username`.
pub const Username = enum {
    cheezburger,
    // Stands in for the empty string "" (see the rename attributes below),
    // which is the value the sample payload supplies for "username".
    empty,
    meetaiko,
    producthunt,

    // Getty attribute declarations. NOTE(review): `getty.db` is presumably
    // Getty's deserialization block and `getty.sb` its serialization block --
    // confirm against the Getty docs.
    // The maintainer reports in this thread that renaming `empty` to ""
    // produced an UnknownVariant error once the branch quota was raised.
    pub const @"getty.db" = struct {
        pub const attributes = .{
            .empty = .{
                .rename = "",
            },
        };
    };

    pub const @"getty.sb" = struct {
        pub const attributes = .{
            .empty = .{
                .rename = "",
            },
        };
    };
};

JSON (one data object only - to keep under limit)

{
  "data": [
    {
      "type": "gif",
      "id": "gl8ymnpv4Sqha",
      "slug": "cats-gl8ymnpv4Sqha",
      "url": "https://giphy.com/gifs/cats-gl8ymnpv4Sqha",
      "bitly_gif_url": "http://gph.is/1ZexrL0",
      "bitly_url": "http://gph.is/1ZexrL0",
      "embed_url": "https://giphy.com/embed/gl8ymnpv4Sqha",
      "username": "",
      "source": "http://www.reddit.com/r/gifs/comments/40bwcc/cats_cuddling/",
      "rating": "g",
      "content_url": "",
      "source_tld": "www.reddit.com",
      "source_post_url": "http://www.reddit.com/r/gifs/comments/40bwcc/cats_cuddling/",
      "is_indexable": 1,
      "import_datetime": "2016-01-10 16:25:16",
      "trending_datetime": "2017-07-25 12:15:01",
      "images": {
        "fixed_height_still": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200_s.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "238",
          "height": "200"
        },
        "original_still": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy_s.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "335",
          "height": "282"
        },
        "fixed_width": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200w.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "200",
          "height": "168",
          "size": "816307",
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200w.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "53468",
          "webp": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200w.webp?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "webp_size": "463236"
        },
        "fixed_height_small_still": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/100_s.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "119",
          "height": "100"
        },
        "fixed_height_downsampled": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200_d.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "238",
          "height": "200",
          "size": "149811",
          "webp": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200_d.webp?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "webp_size": "79132"
        },
        "preview": {
          "width": "226",
          "height": "190",
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy-preview.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "34232"
        },
        "fixed_height_small": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/100.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "119",
          "height": "100",
          "size": "329307",
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/100.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "27066",
          "webp": "https://media3.giphy.com/media/gl8ymnpv4Sqha/100.webp?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "webp_size": "209276"
        },
        "downsized_still": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy-tumblr_s.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "250",
          "height": "210"
        },
        "downsized": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy-tumblr.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "250",
          "height": "210",
          "size": "1268794"
        },
        "downsized_large": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "335",
          "height": "282",
          "size": "2092733"
        },
        "fixed_width_small_still": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/100w_s.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "100",
          "height": "84"
        },
        "preview_webp": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy-preview.webp?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "133",
          "height": "112",
          "size": "48914"
        },
        "fixed_width_still": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200w_s.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "200",
          "height": "168"
        },
        "fixed_width_small": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/100w.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "100",
          "height": "84",
          "size": "240785",
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/100w.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "22302",
          "webp": "https://media3.giphy.com/media/gl8ymnpv4Sqha/100w.webp?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "webp_size": "159036"
        },
        "downsized_small": {
          "width": "334",
          "height": "282",
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy-downsized-small.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "195745"
        },
        "fixed_width_downsampled": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200w_d.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "200",
          "height": "168",
          "size": "106769",
          "webp": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200w_d.webp?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "webp_size": "59012"
        },
        "downsized_medium": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "335",
          "height": "282",
          "size": "2092733"
        },
        "original": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "335",
          "height": "282",
          "size": "2092733",
          "frames": "47",
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "242120",
          "webp": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy.webp?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "webp_size": "1170728"
        },
        "fixed_height": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "238",
          "height": "200",
          "size": "1152973",
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "64900",
          "webp": "https://media3.giphy.com/media/gl8ymnpv4Sqha/200.webp?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "webp_size": "617762"
        },
        "looping": {
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy-loop.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "1246558"
        },
        "original_mp4": {
          "width": "480",
          "height": "404",
          "mp4": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy.mp4?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "mp4_size": "242120"
        },
        "preview_gif": {
          "url": "https://media3.giphy.com/media/gl8ymnpv4Sqha/giphy-preview.gif?fingerprint=e1bb72ff597d1a8c4f54645836e08fe7",
          "width": "97",
          "height": "82",
          "size": "48424"
        }
      }
    }
  ],
  "pagination": {
    "total_count": 75875,
    "count": 25,
    "offset": 0
  },
  "meta": {
    "status": 200,
    "msg": "OK",
    "response_id": "597d1a8c4f54645836e08fe7"
  }
}

Additional Context

No response

ibokuri commented 1 year ago

Wow, that's a chonker!

As the error points out, you can use @setEvalBranchQuota to increase the number of backwards branches the compiler can evaluate before giving up (and erroring out). When testing the JSON payload you provided (one data object), a quota of 1800 worked for me.

I'm not sure what the best practice is for setting branch quotas in libraries, but for now I think I'll keep using the default in Getty (1000 branches). Users who work with larger types can set their own quotas. They have more insight into the types they're working with so they can set more accurate quotas. Plus, even if I did set a quota in Getty, I'm sure there'll be somebody out there who'll run into it eventually. 😅

P.S. After fixing the quota issue, I ran into an UnknownVariant error due to Username.empty being renamed to "". Pretty sure that's a bug in Getty, so I'll look into it.

ion232 commented 1 year ago

Before making this issue I tried a quota of 10000, which seemed to be enough for all cases, but I did this in the library itself. I didn't realise you could set it within user code 🤔.

With regards to best practice, I've seen examples of the quota being calculated based on number of fields - e.g. https://github.com/ziglang/zig/blob/master/lib/std/fmt.zig#L532. There are also just arbitrary limits in the standard library too.

That being said, I think it would be more user-friendly to try and approximate the required quota based on the type, given this is an implementation detail that can be automated (cheaply and decidably) by the library. A somewhat analogous scenario is how a user doesn't want to try and estimate buffer sizes; they would prefer to pass an allocator that the library can use to get memory as needed. In this case the quota estimate might still not be enough, like you say, but it would handle many more cases and save the user the time spent experimenting with quotas.

The default limit is fine for now given that the user can actually change it, but I think an automated limit is worth considering for a future version.

ibokuri commented 1 year ago

With regards to best practice, I've seen examples of the quota being calculated based on number of fields - e.g. https://github.com/ziglang/zig/blob/master/lib/std/fmt.zig#L532.

I like that.

I'll update the struct visitor over the weekend to set a quota based on the number of fields. If everything looks good, I'll go ahead and update other spots in Getty as well.