launchbadge / sqlx

🧰 The Rust SQL Toolkit. An async, pure Rust SQL crate featuring compile-time checked queries without a DSL. Supports PostgreSQL, MySQL, and SQLite.
Apache License 2.0
13.08k stars 1.24k forks source link

sqlx::query_as!() returns unexpected null; try decoding as an `Option` when multiple (left) joins are used #3202

Open rewbycraft opened 5 months ago

rewbycraft commented 5 months ago

Bug Description

I found I had to use the col as "col?" trick to force nullability at runtime otherwise sqlx::query_as!() produces "unexpected null; try decoding as an Option when multiple (left) joins are used". The documentation for sqlx::query_as!() requests a bug report be filed in such cases.

Minimal Reproduction

The application in question tracks the weekly top 50 of radio stations. The two tables needed to reproduce this are as follows:

CREATE TABLE IF NOT EXISTS songs
(
    id      SERIAL PRIMARY KEY,
    artist  TEXT NOT NULL,
    title   TEXT NOT NULL,
    UNIQUE (artist, title)
);
CREATE TABLE IF NOT EXISTS top50
(
    id          SERIAL PRIMARY KEY,
    year        INT NOT NULL DEFAULT EXTRACT(YEAR FROM CURRENT_DATE),
    week        INT NOT NULL DEFAULT EXTRACT(WEEK FROM CURRENT_DATE),
    song_id     SERIAL NOT NULL,
    position    INT NOT NULL,
    FOREIGN KEY (song_id) REFERENCES songs(id),
    UNIQUE (year, week, song_id)
);

The query involved:

SELECT this_week.week as cur_week, this_week.position as cur_position, prev_week.position as prev_position, (prev_week.position - this_week.position) as delta, songs.artist as artist, songs.title as title
FROM top50 AS this_week
         INNER JOIN songs ON songs.id=this_week.song_id
         LEFT OUTER JOIN top50 as prev_week ON prev_week.song_id=this_week.song_id AND prev_week.week = this_week.week - 1
WHERE this_week.week = 15
ORDER BY this_week.week, this_week.position

The fields prev_position and delta are null if the track was not present in the previous week's top50. Then looking at the nullability field in the files produced by cargo sqlx prepare I have also noticed that it produces a false for the prev_position field and a null for the delta field.

For completeness sake, here is the struct I'm using query_as! to decode the rows into:

struct SongPosition {
    cur_week: i32,
    cur_position: i32,
    prev_position: Option<i32>,
    delta: Option<i32>,
    artist: String,
    title: String,
}

The query plan:

[
  {
    "Plan": {
      "Node Type": "Sort",
      "Parallel Aware": false,
      "Async Capable": false,
      "Startup Cost": 10.00,
      "Total Cost": 10.13,
      "Plan Rows": 50,
      "Plan Width": 41,
      "Output": ["this_week.week", "this_week.\"position\"", "prev_week.\"position\"", "((prev_week.\"position\" - this_week.\"position\"))", "songs.artist", "songs.title"],
      "Sort Key": ["this_week.\"position\""],
      "Plans": [
        {
          "Node Type": "Hash Join",
          "Parent Relationship": "Outer",
          "Parallel Aware": false,
          "Async Capable": false,
          "Join Type": "Inner",
          "Startup Cost": 5.44,
          "Total Cost": 8.59,
          "Plan Rows": 50,
          "Plan Width": 41,
          "Output": ["this_week.week", "this_week.\"position\"", "prev_week.\"position\"", "(prev_week.\"position\" - this_week.\"position\")", "songs.artist", "songs.title"],
          "Inner Unique": true,
          "Hash Cond": "(this_week.song_id = songs.id)",
          "Plans": [
            {
              "Node Type": "Hash Join",
              "Parent Relationship": "Outer",
              "Parallel Aware": false,
              "Async Capable": false,
              "Join Type": "Right",
              "Startup Cost": 3.00,
              "Total Cost": 5.88,
              "Plan Rows": 50,
              "Plan Width": 16,
              "Output": ["this_week.week", "this_week.\"position\"", "this_week.song_id", "prev_week.\"position\""],
              "Inner Unique": false,
              "Hash Cond": "((prev_week.song_id = this_week.song_id) AND (prev_week.week = (this_week.week - 1)))",
              "Plans": [
                {
                  "Node Type": "Seq Scan",
                  "Parent Relationship": "Outer",
                  "Parallel Aware": false,
                  "Async Capable": false,
                  "Relation Name": "top50",
                  "Schema": "public",
                  "Alias": "prev_week",
                  "Startup Cost": 0.00,
                  "Total Cost": 2.00,
                  "Plan Rows": 100,
                  "Plan Width": 12,
                  "Output": ["prev_week.id", "prev_week.year", "prev_week.week", "prev_week.song_id", "prev_week.\"position\""]
                },
                {
                  "Node Type": "Hash",
                  "Parent Relationship": "Inner",
                  "Parallel Aware": false,
                  "Async Capable": false,
                  "Startup Cost": 2.25,
                  "Total Cost": 2.25,
                  "Plan Rows": 50,
                  "Plan Width": 12,
                  "Output": ["this_week.week", "this_week.\"position\"", "this_week.song_id"],
                  "Plans": [
                    {
                      "Node Type": "Seq Scan",
                      "Parent Relationship": "Outer",
                      "Parallel Aware": false,
                      "Async Capable": false,
                      "Relation Name": "top50",
                      "Schema": "public",
                      "Alias": "this_week",
                      "Startup Cost": 0.00,
                      "Total Cost": 2.25,
                      "Plan Rows": 50,
                      "Plan Width": 12,
                      "Output": ["this_week.week", "this_week.\"position\"", "this_week.song_id"],
                      "Filter": "(this_week.week = 15)"
                    }
                  ]
                }
              ]
            },
            {
              "Node Type": "Hash",
              "Parent Relationship": "Inner",
              "Parallel Aware": false,
              "Async Capable": false,
              "Startup Cost": 1.64,
              "Total Cost": 1.64,
              "Plan Rows": 64,
              "Plan Width": 29,
              "Output": ["songs.artist", "songs.title", "songs.id"],
              "Plans": [
                {
                  "Node Type": "Seq Scan",
                  "Parent Relationship": "Outer",
                  "Parallel Aware": false,
                  "Async Capable": false,
                  "Relation Name": "songs",
                  "Schema": "public",
                  "Alias": "songs",
                  "Startup Cost": 0.00,
                  "Total Cost": 1.64,
                  "Plan Rows": 64,
                  "Plan Width": 29,
                  "Output": ["songs.artist", "songs.title", "songs.id"]
                }
              ]
            }
          ]
        }
      ]
    }
  }
]

Info

spencerbart commented 2 months ago

I've run into this bug as well

KrisCarr commented 2 months ago

I see this quite often and end up using as "col_name!" as a workaround.

My understanding is that the type-checking is done on the query plan and the plan is based off the actual query executed internally in Postgres, which Postgres can mess with for optimisation.

In this case as you can see in the query plan, it's flipped your left join into a right join.

"Join Type": "Right",

While I defer to Postgres' judgement that this is indeed more optimal, this also makes the type of result columns that logically can never hold a NULL value, nullable.

This in turn throws off the type checking, causing this issue.

Not too familiar with this side of Postgres and what options are available but I wonder if the query sent up for type-checking can pass a flag or option telling Postgres not to mess with it and construct the query plan for the query as-is.