launchbadge / sqlx

🧰 The Rust SQL Toolkit. An async, pure Rust SQL crate featuring compile-time checked queries without a DSL. Supports PostgreSQL, MySQL, and SQLite.
Apache License 2.0
13.31k stars 1.26k forks source link

Left join with a `WHERE` condition falsely marks columns as non-nullable #3539

Open musjj opened 3 weeks ago

musjj commented 3 weeks ago

Bug Description

When there's a WHERE condition in a left join query, non-null columns from the right table is always marked as non-nullable (in left joins, they should always be nullable).

Minimal Reproduction

The schema:

CREATE TABLE foo (
    id BIGSERIAL PRIMARY KEY
);

CREATE TABLE bar (
    foo_id BIGSERIAL PRIMARY KEY REFERENCES foo (id)
);

With a simple left join, everything works fine:

let rec = sqlx::query!(
    "
    SELECT
        foo.id,
        bar.foo_id
    FROM
        foo
        LEFT JOIN bar ON bar.foo_id = foo.id
    "
)
.fetch_one(&pool)
.await?;

let _: Option<i64> = rec.foo_id; // checks out

But things goes awry when you introduce a WHERE condition:

let rec = sqlx::query!(
    "
    SELECT
        foo.id,
        bar.foo_id
    FROM
        foo
        LEFT JOIN bar ON bar.foo_id = foo.id
    WHERE
        foo.id = $1
    ",
    5
)
.fetch_one(&pool)
.await?;

let _: i64 = rec.foo_id; // hmmm

The issue doesn't occur if you use AND instead:

let rec = sqlx::query!(
    "
    SELECT
        foo.id,
        bar.foo_id
    FROM
        foo
        LEFT JOIN bar ON bar.foo_id = foo.id
        AND foo.id = $1
    ",
    5
)
.fetch_one(&pool)
.await?;

let _: Option<i64> = rec.foo_id;

The full reproduction repo can be found here: https://github.com/musjj/sqlx-join-bug

Execution plans of all relevant queries:

EXPLAIN (VERBOSE, FORMAT JSON) SELECT foo.id, bar.foo_id FROM foo LEFT JOIN bar ON bar.foo_id = foo.id; ```json QUERY PLAN ----------------------------------------------- [ + { + "Plan": { + "Node Type": "Hash Join", + "Parallel Aware": false, + "Async Capable": false, + "Join Type": "Left", + "Startup Cost": 60.85, + "Total Cost": 99.39, + "Plan Rows": 2260, + "Plan Width": 16, + "Output": ["foo.id", "bar.foo_id"], + "Inner Unique": true, + "Hash Cond": "(foo.id = bar.foo_id)", + "Plans": [ + { + "Node Type": "Seq Scan", + "Parent Relationship": "Outer", + "Parallel Aware": false, + "Async Capable": false, + "Relation Name": "foo", + "Schema": "public", + "Alias": "foo", + "Startup Cost": 0.00, + "Total Cost": 32.60, + "Plan Rows": 2260, + "Plan Width": 8, + "Output": ["foo.id"] + }, + { + "Node Type": "Hash", + "Parent Relationship": "Inner", + "Parallel Aware": false, + "Async Capable": false, + "Startup Cost": 32.60, + "Total Cost": 32.60, + "Plan Rows": 2260, + "Plan Width": 8, + "Output": ["bar.foo_id"], + "Plans": [ + { + "Node Type": "Seq Scan", + "Parent Relationship": "Outer",+ "Parallel Aware": false, + "Async Capable": false, + "Relation Name": "bar", + "Schema": "public", + "Alias": "bar", + "Startup Cost": 0.00, + "Total Cost": 32.60, + "Plan Rows": 2260, + "Plan Width": 8, + "Output": ["bar.foo_id"] + } + ] + } + ] + } + } + ] (1 row) ```
EXPLAIN (VERBOSE, FORMAT JSON) SELECT foo.id, bar.foo_id FROM foo LEFT JOIN bar ON bar.foo_id = foo.id WHERE foo.id = 123; ```json QUERY PLAN ---------------------------------------------- [ + { + "Plan": { + "Node Type": "Nested Loop", + "Parallel Aware": false, + "Async Capable": false, + "Join Type": "Left", + "Startup Cost": 0.31, + "Total Cost": 16.36, + "Plan Rows": 1, + "Plan Width": 16, + "Output": ["foo.id", "bar.foo_id"], + "Inner Unique": true, + "Plans": [ + { + "Node Type": "Index Only Scan", + "Parent Relationship": "Outer", + "Parallel Aware": false, + "Async Capable": false, + "Scan Direction": "Forward", + "Index Name": "foo_pkey", + "Relation Name": "foo", + "Schema": "public", + "Alias": "foo", + "Startup Cost": 0.15, + "Total Cost": 8.17, + "Plan Rows": 1, + "Plan Width": 8, + "Output": ["foo.id"], + "Index Cond": "(foo.id = 123)" + }, + { + "Node Type": "Index Only Scan", + "Parent Relationship": "Inner", + "Parallel Aware": false, + "Async Capable": false, + "Scan Direction": "Forward", + "Index Name": "bar_pkey", + "Relation Name": "bar", + "Schema": "public", + "Alias": "bar", + "Startup Cost": 0.15, + "Total Cost": 8.17, + "Plan Rows": 1, + "Plan Width": 8, + "Output": ["bar.foo_id"], + "Index Cond": "(bar.foo_id = 123)"+ } + ] + } + } + ] (1 row) ```
EXPLAIN (VERBOSE, FORMAT JSON) SELECT foo.id, bar.foo_id FROM foo LEFT JOIN bar ON bar.foo_id = foo.id AND foo.id = 123; ```json QUERY PLAN ----------------------------------------------- [ + { + "Plan": { + "Node Type": "Hash Join", + "Parallel Aware": false, + "Async Capable": false, + "Join Type": "Left", + "Startup Cost": 60.85, + "Total Cost": 99.38, + "Plan Rows": 2260, + "Plan Width": 16, + "Output": ["foo.id", "bar.foo_id"], + "Inner Unique": true, + "Hash Cond": "(foo.id = bar.foo_id)", + "Join Filter": "(foo.id = 123)", + "Plans": [ + { + "Node Type": "Seq Scan", + "Parent Relationship": "Outer", + "Parallel Aware": false, + "Async Capable": false, + "Relation Name": "foo", + "Schema": "public", + "Alias": "foo", + "Startup Cost": 0.00, + "Total Cost": 32.60, + "Plan Rows": 2260, + "Plan Width": 8, + "Output": ["foo.id"] + }, + { + "Node Type": "Hash", + "Parent Relationship": "Inner", + "Parallel Aware": false, + "Async Capable": false, + "Startup Cost": 32.60, + "Total Cost": 32.60, + "Plan Rows": 2260, + "Plan Width": 8, + "Output": ["bar.foo_id"], + "Plans": [ + { + "Node Type": "Seq Scan", + "Parent Relationship": "Outer",+ "Parallel Aware": false, + "Async Capable": false, + "Relation Name": "bar", + "Schema": "public", + "Alias": "bar", + "Startup Cost": 0.00, + "Total Cost": 32.60, + "Plan Rows": 2260, + "Plan Width": 8, + "Output": ["bar.foo_id"] + } + ] + } + ] + } + } + ] (1 row) ```

Info

fastfailure commented 1 day ago

It seems the same as https://github.com/launchbadge/sqlx/issues/3408