PRQL / prql

PRQL is a modern language for transforming data — a simple, powerful, pipelined SQL replacement
https://prql-lang.org
Apache License 2.0
9.84k stars 216 forks source link

Compiler panic with `sort` in inline join relation #4063

Open kgutwin opened 8 months ago

kgutwin commented 8 months ago

What happened?

The PRQL query below results in the following message:

The application panicked (crashed).
Message:  cannot find cid by id=121
Location: prqlc/prql-compiler/src/semantic/lowering.rs:949
Full backtrace ``` The application panicked (crashed). Message: cannot find cid by id=121 Location: prqlc/prql-compiler/src/semantic/lowering.rs:949 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ BACKTRACE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ⋮ 13 frames hidden ⋮ 14: prql_compiler::semantic::lowering::Lowerer::lookup_cid::h2d39b5439c15c5cb at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:949 947 │ } 948 │ } 949 > None => panic!("cannot find cid by id={id}"), 950 │ }; 951 │ 15: prql_compiler::semantic::lowering::Lowerer::lower_expr::h43150ba13889efe1 at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:818 816 │ } else if let Some(id) = expr.target_id { 817 │ // base case: column ref 818 > let cid = self.lookup_cid(id, Some(&ident.name)).with_span(span)?; 819 │ 820 │ rq::ExprKind::ColumnRef(cid) 16: prql_compiler::semantic::lowering::Lowerer::declare_as_column::hf30114d4b1d0c339 at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:753 751 │ 752 │ // lower 753 > let expr = self.lower_expr(expr_ast)?; 754 │ 755 │ // don't create new ColumnDef if expr is just a ColumnRef with no renaming 17: prql_compiler::semantic::lowering::Lowerer::lower_sorts::{{closure}}::hf8b91e5a417d6ef8 at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:580 578 │ by.into_iter() 579 │ .map(|ColumnSort { column, direction }| { 580 > let column = self.declare_as_column(*column, false)?; 581 │ Ok(ColumnSort { direction, column }) 582 │ }) 18: core::iter::adapters::map::map_try_fold::{{closure}}::he0b15f4de2861127 at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/adapters/map.rs:91 19: core::iter::traits::iterator::Iterator::try_fold::hb7a594574057407d at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:2461 20: as core::iter::traits::iterator::Iterator>::try_fold::h72778778608af965 at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/adapters/map.rs:117 21: as core::iter::traits::iterator::Iterator>::try_fold::hcba8449c91e94c76 at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/adapters/mod.rs:199 22: >::collect_in_place::h94878eb6dd18bc3b at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/alloc/src/vec/in_place_collect.rs:258 23: alloc::vec::in_place_collect:: for alloc::vec::Vec>::from_iter::h500d49fd8d8a3e5e at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/alloc/src/vec/in_place_collect.rs:182 24: as core::iter::traits::collect::FromIterator>::from_iter::h50ae6a4fe2fde6ed at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/alloc/src/vec/mod.rs:2749 25: core::iter::traits::iterator::Iterator::collect::hf2ca0ca1a820771e at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:2053 26: as core::iter::traits::collect::FromIterator>>::from_iter::{{closure}}::h01961685e36c2b5c at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/result.rs:1933 27: core::iter::adapters::try_process::h945b3ac383f3f193 at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/adapters/mod.rs:168 28: as core::iter::traits::collect::FromIterator>>::from_iter::hc5fadb63a4aed165 at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/result.rs:1933 29: core::iter::traits::iterator::Iterator::collect::heaae315b84747def at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/iter/traits/iterator.rs:2053 30: itertools::Itertools::try_collect::hff9980339b4dfb8d at /Users/kgutwin/.cargo/registry/src/index.crates.io-6f17d22bba15001f/itertools-0.12.0/src/lib.rs:2224 2222 │ Result: FromIterator>, 2223 │ { 2224 > self.collect() 2225 │ } 2226 │ 31: prql_compiler::semantic::lowering::Lowerer::lower_sorts::he61534cb19f04c69 at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:578 576 │ 577 │ fn lower_sorts(&mut self, by: Vec>>) -> Result>> { 578 > by.into_iter() 579 │ .map(|ColumnSort { column, direction }| { 580 │ let column = self.declare_as_column(*column, false)?; 32: prql_compiler::semantic::lowering::Lowerer::lower_pipeline::hd5286dcbc239425a at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:491 489 │ vec![] 490 │ }, 491 > sort: self.lower_sorts(transform_call.sort)?, 492 │ }; 493 │ self.window = Some(window); 33: prql_compiler::semantic::lowering::Lowerer::lower_relation::hbba58a6ae4f215f5 at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:439 437 │ let prev_pipeline = self.pipeline.drain(..).collect_vec(); 438 │ 439 > self.lower_pipeline(expr, None)?; 440 │ 441 │ let mut transforms = self.pipeline.drain(..).collect_vec(); 34: prql_compiler::semantic::lowering::Lowerer::lower_table_decl::h7563b7df48cb13b3 at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:174 172 │ TableExpr::RelationVar(expr) => { 173 │ // a CTE 174 > (self.lower_relation(*expr)?, Some(fq_ident.name.clone())) 175 │ } 176 │ TableExpr::LocalTable => extern_ref_to_relation(columns, &fq_ident), 35: prql_compiler::semantic::lowering::lower_to_ir::h40152dd8ddb4ec74 at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:58 56 │ let is_main = fq_ident == main_ident; 57 │ 58 > l.lower_table_decl(table, fq_ident)?; 59 │ 60 │ if is_main { 36: prql_compiler::semantic::resolve_and_lower::h4a8a4aa83f7c700a at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/mod.rs:32 30 │ let root_mod = resolve(file_tree, Default::default())?; 31 │ 32 > let (query, _) = lowering::lower_to_ir(root_mod, main_path)?; 33 │ Ok(query) 34 │ } 37: prql_compiler::pl_to_rq_tree::h94cbc4f091e580d7 at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/lib.rs:302 300 │ main_path: &[String], 301 │ ) -> Result { 302 > semantic::resolve_and_lower(pl, main_path).map_err(error_message::downcast) 303 │ } 304 │ 38: prqlc::cli::Command::execute::{{closure}}::hfb77e93c70be3288 at /Users/kgutwin/github/prql/prqlc/prqlc/src/cli.rs:401 399 │ 400 │ prql_to_pl_tree(sources) 401 > .and_then(|pl| pl_to_rq_tree(pl, &main_path)) 402 │ .and_then(|rq| rq_to_sql(rq, &opts)) 403 │ .map_err(|e| e.composed(sources))? 39: core::result::Result::and_then::h67710fe60ca55207 at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/result.rs:1320 40: prqlc::cli::Command::execute::h81aad01959f0ac7d at /Users/kgutwin/github/prql/prqlc/prqlc/src/cli.rs:400 398 │ .with_format(*format); 399 │ 400 > prql_to_pl_tree(sources) 401 │ .and_then(|pl| pl_to_rq_tree(pl, &main_path)) 402 │ .and_then(|rq| rq_to_sql(rq, &opts)) 41: prqlc::cli::Command::run_io_command::he0fe31d8f62ecef1 at /Users/kgutwin/github/prql/prqlc/prqlc/src/cli.rs:259 257 │ let (mut file_tree, main_path) = self.read_input()?; 258 │ 259 > self.execute(&mut file_tree, &main_path) 260 │ .and_then(|buf| Ok(self.write_output(&buf)?)) 261 │ } 42: prqlc::cli::Command::run::h6d437a495f3f9194 at /Users/kgutwin/github/prql/prqlc/prqlc/src/cli.rs:238 236 │ Ok(()) 237 │ } 238 > _ => self.run_io_command(), 239 │ } 240 │ } 43: prqlc::cli::main::hc79b9c103a70b62e at /Users/kgutwin/github/prql/prqlc/prqlc/src/cli.rs:38 36 │ cli.color.write_global(); 37 │ 38 > if let Err(error) = cli.command.run() { 39 │ eprintln!("{error}"); 40 │ // Copied from 44: prqlc::main::h2f44fa02b5e526e7 at /Users/kgutwin/github/prql/prqlc/prqlc/src/main.rs:16 14 │ #[cfg(not(target_family = "wasm"))] 15 │ fn main() -> color_eyre::eyre::Result<()> { 16 > cli::main() 17 │ } 18 │ 45: core::ops::function::FnOnce::call_once::h820ed851c883d382 at /rustc/79e9716c980570bfd1f666e3b16ac583f0168962/library/core/src/ops/function.rs:250 ⋮ 13 frames hidden ⋮ ```

However, when the join relation is expressed as a variable, the panic does not happen.

let from_b = (from b | sort { -xyz })

from a
join side:left map=from_b (this.column == that.key)

Adding an explicit definition of the column within the inline relation definition (from b | select { key, xyz } | sort { -xyz }) does not avoid the panic.

PRQL input

from a
join map=(from b | sort { -xyz }) (==key)

SQL output

-- this is from the second, non-panic example
WITH from_b AS (
  SELECT
    *
  FROM
    b
)
SELECT
  a.*,
  map.*
FROM
  a
  LEFT JOIN from_b AS map ON a."key" = map."key"

-- Generated by PRQL compiler version:0.11.2 (https://prql-lang.org)

Expected SQL output

No response

MVCE confirmation

  • [X] Minimal example
  • [X] New issue

Anything else?

Built from git, at 36219a1.

As an aside, I noticed that even without the panic, the sort does not translate into an ORDER BY in the from_b CTE. Is this intentional, given the ordering guarantee design of PRQL? I tried to review how ordering works in PRQL in the documentation as well as through some relevant issue discussions, but admit that I still find it a bit confusing, especially around joins.

max-sixty commented 8 months ago

Thanks for the issue — I think this is a dupe of the issue from https://github.com/PRQL/prql/pull/4037...

max-sixty commented 8 months ago

As an aside, I noticed that even without the panic, the sort does not translate into an ORDER BY in the from_b CTE.

Right, good spot, the result should have an ORDER BY.

Assuming this is a duplicate, shall we copy-paste that narrower example into a new issue?

kgutwin commented 8 months ago

The panic is in the same spot as #4037 but I'm guessing the cause is different. The backtrace shows that the problem starts when lowering the "sorts" that are defined in the pipeline:

  32: prql_compiler::semantic::lowering::Lowerer::lower_pipeline::hd5286dcbc239425a
      at /Users/kgutwin/github/prql/prqlc/prql-compiler/src/semantic/lowering.rs:491
       489 │                 vec![]
       490 │             },
       491 >             sort: self.lower_sorts(transform_call.sort)?,
       492 │         };
       493 │         self.window = Some(window);

I think there's an issue related to the order of the lowering process in lower_pipeline() somehow. I ran the compiler with RUST_LOG=debug and got more informative messages:

[ ... snip ... ]
[DEBUG prql_compiler::semantic::lowering] lookup for main pipeline in []
[DEBUG prql_compiler::semantic::lowering] lowering table None, columns = [Single(Some("xyz")), Single(Some("key")), Wildcard]
[DEBUG prql_compiler::semantic::lowering] lowering table None, columns = [Single(Some("key")), Wildcard]
[DEBUG prql_compiler::semantic::lowering] lowering an instance of table default_db.a (id=127)...
[DEBUG prql_compiler::semantic::lowering] ... columns = [(Single(Some("key")), column-0), (Wildcard, column-1)]
[DEBUG prql_compiler::semantic::lowering] lowering ident this.b.xyz (target Some(121))
The application panicked (crashed).
Message:  cannot find cid by id=121
Location: prqlc/prql-compiler/src/semantic/lowering.rs:949

So it looks like the table default_db.b is not lowered before ident this.b.xyz. I'm not sure why, though. The table is successfully resolved (earlier in the debug log):

[DEBUG prql_compiler::semantic::resolver::expr] resolving ident b...
[DEBUG prql_compiler::semantic::resolver::expr] ... resolved to default_db.b
[DEBUG prql_compiler::semantic::resolver::expr] ... which is TableDecl: [{*..}] LocalTable
[DEBUG prql_compiler::semantic::resolver::inference] instanced table default_db.b as Lineage { columns: [All { input_id: 121, except: {} }], inputs: [LineageInput { id: 121, name: "b", table: Ident { path: ["default_db"], name: "b" } }], prev_columns: [] }
[DEBUG prql_compiler::semantic::resolver::functions] resolved arg to Ident
[DEBUG prql_compiler::semantic::resolver::functions] resolved arg to Ident
[DEBUG prql_compiler::semantic::resolver::expr] resolving ident xyz...
[DEBUG prql_compiler::semantic::resolver::names] inferring xyz to be from table default_db.b
[DEBUG prql_compiler::semantic::resolver::expr] ... resolved to this.b.xyz
[DEBUG prql_compiler::semantic::resolver::expr] ... which is Column (target 121)
max-sixty commented 8 months ago

Super, thanks for tracking that down

max-sixty commented 8 months ago

FYI this is no longer a panic, instead now points to #3870