pola-rs / pyo3-polars

Plugins/extension for Polars
MIT License
232 stars 38 forks source link

LazyFrame::anonymous_scan can't be sent to Python #67

Open Vincenthays opened 6 months ago

Vincenthays commented 6 months ago

Hello, I'm trying to create a module with maturin and LazyFrame::anonymous_scan. Compilation works just fine, but unfortunately it crashes at runtime. The goal is to implement projection_pushdown and predicate_pushdown once it's working.

src/lib.rs

use polars::prelude::*;
use pyo3::prelude::*;
use pyo3_polars::PyLazyFrame;
use std::any::Any;

/// Python extension module `bigtable2`.
///
/// Registers the `get` function on the module; `get` is exported to Python
/// under the name `test` via its own `#[pyfunction(name = "test")]` attribute.
#[pymodule]
fn bigtable2(_py: Python, m: &PyModule) -> PyResult<()> {
    let test_fn = wrap_pyfunction!(get, m)?;
    // `add_function` already yields `PyResult<()>`, so it is the tail expression.
    m.add_function(test_fn)
}

#[pyfunction(name = "test")]
pub fn get() -> PyLazyFrame {
    LazyFrame::anonymous_scan(
        Arc::new(BigtableScan {}),
        ScanArgsAnonymous {
            schema: Some(Arc::new(Schema::from_iter([Field::new(
                "a",
                DataType::UInt32,
            )]))),
            ..Default::default()
        },
    )
    .map(PyLazyFrame)
    // .inspect(|lf| println!("{:?}", lf.0.logical_plan))
    .expect("anonymous_scan error")
}

/// Anonymous scan source that produces a fixed two-row frame.
///
/// Holds no state; it is a placeholder for a real scan implementation.
pub struct BigtableScan {}

impl AnonymousScan for BigtableScan {
    /// Returns `self` as `&dyn Any` so callers can downcast to the concrete
    /// scan type instead of hitting a panic.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Produces the scan result: a single column `"a"` holding `[1u32, 2u32]`.
    ///
    /// The scan options are currently ignored.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the `df!` macro while building the
    /// frame.
    fn scan(&self, _scan_opts: AnonymousScanArgs) -> PolarsResult<DataFrame> {
        df!("a" => [1u32, 2u32])
    }
}

Cargo.toml

[package]
name = "bigtable2"
version = "0.0.1"
edition = "2021"

[lib]
name = "bigtable2"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.20.2", features = ["extension-module"] }
pyo3-polars = { version = "0.11.3", features = ["lazy"] }
polars = { version = "0.37.0", features = ["lazy"] }

rust-toolchain.toml

[toolchain]
channel = "nightly-2024-01-24"
profile = "minimal"

Terminal

> maturin develop # works fine
> python3 -c "import bigtable2; print(bigtable2.test())"
thread '<unnamed>' panicked at /Users/<USER>/.cargo/registry/src/index.crates.io-6f17d22bba15001f/pyo3-polars-0.11.3/src/lib.rs:244:71:
called `Result::unwrap()` on an `Err` value: Value("the enum variant FileScan::Anonymous cannot be serialized")
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
Traceback (most recent call last):
  File "<string>", line 1, in <module>
pyo3_runtime.PanicException: called `Result::unwrap()` on an `Err` value: Value("the enum variant FileScan::Anonymous cannot be serialized")
Vincenthays commented 6 months ago

OK, I understand: it can't be serialized because of the `dyn AnonymousScan`.

// crates/polars-plan/src/logical_plan/file_scan.rs

// Excerpt of the `FileScan` enum; `...` marks content elided from this quote.
// With the "serde" feature enabled the enum derives `Serialize`/`Deserialize`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum FileScan {
    ...
    // Marked `serde(skip)` when the "serde" feature is on, so this variant is
    // excluded from (de)serialization; attempting to serialize it yields
    // "the enum variant FileScan::Anonymous cannot be serialized".
    #[cfg_attr(feature = "serde", serde(skip))]
    Anonymous {
        options: Arc<AnonymousScanOptions>,
        // User-supplied scan implementation held as a trait object.
        function: Arc<dyn AnonymousScan>,
    },
}