pola-rs / pyo3-polars

Plugins/extension for Polars
MIT License
232 stars 38 forks source link

Polars 0.26 rc1: plugins panic when passing String input #61

Closed MarcoGorelli closed 6 months ago

MarcoGorelli commented 6 months ago

To reproduce:

Cargo.toml:

[package]
name = "minimal_plugin"
version = "0.1.0"
edition = "2021"

[lib]
name = "minimal_plugin"
crate-type= ["cdylib"]

[dependencies]
pyo3 = { version = "0.20.0", features = ["extension-module"] }
pyo3-polars = { version = "0.10.0", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
polars = { version = "0.36.2", default-features = false }

[target.'cfg(target_os = "linux")'.dependencies]
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }

pyproject.toml:

[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"

[project]
name = "minimal_plugin"  # Should match the folder with your code!
requires-python = ">=3.8"
classifiers = [
  "Programming Language :: Rust",
  "Programming Language :: Python :: Implementation :: CPython",
  "Programming Language :: Python :: Implementation :: PyPy",
]

minimal_plugin/__init__.py:

import polars as pl
from polars.utils.udfs import _get_shared_lib_location
from polars.type_aliases import IntoExpr

# NOTE(review): presumably resolves the compiled plugin library that sits next to this file — confirm against polars.utils.udfs.
lib = _get_shared_lib_location(__file__)

def noop(expr: pl.Expr) -> pl.Expr:
    """Register the plugin's ``noop`` symbol on ``expr`` and return the resulting expression.

    The plugin is looked up in the module-level ``lib`` and is declared elementwise.
    """
    plugin_options = {
        "lib": lib,
        "symbol": "noop",
        "is_elementwise": True,
    }
    return expr.register_plugin(**plugin_options)

src/expressions.rs:

#![allow(clippy::unused_unit)]
use polars::prelude::arity::binary_elementwise;
use polars::prelude::*;
use pyo3_polars::derive::polars_expr;

/// Output-type callback: the result field is a clone of the first input field.
///
/// Indexes `input_fields[0]` directly, so it panics when called with an empty slice.
fn same_output_type(input_fields: &[Field]) -> PolarsResult<Field> {
    Ok(input_fields[0].clone())
}

/// Plugin expression that returns a clone of its first input series.
///
/// The output field is supplied by `same_output_type`.
#[polars_expr(output_type_func=same_output_type)]
fn noop(inputs: &[Series]) -> PolarsResult<Series> {
    Ok(inputs[0].clone())
}

src/lib.rs:

mod expressions;

#[cfg(target_os = "linux")]
use jemallocator::Jemalloc;

// Use jemalloc as the global allocator; this static is compiled only when targeting Linux.
#[global_allocator]
#[cfg(target_os = "linux")]
static ALLOC: Jemalloc = Jemalloc;

run.py:

import polars as pl
import minimal_plugin as mp

df = pl.DataFrame({'a': ['bob', 'billy']})  # one column of Python strings: the String input this report is about
print(df.with_columns(mp.noop(pl.col('a'))))  # apply the plugin's noop expression to column 'a'

This gives:

$ POLARS_VERBOSE=1  python run.py 
panicked at src/expressions.rs:17:1:
called `Result::unwrap()` on an `Err` value: ComputeError(ErrString("The datatype \"vu\" is still not supported in Rust implementation"))
Traceback (most recent call last):
  File "/home/marcogorelli/polars-plugins-minimal-examples/run.py", line 5, in <module>
    print(df.with_columns(mp.noop('a')))
  File "/home/marcogorelli/polars-plugins-minimal-examples/venv/lib/python3.10/site-packages/polars/dataframe/frame.py", line 8281, in with_columns
    return self.lazy().with_columns(*exprs, **named_exprs).collect(_eager=True)
  File "/home/marcogorelli/polars-plugins-minimal-examples/venv/lib/python3.10/site-packages/polars/lazyframe/frame.py", line 1730, in collect
    return wrap_df(ldf.collect())
polars.exceptions.ComputeError: the plugin panicked

The message is suppressed. Set POLARS_VERBOSE=1 to send the panic message to stderr.

Error originated just after this operation:
DF ["a"]; PROJECT */1 COLUMNS; SELECTION: "None"
MarcoGorelli commented 6 months ago

From Discord: this just requires a new Rust release.

ritchie46 commented 6 months ago

This, however, breaks plugins for older versions. I don't think a copy to the old string type is acceptable for the plugins, so we must raise an informative error for the old release. I didn't anticipate this, but this break is worth it.