pola-rs / pyo3-polars

Plugins/extension for Polars
MIT License
232 stars 38 forks source link

Sorted flag not preserved when returning the PyDataFrame to Rust #51

Open MichalLebeda opened 8 months ago

MichalLebeda commented 8 months ago

After sorting a DataFrame in Python and returning it to Rust as a PyDataFrame, the sorted flag is not preserved: the column's flag is reported as IsSorted::Not on the Rust side.

Cargo.toml:

[package]
name = "polars_sorted_flag"
version = "0.1.0"
edition = "2021"

[dependencies]
pyo3-polars = "0.9.0"
polars = "0.35.4"
pyo3 = "0.20.0"

main.rs:

use pyo3::{Py, PyAny, Python};
use pyo3::types::PyModule;
use pyo3_polars::PyDataFrame;

fn main() {
    pyo3::prepare_freethreaded_python();

    Python::with_gil(|py| {
        let code = "
import polars as pl
def get_sorted_df():
    df = pl.DataFrame(
        {
            'a': [9, 2, 0],
            'b': [6.0, 5.0, 4.0],
            'c': ['a', 'c', 'b'],
        }
    )
    return df.sort('a')

def get_is_sorted():
    df = get_sorted_df()
    return str(df['a'].is_sorted())
";

        let py_module = PyModule::from_code(py, code, "", "").unwrap();

        let get_sorted_df: Py<PyAny> = py_module.getattr("get_sorted_df").unwrap().into();
        let df = get_sorted_df.call0(py).unwrap().extract::<PyDataFrame>(py).unwrap().0;
        println!("df: {:?}", df);

        let is_sorted_flag = df.column("a").unwrap().is_sorted_flag();
        println!("Rust is_sorted_flag(): {:?}", is_sorted_flag);

        let get_is_sorted: Py<PyAny> = py_module.getattr("get_is_sorted").unwrap().into();
        let py_is_sorted = get_is_sorted.call0(py).unwrap().extract::<String>(py).unwrap();
        println!("Python is_sorted(): {:?}", py_is_sorted);
    })
}

Output:

df: shape: (3, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 0   ┆ 4.0 ┆ b   │
│ 2   ┆ 5.0 ┆ c   │
│ 9   ┆ 6.0 ┆ a   │
└─────┴─────┴─────┘
Rust is_sorted_flag(): Not
Python is_sorted(): "True"

BTW, how can I properly format (indent) the Python script embedded in the Rust string literal while avoiding Python's "unexpected indent" error?