Open etiennebacher opened 3 months ago
It may be helpful to look at the tidyquery
package's code.
https://github.com/ianmcook/tidyquery
The tidyquery
package generates a dplyr query string from an SQL string and executes the actual dplyr query, but I believe the query that is displayed and the query that is executed are constructed separately (rather than the assembled string being evaluated as is).
Need to check how this affects performance. Building on https://stackoverflow.com/questions/78262759
library(tidypolars)
library(dplyr, warn.conflicts = FALSE)
library(tidyr, warn.conflicts = FALSE)
pl_relig_income <- as_polars_df(tidyr::relig_income)
pl_relig_income |>
pivot_longer(!religion, names_to = "income", values_to = "count") |>
drop_na() |>
arrange(religion, count) |>
show_query()
#> Pure polars expression:
#>
#> <data>$
#> melt(id_vars = "religion", value_vars = c("<$10k", "$10-20k", "$20-30k", "$30-40k", "$40-50k", "$50-75k", "$75-100k", "$100-150k", ">150k", "Don't know/refused"), variable_name = "income", value_name = "count")$sort("religion")$
#> drop_nulls(character(0))$
#> sort(c("religion", "count"), descending = c(FALSE, FALSE))
library(tidypolars)
library(dplyr, warn.conflicts = FALSE)
iris |>
as_polars_df() |>
filter(Sepal.Length > 5) |>
show_query()
#> Pure polars expression:
#>
#> <data>$
#> filter(p$col("Sepal.Length")$gt(5))
iris |>
as_polars_df() |>
filter(Species %in% c("setosa", "virginica")) |>
show_query()
#> Pure polars expression:
#>
#> <data>$
#> filter(p$col("Species")$is_in(p$lit(c("setosa", "virginica"))))
iris |>
as_polars_df() |>
filter(Sepal.Length > 5, Species %in% c("setosa", "virginica")) |>
show_query()
#> Pure polars expression:
#>
#> <data>$
#> filter(p$col("Sepal.Length")$gt(5), p$col("Species")$is_in(p$lit(c("setosa", "virginica"))))
Close #96
Example:
TODO:
bind_cols_polars()