cynkra / constructive

Display Idiomatic Code to Construct Most R Objects
https://cynkra.github.io/constructive
Other
127 stars 6 forks source link

methods for tbl_lazy and connections #150

Open moodymudskipper opened 1 year ago

moodymudskipper commented 1 year ago

A tbl_lazy is probably called by tbl(con, "my_table") if there is no query, if there is a query I believe the dplyr code is stored somewhere in the object so we might use it, we might also use the sql with something like tbl(con, sql(...)) (IIRC how it works).

so I think constructors are "dplyr" and "sql" (and either way we just take the name of the table if there is no query. Need to use dbplyr::in_schema() too if relevant.

connection objects are trickier. 1) We might just use con and user is supposed to know how they do it 2) We construct the connection code, but I'm not sure credentials are stored in the object, and if so we might want to hide them by default, using "password" etc for instance. *3) We might construct a local DB with the data but then it has to be SQLite and the dbplyr code (and the sql to a greater extent) won't be robust anymore.

method 2) with password is the only one that would produce an equivalent object.

moodymudskipper commented 1 year ago
library(constructive)
library(dplyr)

con <- DBI::dbConnect(RSQLite::SQLite())

# The connection object is beautifully handled by our S4 support
# we might use directly the DBI::dbConnect(<driver>,) syntax, though it will
# necessarily create a new pointer (and a new env but waldo will consider those equivalent)
construct(con)
#> new(
#>   "SQLiteConnection" |>
#>     structure(package = "RSQLite"),
#>   ptr = constructive::external_pointer("0x10e708290"),
#>   dbname = "",
#>   loadable.extensions = TRUE,
#>   flags = 70L,
#>   vfs = "",
#>   ref = constructive::env("0x10af7ce28", parents = "empty"),
#>   bigint = "integer64",
#>   extended_types = FALSE
#> )

DBI::dbWriteTable(con, "cars", cars)

# Maybe the class of the nested lazy query <lazy_base_remote_query/lazy_base_query/lazy_query> 
# indicates that there is no query as long as unclass(object$lazy_query$x) is a name,
# so we might construct tbl(con, "cars") accurately here
construct(tbl(con, "cars"), data = list(con = con))
#> list(
#>   src = list(con = con, disco = NULL) |>
#>     structure(class = c("src_SQLiteConnection", "src_dbi", "src_sql", "src")),
#>   lazy_query = list(
#>     x = "cars" |>
#>       structure(class = c("ident", "character")),
#>     vars = c("speed", "dist"),
#>     group_vars = character(0),
#>     order_vars = NULL,
#>     frame = NULL
#>   ) |>
#>     structure(class = c("lazy_base_remote_query", "lazy_base_query", "lazy_query"))
#> ) |>
#>   structure(class = c("tbl_SQLiteConnection", "tbl_dbi", "tbl_sql", "tbl_lazy", "tbl"))

# Here we see the class of the lazy query change into "<lazy_select_query/lazy_query>"
# the filter clause ends up in object$lazy_quey$where but it's not clear how we could reconstruct
# the query from there
waldo::compare(
  max_diffs = Inf,
  construct(tbl(con, "cars")),
  construct(tbl(con, "cars") |> filter(speed < 10))
)
#> old$code[16:28] vs new$code[16:48]
#>   "  ) |>"
#>   "    structure(class = c(\"src_SQLiteConnection\", \"src_dbi\", \"src_sql\", \"src\")),"
#>   "  lazy_query = list("
#> - "    x = \"cars\" |>"
#> - "      structure(class = c(\"ident\", \"character\")),"
#> - "    vars = c(\"speed\", \"dist\"),"
#> + "    x = list("
#> + "      x = \"cars\" |>"
#> + "        structure(class = c(\"ident\", \"character\")),"
#> + "      vars = c(\"speed\", \"dist\"),"
#> + "      group_vars = character(0),"
#> + "      order_vars = NULL,"
#> + "      frame = NULL"
#> + "    ) |>"
#> + "      structure(class = c(\"lazy_base_remote_query\", \"lazy_base_query\", \"lazy_query\")),"
#> + "    select = tibble::tibble("
#> + "      name = c(\"speed\", \"dist\"),"
#> + "      expr = list(quote(speed), quote(dist)),"
#> + "      group_vars = list(character(0), character(0)),"
#> + "      order_vars = list(NULL, NULL),"
#> + "      frame = list(NULL, NULL),"
#> + "    ),"
#> + "    where = list(rlang::new_quosure(quote(speed < 10))),"
#> + "    group_by = NULL,"
#> + "    order_by = NULL,"
#> + "    distinct = FALSE,"
#> + "    limit = NULL,"
#> + "    select_operation = \"select\","
#> + "    message_summarise = NULL,"
#>   "    group_vars = character(0),"
#>   "    order_vars = NULL,"
#>   "    frame = NULL"
#>   "  ) |>"
#> - "    structure(class = c(\"lazy_base_remote_query\", \"lazy_base_query\", \"lazy_query\"))"
#> + "    structure(class = c(\"lazy_select_query\", \"lazy_query\"))"
#>   ") |>"
#>   "  structure(class = c(\"tbl_SQLiteConnection\", \"tbl_dbi\", \"tbl_sql\", \"tbl_lazy\", \"tbl\"))"

# note that it is again a object
# but lazy_query$x contains the query here, so we might be able to reconstruct this call
# as well
construct(tbl(con,  sql("SELECT *\nFROM `cars`\nWHERE (`speed` < 10.0)")), data = list(con = con))
#> list(
#>   src = list(con = con, disco = NULL) |>
#>     structure(class = c("src_SQLiteConnection", "src_dbi", "src_sql", "src")),
#>   lazy_query = list(
#>     x = "SELECT *\nFROM `cars`\nWHERE (`speed` < 10.0)" |>
#>       structure(class = c("sql", "character")),
#>     vars = c("speed", "dist"),
#>     group_vars = character(0),
#>     order_vars = NULL,
#>     frame = NULL
#>   ) |>
#>     structure(class = c("lazy_base_remote_query", "lazy_base_query", "lazy_query"))
#> ) |>
#>   structure(class = c("tbl_SQLiteConnection", "tbl_dbi", "tbl_sql", "tbl_lazy", "tbl"))

# the lazy query is a recursive element, it would be muche asier to follow if the list
# calls were piped.
# note that dbplyr has constructors lazy_query and lazy_*_query that can be used too, we can pipe them by default
# lazy_query is just a list with a class so it's an easy low hanging fruit.
construct(tbl(con, "cars") |> filter(speed < mean(speed)) |> group_by(dist) |> mutate(foo = mean(dist)), data = list(con = con))
#> {constructive} couldn't create code that reproduces perfectly the input
#> ℹ Call `construct_issues()` to inspect the last issues
#> list(
#>   src = list(con = con, disco = NULL) |>
#>     structure(class = c("src_SQLiteConnection", "src_dbi", "src_sql", "src")),
#>   lazy_query = list(
#>     x = list(
#>       x = list(
#>         x = "cars" |>
#>           structure(class = c("ident", "character")),
#>         vars = c("speed", "dist"),
#>         group_vars = character(0),
#>         order_vars = NULL,
#>         frame = NULL
#>       ) |>
#>         structure(class = c("lazy_base_remote_query", "lazy_base_query", "lazy_query")),
#>       select = tibble::tibble(
#>         name = c("speed", "dist", "q06"),
#>         expr = list(
#>           quote(speed),
#>           quote(dist),
#>           rlang::new_quosure(
#>             quote(mean(speed)),
#>             constructive::env("0x104c59390", parents = "namespace:dbplyr")
#>           )
#>         ),
#>         group_vars = list(character(0), character(0), character(0)),
#>         order_vars = list(NULL, NULL, NULL),
#>         frame = list(NULL, NULL, NULL),
#>       ),
#>       where = NULL,
#>       group_by = NULL,
#>       order_by = NULL,
#>       distinct = FALSE,
#>       limit = NULL,
#>       select_operation = "mutate",
#>       message_summarise = NULL,
#>       group_vars = character(0),
#>       order_vars = NULL,
#>       frame = NULL
#>     ) |>
#>       structure(class = c("lazy_select_query", "lazy_query")),
#>     select = tibble::tibble(
#>       name = c("speed", "dist", "foo"),
#>       expr = list(quote(speed), quote(dist), rlang::new_quosure(quote(mean(dist)))),
#>       group_vars = list("dist", "dist", "dist"),
#>       order_vars = list(NULL, NULL, NULL),
#>       frame = list(NULL, NULL, NULL),
#>     ),
#>     where = list(quote(speed < q06)),
#>     group_by = NULL,
#>     order_by = NULL,
#>     distinct = FALSE,
#>     limit = NULL,
#>     select_operation = "select",
#>     message_summarise = NULL,
#>     group_vars = "dist",
#>     order_vars = NULL,
#>     frame = NULL
#>   ) |>
#>     structure(class = c("lazy_select_query", "lazy_query"))
#> ) |>
#>   structure(class = c("tbl_SQLiteConnection", "tbl_dbi", "tbl_sql", "tbl_lazy", "tbl"))

# we also have an environment issue above worth investigating
construct_issues()
#> `attr(original$lazy_query$select$expr[[3]], '.Environment')` is <env:global>
#> `attr(recreated$lazy_query$select$expr[[3]], '.Environment')` is <env:0x10af27130>

# actually rebuilding the dbplyr code is probably a very deep rabbit hole, and considering
# the fst evolution of dbplyr the code is likely to not be stable.

Created on 2023-05-29 with reprex v2.0.2