Closed averissimo closed 6 months ago
@m7pr what do you think about the alternative code for the connector?
Using a mix of `eval_code` and `within`.
(This avoids having to escape the Python code.)
@averissimo I think we should rewrite the app so that it uses `eval_code`, which can handle comments, and add the `# @linksto` tag so that the Python setup code is returned for `IRIS`. Would you like me to handle that, or would you like to try on your own?
I can do it myself as the code above already does it.
Do you think it's a good idea to mix `eval_code` with `within`? My first thought is that it looks a bit better when defining the `python_code` string.
But I'm hesitant about it.
Edit: copying it here without the `<details>` tag.
library(teal.data)
#> Loading required package: teal.code
# Set up the Python environment by evaluating R code supplied as a string.
# The thread proposes `eval_code()` here because it can handle comments: each
# statement carries a trailing `# @linksto IRIS` tag so that
# `get_code(data, datanames = "IRIS")` returns the setup code as well.
#
# The evaluated code:
#   * loads reticulate;
#   * reads the virtualenv name from VIRTUALENV_NAME (default
#     "example_env_name") and an optional interpreter path from PYTHON_PATH
#     (an empty value is turned into NULL);
#   * creates the virtualenv, installs pip/numpy/pandas into it and activates it;
#   * builds `iris_raw`, i.e. `iris` with a leading `id` column.
#
# Because the code is itself a string literal, every inner double quote has to
# be escaped.
data <- teal.code::eval_code(
teal_data(),
"
library(reticulate)
python_dependencies <- c(\"pip\", \"numpy\", \"pandas\") # @linksto IRIS
virtualenv_dir <- Sys.getenv(\"VIRTUALENV_NAME\", \"example_env_name\") # @linksto IRIS
python_path <- Sys.getenv(\"PYTHON_PATH\") # @linksto IRIS
if (python_path == \"\") {
python_path <- NULL
}
reticulate::virtualenv_create(envname = virtualenv_dir, python = python_path) # @linksto IRIS
reticulate::virtualenv_install(virtualenv_dir, packages = python_dependencies, ignore_installed = TRUE) # @linksto IRIS
reticulate::use_virtualenv(virtualenv_dir, required = TRUE) # @linksto IRIS
iris_raw <- cbind(id = seq_len(nrow(iris)), iris) # @linksto IRIS
"
)
#> virtualenv: example_env_name
#> Using virtual environment 'example_env_name' ...
# Run the Python transformation with `within()` instead of `eval_code()`, so
# the Python source can be written as a plain string literal without the
# quote escaping needed inside an `eval_code()` code string.
#
# The Python code reads `iris_raw` through `r.`, SVD-whitens the columns at
# positions 1-4 (0-based, i.e. the four columns after `id`), appends the
# result as `<column>.whiten` columns, rounds to 10 decimals and leaves the
# result in `data_new`.
#
# `reticulate.engine.environment` is set to the current environment only for
# the duration of `py_run_string()` -- presumably so that `r.iris_raw`
# resolves against the objects held in `data`; TODO confirm.
# The value returned by `py_run_string()` is kept in `value`, and `IRIS` is
# read from `value$data_new`.
#
# NOTE(review): the body of `svd_whiten` appears without indentation in this
# copy of the snippet; Python requires it to be indented. The printed
# `python_code` in the `get_code()` output has whitespace after each `\n`
# inside the function, so the indentation was most likely lost when the
# snippet was copied -- verify before running.
data <- within(
data,
{
python_code <- "import pandas as pd
data = r.iris_raw
def svd_whiten(dat):
import numpy as np
X = np.matrix(dat)
U, s, Vt = np.linalg.svd(X, full_matrices=False)
X_white = np.dot(U, Vt)
return X_white
data_columns = data.columns
global numeric_cols_ix
global numeric_cols
numeric_cols_ix = list(range(5))[1:]
numeric_cols = [x for i,x in enumerate(data_columns) if i in numeric_cols_ix]
svd_res = svd_whiten(data.iloc[:, numeric_cols_ix])
data_new = pd.concat([data, pd.DataFrame(svd_res)], axis = 1)
data_new.columns = list(data_columns) + [i + '.whiten' for i in numeric_cols]
data_new = data_new.round(10)
data_new
"
withr::with_options(
list(reticulate.engine.environment = environment()),
value <- py_run_string(python_code)
)
IRIS <- value$data_new
}
)
# Declare IRIS as the only dataset name of `data`.
datanames(data) <- c("IRIS")
# Hash the resulting IRIS object; the `#>` line below is the value recorded
# by the reprex.
rlang::hash(data[["IRIS"]])
#> [1] "e9225d33975ec7ec986485068139f7af"
# Print the code that `data` reports as needed to reproduce IRIS.
get_code(data, datanames = "IRIS") |> cat()
#> library(reticulate)
#> python_dependencies <- c("pip", "numpy", "pandas")
#> virtualenv_dir <- Sys.getenv("VIRTUALENV_NAME", "example_env_name")
#> python_path <- Sys.getenv("PYTHON_PATH")
#> if (python_path == "") {
#> python_path <- NULL
#> }
#> reticulate::virtualenv_create(envname = virtualenv_dir, python = python_path)
#> reticulate::virtualenv_install(virtualenv_dir, packages = python_dependencies, ignore_installed = TRUE)
#> reticulate::use_virtualenv(virtualenv_dir, required = TRUE)
#> iris_raw <- cbind(id = seq_len(nrow(iris)), iris)
#> python_code <- "import pandas as pd\ndata = r.iris_raw\ndef svd_whiten(dat):\n import numpy as np\n X = np.matrix(dat)\n U, s, Vt = np.linalg.svd(X, full_matrices=False)\n X_white = np.dot(U, Vt)\n return X_white\n\ndata_columns = data.columns\nglobal numeric_cols_ix\nglobal numeric_cols\nnumeric_cols_ix = list(range(5))[1:]\nnumeric_cols = [x for i,x in enumerate(data_columns) if i in numeric_cols_ix]\nsvd_res = svd_whiten(data.iloc[:, numeric_cols_ix])\ndata_new = pd.concat([data, pd.DataFrame(svd_res)], axis = 1)\ndata_new.columns = list(data_columns) + [i + '.whiten' for i in numeric_cols]\ndata_new = data_new.round(10)\ndata_new\n"
#> withr::with_options(list(reticulate.engine.environment = environment()), value <- py_run_string(python_code))
#> IRIS <- value$data_new
Do you think it's a good idea to mix eval_code with within? My first thought is that it looks a bit better when defining the python_code string.
Yes, in this case this looks fine. You have my blessing!
What happened?
The Python connector does not reproduce the full code needed for the different modules.
Context
When running the genentech.shinyapps.io/nest_python_dev/ app, the code does not reproduce the full R code.
This is probably due to calling `within` in the `teal_data_module` (and the code parser not recognizing dependencies).

Snippet showing example of failure
``` r
library(teal.data)
#> Loading required package: teal.code
data <- within(
  teal_data(),
  {
    library(reticulate)
    python_dependencies <- c("pip", "numpy", "pandas")
    virtualenv_dir <- Sys.getenv("VIRTUALENV_NAME", "example_env_name")
    python_path <- Sys.getenv("PYTHON_PATH")
    if (python_path == "") {
      python_path <- NULL
    }
    reticulate::virtualenv_create(envname = virtualenv_dir, python = python_path)
    reticulate::virtualenv_install(virtualenv_dir, packages = python_dependencies, ignore_installed = TRUE)
    reticulate::use_virtualenv(virtualenv_dir, required = TRUE)
    iris_raw <- cbind(id = seq_len(nrow(iris)), iris)
    python_code <- "import pandas as pd
data = r.iris_raw
def svd_whiten(dat):
    import numpy as np
    X = np.matrix(dat)
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    X_white = np.dot(U, Vt)
    return X_white

data_columns = data.columns
global numeric_cols_ix
global numeric_cols
numeric_cols_ix = list(range(5))[1:]
numeric_cols = [x for i,x in enumerate(data_columns) if i in numeric_cols_ix]
svd_res = svd_whiten(data.iloc[:, numeric_cols_ix])
data_new = pd.concat([data, pd.DataFrame(svd_res)], axis = 1)
data_new.columns = list(data_columns) + [i + '.whiten' for i in numeric_cols]
data_new = data_new.round(10)
data_new
"
    withr::with_options(
      list(reticulate.engine.environment = environment()),
      py_run_string(python_code)
    )
    IRIS <- py$data_new
  }
)
#> virtualenv: example_env_name
#> Using virtual environment 'example_env_name' ...
datanames(data) <- c("IRIS")
rlang::hash(data[["IRIS"]])
#> [1] "e9225d33975ec7ec986485068139f7af"
get_code(data, datanames = "IRIS") |> cat()
#> library(reticulate)
#> IRIS <- py$data_new
```

Created on 2024-02-19 with [reprex v2.0.2](https://reprex.tidyverse.org)

Possible alternative using `teal.code::eval_code` and `# @linksto ...`
``` r
library(teal.data)
#> Loading required package: teal.code
data <- teal.code::eval_code(
  teal_data(),
  "
library(reticulate)
python_dependencies <- c(\"pip\", \"numpy\", \"pandas\") # @linksto IRIS
virtualenv_dir <- Sys.getenv(\"VIRTUALENV_NAME\", \"example_env_name\") # @linksto IRIS
python_path <- Sys.getenv(\"PYTHON_PATH\") # @linksto IRIS
if (python_path == \"\") {
  python_path <- NULL
}
reticulate::virtualenv_create(envname = virtualenv_dir, python = python_path) # @linksto IRIS
reticulate::virtualenv_install(virtualenv_dir, packages = python_dependencies, ignore_installed = TRUE) # @linksto IRIS
reticulate::use_virtualenv(virtualenv_dir, required = TRUE) # @linksto IRIS
iris_raw <- cbind(id = seq_len(nrow(iris)), iris) # @linksto IRIS
"
)
#> virtualenv: example_env_name
#> Using virtual environment 'example_env_name' ...
data <- within(
  data,
  {
    python_code <- "import pandas as pd
data = r.iris_raw
def svd_whiten(dat):
    import numpy as np
    X = np.matrix(dat)
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    X_white = np.dot(U, Vt)
    return X_white

data_columns = data.columns
global numeric_cols_ix
global numeric_cols
numeric_cols_ix = list(range(5))[1:]
numeric_cols = [x for i,x in enumerate(data_columns) if i in numeric_cols_ix]
svd_res = svd_whiten(data.iloc[:, numeric_cols_ix])
data_new = pd.concat([data, pd.DataFrame(svd_res)], axis = 1)
data_new.columns = list(data_columns) + [i + '.whiten' for i in numeric_cols]
data_new = data_new.round(10)
data_new
"
    withr::with_options(
      list(reticulate.engine.environment = environment()),
      value <- py_run_string(python_code)
    )
    IRIS <- value$data_new
  }
)
datanames(data) <- c("IRIS")
rlang::hash(data[["IRIS"]])
#> [1] "e9225d33975ec7ec986485068139f7af"
get_code(data, datanames = "IRIS") |> cat()
#> library(reticulate)
#> python_dependencies <- c("pip", "numpy", "pandas")
#> virtualenv_dir <- Sys.getenv("VIRTUALENV_NAME", "example_env_name")
#> python_path <- Sys.getenv("PYTHON_PATH")
#> if (python_path == "") {
#>     python_path <- NULL
#> }
#> reticulate::virtualenv_create(envname = virtualenv_dir, python = python_path)
#> reticulate::virtualenv_install(virtualenv_dir, packages = python_dependencies, ignore_installed = TRUE)
#> reticulate::use_virtualenv(virtualenv_dir, required = TRUE)
#> iris_raw <- cbind(id = seq_len(nrow(iris)), iris)
#> python_code <- "import pandas as pd\ndata = r.iris_raw\ndef svd_whiten(dat):\n    import numpy as np\n    X = np.matrix(dat)\n    U, s, Vt = np.linalg.svd(X, full_matrices=False)\n    X_white = np.dot(U, Vt)\n    return X_white\n\ndata_columns = data.columns\nglobal numeric_cols_ix\nglobal numeric_cols\nnumeric_cols_ix = list(range(5))[1:]\nnumeric_cols = [x for i,x in enumerate(data_columns) if i in numeric_cols_ix]\nsvd_res = svd_whiten(data.iloc[:, numeric_cols_ix])\ndata_new = pd.concat([data, pd.DataFrame(svd_res)], axis = 1)\ndata_new.columns = list(data_columns) + [i + '.whiten' for i in numeric_cols]\ndata_new = data_new.round(10)\ndata_new\n"
#> withr::with_options(list(reticulate.engine.environment = environment()), value <- py_run_string(python_code))
#> IRIS <- value$data_new
```

Created on 2024-02-19 with [reprex v2.0.2](https://reprex.tidyverse.org)