Slightly smaller and faster reprex: run everything through to `wfs` here, then manually type out `wfs %>% tidyr::unnest(c(inf))`, put your cursor at `inf<>`, and hit tab. That results in an absolute wall of `completion()` LSP request error text like what you see below, and I'm pretty sure in the real case with `fits` that ends up sending over 512 MB of info. It looks like:
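Concretely, using the `<>` notation above for the cursor position, the editor state at the moment tab is hit looks like this (an illustration of the keystrokes, not runnable code):

```r
# `<>` marks where the cursor sits when tab is pressed
wfs %>% tidyr::unnest(c(inf<>))
```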
```
at /Users/davis/.cargo/registry/src/index.crates.io-6f17d22bba15001f/anyhow-1.0.80/src/error.rs:565:25
4: <core::result::Result<T,F> as core::ops::try_trait::FromResidual<core::result::Result<core::convert::Infallible,E>>>::from_residual
at /rustc/82e1608dfa6e0b5569232559e3d385fea5a93112/library/core/src/result.rs:1963:27
5: ark::lsp::completions::sources::composite::call::completions_from_session_arguments
at /Users/davis/files/programming/positron/amalthea/crates/ark/src/lsp/completions/sources/composite/call.rs:221:9
6: ark::lsp::completions::sources::composite::call::completions_from_arguments
[Info - 10:40:28 AM] completion(): Failed to provide completions: Error evaluating .ps.completions.formalNames(.Primitive("c"), structure(list(wflow_id = c("recipe_1_xgb",
"recipe_1_lm", "recipe_1_rf", "recipe_1_svm_poly", "recipe_2_xgb",
"recipe_2_lm", "recipe_2_rf", "recipe_2_svm_poly", "recipe_3_xgb",
"recipe_3_lm", "recipe_3_rf", "recipe_3_svm_poly", "recipe_4_xgb",
"recipe_4_lm", "recipe_4_rf", "recipe_4_svm_poly", "recipe_5_xgb",
"recipe_5_lm", "recipe_5_rf", "recipe_5_svm_poly"), info = list(
structure(list(workflow = list(structure(list(pre = structure(list(
actions = list(recipe = structure(list(recipe = structure(list(
var_info = structure(list(variable = c("income",
"houseAge", "rooms", "bedrooms", "population", "households",
"houseValue"), type = list(c("double", "numeric"),
c("double", "numeric"), c("double", "numeric"
), c("double", "numeric"), c("double", "numeric"
), c("double", "numeric"), c("double", "numeric"
)), role = c("predictor", "predictor", "predictor",
"predictor", "predictor", "predictor", "outcome"),
source = c("original", "original", "original",
"original", "original", "original", "original"
)), row.names = c(NA, -7L), class = c("tbl_df",
"tbl", "data.frame")), term_info = structure(list(
variable = c("income", "houseAge", "rooms", "bedrooms",
"population", "households", "houseValue"), type = list(
c("double", "numeric"), c("double", "numeric"
), c("double", "numeric"), c("double", "numeric"
), c("double", "numeric"), c("double", "numeric"
), c("double", "numeric")), role = c("predictor",
"predictor", "predictor", "predictor", "predictor",
"predictor", "outcome"), source = c("original",
"original", "original", "original", "original",
"original", "original")), row.names = c(NA, -7L
), class = c("tbl_df", "tbl", "data.frame")), steps = NULL,
template = structure(list(income = c(5.7143, 5.9683,
3.3903, 3.7973, 6.0574, 3.2841, 2.227, 3.7139, 1.845,
5.5983, 4.3438, 4.2679, 2.9817, 8.2049, 1.3375, 7.0177,
7.9187, 3.1062, 2.4861, 4.2917, 1.6483, 3.1856, 3.0973,
4.1359, 4.1552, 4.6731, 1.9074, 3.8581, 5.0234, 4.425,
3.25, 5.2589, 2.8, 4.2222, 5.6152, 3.6875, 3.0114,
5.5456, 12.8665, 3.8, 4.875, 3.0437, 4.6696, 4.3958,
3.0926, 2.4884, 3.8201, 2.065, 1.6667, 4.2434, 3.0938,
3.3833, 5.363, 7.0565, 4.2596, 1.9556, 2.6513, 2.0214,
5.7188, 4.3898, 3.0893, 2.9044, 2.1149, 2.0134, 3.9722,
1.6111, 5.0602, 4.5625, 2.3482, 0.4999, 1.125, 3.1719,
11.3176, 7.0469, 10.7569, 5.4337, 5.3107, 3.8125,
1.1384, 1.6318, 2.2277, 4.9643, 3.7958, 2.6803, 3.7813,
3.725, 2.5759, 5.2088, 7.1497, 15.0001, 3.631, 3.8897,
3.9443, 4.9517, 3.1771, 2.1522, 3.5524, 2.0549, 3.2813,
3.6121, 3.3371, 4.5484, 3.5404, 4.2348, 2.1667, 1.5455,
<snip for brevity, this goes onnnnnnn and onnnnn>
```
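To give a sense of scale: the failing request embeds a full deparsed copy of the in-scope data (note the `structure(list(wflow_id = ...` payload above). A minimal sketch of why that balloons, assuming `wfs` from the reprex below has been created; this is an illustration only, not ark's actual code path:

```r
# Deparsing a nested workflow set yields an enormous text representation,
# which is what shows up inline in the completion error above.
txt <- deparse(wfs)
length(txt)  # thousands of lines
format(object.size(paste(txt, collapse = "\n")), units = "MB")
```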
```r
library(sf)
library(readr)
library(dplyr)
library(sfdep)

tf <- tempfile(fileext = ".csv")
download.file("https://raw.githubusercontent.com/xj-liu/spatial_feature_incorporation/main/houses1990.csv", tf)

# read and create an sf object
houses_raw <- read_csv(tf) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) |>
  # apply a smidgen of jittering to the points because there are dupes
  mutate(geometry = st_jitter(geometry))

houses_nb <- houses_raw |>
  mutate(
    # use knn neighbors
    nb = st_knn(geometry, k = 25),
    # IDW weight
    wt = st_inverse_distance(nb, geometry)
  )

autocorrelation <- houses_nb |>
  reframe(across(where(is.numeric), \(.x) {
    broom::tidy(global_moran_test(.x, nb, wt))
  })) |>
  tidyr::pivot_longer(everything()) |>
  tidyr::unnest_wider(value)

# Identify which measures are spatially autocorrelated
auto_cor_fields <- autocorrelation |>
  select(name, I = estimate1, p_value = p.value) |>
  mutate(is_autocorrelated = p_value <= 0.01 & I > 0.3)

# these are the input fields that we want to use neighboring values for
auto_correlated_x <- auto_cor_fields |>
  filter(
    is_autocorrelated,
    # this will be the y so we cannot use this
    name != "houseValue"
  ) |>
  pull(name)

# calculate the spatial lag of these variables
# drop the neighbors and the weights and the geometry
clean_df <- houses_nb |>
  mutate(
    across(
      all_of(auto_correlated_x),
      \(.x) st_lag(.x, nb, wt),
      .names = "{.col}_lag"
    )
  ) |>
  st_drop_geometry() |>
  select(-c(nb, wt))

# Model Specifications ---------------------------------------------------

library(tidymodels)

# These packages are needed for the engines
# install.packages(c("ranger", "kernlab", "xgboost"))

boost_tree_xgboost_spec <-
  boost_tree(
    # tree_depth = tune(),
    # trees = tune(),
    # learn_rate = tune(),
    # min_n = tune(),
    # loss_reduction = tune(),
    # sample_size = tune(),
    # stop_iter = tune()
  ) |>
  set_engine("xgboost") |>
  set_mode("regression")

linear_reg_glm_spec <-
  linear_reg() |>
  set_engine("glm")

rand_forest_ranger_spec <-
  rand_forest(
    # mtry = tune(),
    # min_n = tune()
  ) |>
  set_engine("ranger") |>
  set_mode("regression")

svm_poly_kernlab_spec <-
  svm_poly(
    # cost = tune(),
    # degree = tune(),
    # scale_factor = tune(),
    # margin = tune()
  ) |>
  set_engine("kernlab") |>
  set_mode("regression")

models <- list(
  xgb = boost_tree_xgboost_spec,
  lm = linear_reg_glm_spec,
  rf = rand_forest_ranger_spec,
  svm_poly = svm_poly_kernlab_spec
)

# Partition --------------------------------------------------------------

# create initial split
init_split <- initial_split(clean_df)

# training and testing
train_df <- training(init_split)
test_df <- testing(init_split)

folds <- vfold_cv(train_df)

# Pre-processing steps ---------------------------------------------------

# just going to apply normalization to each of the models
base_rec <- recipe(
  houseValue ~ income + houseAge + rooms + bedrooms + population + households,
  data = train_df
)

sp_house_age <- recipe(
  houseValue ~ income + houseAge + rooms + bedrooms + population + households +
    # spatial component
    houseAge_lag,
  data = train_df
) |> step_scale(everything())

sp_income <- recipe(
  houseValue ~ income + houseAge + rooms + bedrooms + population + households +
    # spatial component
    income_lag,
  data = train_df
) |> step_scale(everything())

sp_income_age <- recipe(
  houseValue ~ income + houseAge + rooms + bedrooms + population + households +
    # spatial component
    income_lag + houseAge_lag,
  data = train_df
) |> step_scale(everything())

sp_all <- recipe(
  houseValue ~ .,
  data = train_df
) |> step_scale(everything())

# create a list of all the recipes we want to work with
recipes <- list(base_rec, sp_house_age, sp_income, sp_income_age, sp_all)

wfs <- workflow_set(recipes, models, cross = TRUE)
```
In Positron 2024.05.0 (Universal) build 1251, I can walk through this reprex without hitting any LSP completion errors:
https://github.com/posit-dev/positron/assets/12505835/06fc4a16-11cc-41a5-97b7-9d787fdfdf5e
While investigating https://github.com/posit-dev/positron-beta/discussions/19#discussioncomment-9407240 with the reprex below, I was able to reproduce the issue after printing out `fits` and then running `fits |> tidyr::unnest(info)`. At some point when trying to run more code after running everything below, the console will hang, and you should see this in the `Positron R Extension` output channel, and this in the `Console: R` output channel.

Interestingly, after waiting a full minute or so, the Console does end up being able to process the code in there and returns control to me. But the LSP is now dead and not processing any requests.
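For context, `fits` is not defined in the reprex as shown here; presumably it comes from fitting the workflow set across the folds, along these lines (a hypothetical reconstruction, since that step was snipped):

```r
# Hypothetical reconstruction of the step that produces `fits`:
# fit every workflow in the set on each cross-validation fold.
fits <- wfs |>
  workflow_map("fit_resamples", resamples = folds)
```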