tidymodels / applicable

Quantify extrapolation of new samples given a training set
https://applicable.tidymodels.org/
Other
46 stars 7 forks source link

Testing fitting & scoring functions #3

Closed marlycormar closed 5 years ago

marlycormar commented 5 years ago
library(hardhat)
library(dplyr)

# ---------------------------------------------------------
# Model constructor: manual smoke test
# ---------------------------------------------------------

# Run constructor.R first (presumably where `new_ad_pca()` is defined).
manual_model <- new_ad_pca(
  "my_coef",
  default_xy_blueprint()
)

# Print the model, then list the names of its elements.
manual_model
names(manual_model)

# Inspect the blueprint stored on the model.
manual_model$blueprint

# ---------------------------------------------------------
# Model fitting implementation: manual smoke test
# ---------------------------------------------------------

# Run pca-fit.R first, then call the implementation on a
# one-column data frame holding only `Sepal.Width`.
ad_pca_impl(select(iris, Sepal.Width))

# ---------------------------------------------------------
# Fit bridge: feed it molded input the way each user-facing
# interface would
# ---------------------------------------------------------

# Formula interface: every column of `iris` as a predictor.
processed_1 <- mold(~., data = iris)
ad_pca_bridge(processed_1)

# XY interface: predictors only (no `Species`), with a
# placeholder `NA_real_` passed as the outcome.
iris_sub <- select(iris, -Species)
processed_2 <- mold(iris_sub, y = NA_real_)
ad_pca_bridge(processed_2)

# Two outcomes on the left-hand side. The bridge call is
# expected to raise an error.
multi_outcome <- mold(
  Sepal.Width + Petal.Width ~ Sepal.Length + Species,
  data = iris
)
ad_pca_bridge(multi_outcome)

# ---------------------------------------------------------
# Testing user facing fitting function
# ---------------------------------------------------------

# Attached once for this section; only the recipe interface
# at the end needs it.
library(recipes)

# Data frame predictor
predictor <- iris["Sepal.Length"]
ad_pca(predictor)

# Vector predictor.
# We should get the following error:
# "Error: `ad_pca()` is not defined for a 'numeric'."
predictor <- iris$Sepal.Length
ad_pca(predictor)

# Formula interface
ad_pca(~., iris)

# Recipe interface.
# Fails with "Error: No variables or terms were selected.".
rec <- recipe(~., iris) %>%
  step_log(Sepal.Width) %>%
  step_dummy(Species, one_hot = TRUE)
ad_pca(rec, iris)

# ---------------------------------------------------------
# Model scoring implementation: manual smoke test
# ---------------------------------------------------------

# Run pca-score.R first.
# Fit through the formula interface, forge `iris` against the
# fitted model's blueprint, and pass the predictors on as a matrix.
model <- ad_pca(Sepal.Width ~ Sepal.Length + Species, iris)
predictors <- as.matrix(forge(iris, model$blueprint)$predictors)
score_ad_pca_numeric(model, predictors)

# ---------------------------------------------------------
# Score bridge: manual smoke test
# ---------------------------------------------------------

# Fit on all columns, forge the same data against the model's
# blueprint, and request the "numeric" score type from the bridge.
model <- ad_pca(~., iris)
predictors <- forge(
  new_data = iris,
  blueprint = model$blueprint
)$predictors
score_ad_pca_bridge("numeric", model, predictors)

# ---------------------------------------------------------
# Testing score interface function
# ---------------------------------------------------------

# Run 0.R
# Fit on every column of `iris`, then score the training data itself.
model <- ad_pca(~., iris)
score(model, iris)

# We should get an error:
# "Error: The class of `new_data`, 'factor', is not recognized."
# since `iris$Species` is not a data.frame
score(model, iris$Species)

# We should get an error:
# "Error: The following required columns are missing: 'Sepal.Length'."
# since `Sepal.Length` column is missing.
score(model, subset(iris, select = -Sepal.Length))

# The column `Species` is silently converted to a factor.
# Pass the data frame created on the line above; the previous
# name (`iris_character_cols`) was never defined, so the call
# failed before reaching `score()`.
iris_character_col <- transform(iris, Species = as.character(Species))
score(model, iris_character_col)

# We should get an error:
# "Error: Can't cast `x$Species` <double> to `to$Species` <factor<12d60>>."
# since `Species` can't be forced to be a factor
iris_double_col <- transform(iris, Species = 1)
score(model, iris_double_col)
marlycormar commented 5 years ago

Tests have already been added.

github-actions[bot] commented 3 years ago

This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this issue.