Open arzevedo opened 3 years ago
Hi Arthur,
As a first step, I suggest checking whether there is any constant variable in the data, e.g., a column that has the same value for every instance.
For the other cases, make sure that both test and training data have the same structure and covariate names.
If neither of these works, could you send the header of the training and test sets?
I followed your first suggestion. There were two columns containing only zeros, so I filtered them out. That was enough to make predict.rm_model work.
library(rmachines)
library(tidyverse)
library(tidymodels)
mydf <- recipe(EDpDM_isKeystone ~ .,
data = read_csv("C:/Users/arthu/OneDrive/Documentos/function_analysis/model_input.csv") %>%
mutate(EDpDM_isKeystone = factor(EDpDM_isKeystone))
) %>%
update_role(Taxon, new_role = "Id") %>%
step_corr(all_predictors(), threshold = 0.8,- all_nominal()) %>%
prep() %>% juice()
df_split <- initial_split(mydf, strata = EDpDM_isKeystone, prop = .75)
df_train <- training(df_split)
df_test <- testing(df_split)
head(df_train)
#> # A tibble: 6 x 92
#> Taxon Ecosystem x1_1_1_trichlor~ x1_and_2_methyl~ x2_4_dichlorobe~
#> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 Acid~ sediment 0 2 2
#> 2 Acti~ sediment 0 0 12
#> 3 Aqui~ sediment 0 1 0
#> 4 Arma~ sediment 0 1 2
#> 5 Baln~ sediment 2 4 3
#> 6 Cald~ sediment 0 2 2
#> # ... with 87 more variables: acridone_alkaloid_biosynthesis <dbl>,
#> # amino_sugar_and_nucleotide_sugar_metabolism <dbl>,
#> # aminoacyl_t_rna_biosynthesis <dbl>, arachidonic_acid_metabolism <dbl>,
#> # ascorbate_and_aldarate_metabolism <dbl>, atrazine_degradation <dbl>,
#> # beta_lactam_resistance <dbl>, betalain_biosynthesis <dbl>,
#> # biosynthesis_of_ansamycins <dbl>...
head(df_test)
#> # A tibble: 6 x 92
#> Taxon Ecosystem x1_1_1_trichlor~ x1_and_2_methyl~ x2_4_dichlorobe~
#> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 Cand~ sediment 0 1 0
#> 2 Cand~ sediment 0 0 2
#> 3 Cand~ sediment 0 1 0
#> 4 Cand~ sediment 0 1 0
#> 5 Cand~ sediment 0 1 2
#> 6 Cand~ sediment 0 1 3
#> # ... with 87 more variables: acridone_alkaloid_biosynthesis <dbl>,
#> # amino_sugar_and_nucleotide_sugar_metabolism <dbl>,
#> # aminoacyl_t_rna_biosynthesis <dbl>, arachidonic_acid_metabolism <dbl>,
#> # ascorbate_and_aldarate_metabolism <dbl>, atrazine_degradation <dbl>,
#> # beta_lactam_resistance <dbl>, betalain_biosynthesis <dbl>,
#> # biosynthesis_of_ansamycins <dbl>....
mod_classification <- rmachines::random_machines(formula=EDpDM_isKeystone~.,
train=df_train,
test=df_test,
boots_size=100,
cost=1,
seed.bootstrap=2020,
automatic_tuning=FALSE,
poly_scale=1,
gamma_rbf=1,
gamma_lap=1,
degree=2,
offset=0)
env_test <- mydf %>% filter(Ecosystem == "saline water")
head(env_test)
#> # A tibble: 6 x 92
#> Taxon Ecosystem x1_1_1_trichlor~ x1_and_2_methyl~ x2_4_dichlorobe~
#> <fct> <fct> <dbl> <dbl> <dbl>
#> 1 Acti~ saline w~ 0 0 12
#> 2 Aqui~ saline w~ 0 1 0
#> 3 Bact~ saline w~ 0 0 2
#> 4 Baln~ saline w~ 2 4 3
#> 5 Cand~ saline w~ 0 1 0
#> 6 Cand~ saline w~ 1 0 0
#> # ... with 87 more variables: acridone_alkaloid_biosynthesis <dbl>,
#> # amino_sugar_and_nucleotide_sugar_metabolism <dbl>,
#> # aminoacyl_t_rna_biosynthesis <dbl>, arachidonic_acid_metabolism <dbl>,
#> # ascorbate_and_aldarate_metabolism <dbl>, atrazine_degradation <dbl>,
#> # beta_lactam_resistance <dbl>, betalain_biosynthesis <dbl>,
#> # biosynthesis_of_ansamycins <dbl> .....
pred <- rmachines::predict(mod_classification,newdata=mydf)
#> Error in UseMethod("predict"): método não aplicável para 'predict' aplicado a um objeto de classe "rm_model"
pred <- rmachines::predict.rm_model(mod_classification,newdata=env_test)
env_test %>%
mutate(PRED = pred) %>%
count(EDpDM_isKeystone, PRED) %>%
group_by(EDpDM_isKeystone) %>%
mutate(freq = n/sum(n))
#> # A tibble: 4 x 4
#> # Groups: EDpDM_isKeystone [2]
#> EDpDM_isKeystone PRED n freq
#> <fct> <fct> <int> <dbl>
#> 1 0 0 55 0.509
#> 2 0 1 53 0.491
#> 3 1 0 10 0.238
#> 4 1 1 32 0.762
Created on 2020-12-02 by the reprex package (v0.3.0)
I still get the error in predict, but I suppose that everything else is running smoothly.
Hi @arzevedo, can you show me the class of the object mod_classification? Do not use rmachines::predict; just load the library and call the prediction function normally. If it is still not working, please remove and reinstall the package.
library(rmachines)
library(tidyverse)
library(tidymodels)
mydf <- recipe(EDpDM_isKeystone ~ .,
data = read_csv("C:/Users/arthu/OneDrive/Documentos/function_analysis/model_input.csv") %>%
mutate(EDpDM_isKeystone = factor(EDpDM_isKeystone))
) %>%
update_role(Taxon, new_role = "Id") %>%
step_corr(all_predictors(), threshold = 0.8,- all_nominal()) %>%
prep() %>% juice()
df_split <- initial_split(mydf, strata = EDpDM_isKeystone, prop = .75)
df_train <- training(df_split)
df_test <- testing(df_split)
mod_classification <- rmachines::random_machines(formula=EDpDM_isKeystone~.,
train=df_train,
test=df_test,
boots_size=100,
cost=1,
seed.bootstrap=2020,
automatic_tuning=FALSE,
poly_scale=1,
gamma_rbf=1,
gamma_lap=1,
degree=2,
offset=0)
class(mod_classification)
#> [1] "rm_model"
env_test <- mydf %>% filter(Ecosystem == "saline water")
pred <- predict(mod_classification,newdata=env_test)
#> Error in UseMethod("predict"): método não aplicável para 'predict' aplicado a um objeto de classe "rm_model"
Created on 2020-12-03 by the reprex package (v0.3.0)
Still not working. I'll remove and reinstall the package.
Hi Matheus, I have a problem with the package. Could you help me with this error?
Created on 2020-12-02 by the reprex package (v0.3.0)
I tried both predict.lm model and predict, but neither runs. Thanks in advance.