Closed zoezhang106 closed 2 years ago
Hey can you please use reprex::reprex
or at least include your package version numbers? My guess is that survivalmodels is out of date, the latest version on GH/R-universe is 0.1.13
Also the code that you provided is not working
Many thanks for your prompt reply.
The code is based on https://sebastian.vollmer.ms/post/survival_networks/
The package versions are as follows:
[1] mlr3pipelines_0.4.0 mlr3extralearners_0.5.23 mlr3tuning_0.12.1
[4] paradox_0.8.0 mlr3proba_0.4.4 mlr3_0.13.3
[7] future_1.23.0 survivalmodels_0.1.13 survival_3.2-13
[10] survminer_0.4.9 ggpubr_0.4.0 ggplot2_3.3.5
Please find the reprex::reprex report below.
#### 1 Using Python in R ####
## survivalmodels drives the Python pycox/torch backends via reticulate.
library(survivalmodels)
#> Warning: package 'survivalmodels' was built under R version 4.0.5
## Raise the memory ceiling; memory.limit() is Windows-only (the traceback
## paths below confirm a Windows session).
memory.limit(900000)
#> [1] 9e+05
library(future)
## NOTE(review): "multicore" is not supported on Windows -- future falls
## back to sequential there; "multisession" was probably intended. Confirm.
plan(strategy = "multicore", workers = 6)
#### 2 Setting Seeds ####
## set_seed() comes from survivalmodels; presumably it seeds R and the
## Python (numpy/torch) RNGs together -- verify against its documentation.
set_seed(1234)
#### 3 Survival data #####
library(mlr3)
library(mlr3proba)
## built-in Worcester Heart Attack Study task
whas <- tsk("whas")
## create our own task from the rats dataset
rats_data <- survival::rats
## convert characters to factors
rats_data$sex <- factor(rats_data$sex, levels = c("f", "m"))
rats <- TaskSurv$new("rats", rats_data, time = "time", event = "status")
## combine in list
tasks <- list(whas, rats)
#### 4 Getting and tuning learners ####
# Hyper-parameter configurations
# We’re going to tune the neural networks with the following configurations:
#
# Dropout fraction tuned over [0, 1]
# Weight decay over [0, 0.5]
# Learning rate over [0, 1]
# Number of nodes in a layer over {1,…,32}
# Number of hidden layers over {1,…,4}
library(paradox)
## Hyper-parameter search space matching the list in the comments above;
## `nodes` (layer width) and `k` (depth) are later combined into a single
## `num_nodes` vector by the trafo attached to this object.
search_space <- ps(
## p_dbl for numeric valued parameters
dropout = p_dbl(lower = 0, upper = 1),
weight_decay = p_dbl(lower = 0, upper = 0.5),
learning_rate = p_dbl(lower = 0, upper = 1),
## p_int for integer valued parameters
nodes = p_int(lower = 1, upper = 32),
k = p_int(lower = 1, upper = 4)
)
## Transform sampled (nodes, k) into the `num_nodes` vector the pycox
## learners expect: k hidden layers, each `nodes` units wide. The two
## helper entries are removed so only real learner parameters remain.
## Fixes: use `<-` for assignment and split the obscure chained
## `x$nodes = x$k = NULL` into two explicit deletions; drop the
## unnecessary explicit return().
search_space$trafo <- function(x, param_set) {
  ## e.g. nodes = 8, k = 2  ->  num_nodes = c(8, 8)
  x$num_nodes <- rep(x$nodes, x$k)
  x$nodes <- NULL
  x$k <- NULL
  x
}
#### 5 wrap the learners in an AutoTuner ######
library(mlr3tuning)
## Wrap a learner in an AutoTuner: random search over the global
## `search_space`, holdout inner resampling, Harrell's C-index as the
## tuning measure, and a budget of 60 evaluated configurations.
create_autotuner <- function(learner) {
  inner_resampling <- rsmp("holdout")
  tuning_measure <- msr("surv.cindex")
  stop_after <- trm("evals", n_evals = 60)
  AutoTuner$new(
    learner = learner,
    search_space = search_space,
    resampling = inner_resampling,
    measure = tuning_measure,
    terminator = stop_after,
    tuner = tnr("random_search")
  )
}
#### 6 get our learners and apply our function #####
## learners are stored in mlr3extralearners
library(mlr3extralearners)
## load learners
## Only DeepSurv in this reprex; `frac` is presumably the validation
## fraction used for early stopping -- confirm against survivalmodels docs.
learners <- lrns(
paste0("surv.", c("deepsurv")),
frac = 0.3, early_stopping = TRUE, epochs = 100, optimizer = "adam"
)
#learners <- lrns(
# paste0("surv.", c("coxtime", "deepsurv","deephit","pchazard")),
# frac = 0.3, activation = "relu", early_stopping = TRUE, epochs = 100, optimizer = "adam")
# apply our function
## wrap each learner in the AutoTuner defined above
learners <- lapply(learners, create_autotuner)
#### 7 Pre-processing ####
library(mlr3pipelines)
## Preprocessing pipeline: one-hot encode factor columns, then centre and
## scale numeric features, then hand the data to the (tuned) learner.
create_pipeops <- function(learner) {
  preproc <- po("encode") %>>% po("scale")
  preproc %>>% po("learner", learner)
}
## apply our function
learners <- lapply(learners, create_pipeops)
## select holdout as the resampling strategy
## NOTE(review): the comment above says holdout but this is 5-fold CV --
## this is the OUTER resampling; the inner holdout lives in the AutoTuner.
resampling <- rsmp("cv", folds = 5)
## full tasks x learners x resampling design, then run the benchmark
design <- benchmark_grid(tasks, learners, resampling)
bm <- benchmark(design)
#> INFO [10:08:58.083] [bbotk] Evaluating 1 configuration(s)
#> INFO [10:08:58.107] [mlr3] Running benchmark with 1 resampling iterations
#> INFO [10:08:58.112] [mlr3] Applying learner 'surv.deepsurv' on task 'whas' (iter 1/1)
#> Error in py_call_impl(callable, dots$args, dots$keywords): ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 32])
#>
#> Detailed traceback:
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\pycox\models\cox.py", line 51, in fit
#> return super().fit(input, target, batch_size, epochs, callbacks, verbose,
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torchtuples\base.py", line 294, in fit
#> log = self.fit_dataloader(dataloader, epochs, callbacks, verbose, metrics, val_dataloader)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torchtuples\base.py", line 236, in fit_dataloader
#> self.batch_metrics = self.compute_metrics(data, self.metrics)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torchtuples\base.py", line 180, in compute_metrics
#> out = self.net(*input)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
#> return forward_call(*input, **kwargs)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torchtuples\practical.py", line 84, in forward
#> return self.net(input)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
#> return forward_call(*input, **kwargs)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torch\nn\modules\container.py", line 141, in forward
#> input = module(input)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
#> return forward_call(*input, **kwargs)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torchtuples\practical.py", line 61, in forward
#> input = self.batch_norm(input)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
#> return forward_call(*input, **kwargs)
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torch\nn\modules\batchnorm.py", line 168, in forward
#> return F.batch_norm(
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torch\nn\functional.py", line 2419, in batch_norm
#> _verify_batch_size(input.size())
#> File "C:\Users\yuanzh\AppData\Local\R-MINI~1\envs\R-RETI~1\lib\site-packages\torch\nn\functional.py", line 2387, in _verify_batch_size
#> raise ValueError("Expected more than 1 value per channel when training, got input size {}".format(size))
#>
#> This happened PipeOp surv.deepsurv.tuned's $train()
## score the benchmark: concordance (higher is better) and integrated
## Graf/Brier score (lower is better)
msrs <- msrs(c("surv.cindex", "surv.graf"))
## columns 3, 4, 7, 8 -- presumably task id, learner id and the two
## scores; confirm against the aggregate() output layout
bm$aggregate(msrs)[, c(3, 4, 7, 8)]
#> Error in eval(expr, envir, enclos): object 'bm' not found
Created on 2022-04-04 by the reprex package (v2.0.1)
Thanks, edited your reprex to make more readable. So this is a different error than you sent before. I'll see if I can reproduce it and get back to you
Hi Raphael,
Yes, you are right. The error is different for the demo data. I am sorry that I cannot make the data I work on public. I guess the basic problem is still similar: with low n_evals, epochs, and folds the code works well, but it does not work with larger numbers.
I have already updated the packages to the latest versions, but I am not sure whether the error is caused by incompatible packages.
Thanks a lot.
Can you please run the code below, retry your example and report whether the problem is solved?
devtools::install_github("RaphaelS1/survivalmodels")
devtools::install_github("mlr-org/mlr3proba")
Just comparing to the tutorial are you sure the issue is n_evals
or epochs
and not actually the number of folds?
@sebffischer After I run the code you gave, I still get the error as below.
Error in py_call_impl(callable, dots$args, dots$keywords) : ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 32]) This happened PipeOp surv.deepsurv.tuned's $train()
@RaphaelS1
For the demo data, if I change folds = 3 to folds = 5, the error is as below.
Error in py_call_impl(callable, dots$args, dots$keywords) : ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 32]) This happened PipeOp surv.deepsurv.tuned's $train()
For my real data, the max numbers I can set are n_evals = 10, epochs=10, folds = 3 (but with this setting, the surv.cindex is only 0.66 which I guess is not good). If I try to increase either one of these three, I will get an error as below.
Error in if (!all(x <= 1 & x >= 0)) { : missing value where TRUE/FALSE needed This happened PipeOp surv.deepsurv.tuned's $train()
I am not sure if these two errors are caused from the same reason.
Many thanks for your help.
No I think they're caused by different reasons. One is an error in fitting the model within pycox. The other is a conversion error due to reticulate. I can try and fix the probability error but I need to first figure out of it is just a rounding error (which I suspect) or a genuine bug in reticulate.
I also think it's a coincidence you're getting different bugs at different times. The pycox error about input is probably affected by the parameter choices. The other error is likely stochastic
what is the status here @RaphaelS1 ? :)
Thanks for the reminder @sebffischer. Will look into it on Friday
Okay I've spent a long time looking at this. I don't think it's a bug. I think something is happening within all the different folds so that in one loop in training there's only one observation in the batch (hence the error). Can you confirm the size of your data (number of rows)?
Hi Raphael, many thanks for your time and helps. The number of rows for my data is 2516.
@RaphaelS1 this means that is not a bug and I can close?
Yes
Sorry, I still do not fully understand. Do you mean that the error is caused by the number of survival events being too small for some groups? Is there a way to avoid this error?
Tbh I am also a little confused: Why is the batch size related to the channels? (I have no idea how this learner works)
It comes in in two places: 1) BatchNorm1D, 2) Dataloader. There's quite a lot about this online, e.g. here and here. The problem is I can't implement the solution because it's in the Python backend...you might be able to try playing with the batch size and increasing/decreasing from the default (256L).
Setting drop_last = TRUE
in the dataloader should solve the problem I guess? maybe this issue has to be raised upstream?
Many thanks for both of your helps and time. I will try to fix it based on your suggestions.
I am not sure whether the current API allows to change this parameter (didn't check in detail), but if not this is a problem in one of the other packages ...
Yes exactly the issue is in upstream packages but to raise an issue there you'd either need to convert all your code to Python. Sorry I can't be more help now but when I re-implement everything that should fix this
The original code works perfectly, but when I change hyperparameters such as n_evals, epochs, or folds to larger numbers, it reports the error below.
Error in if (!all(x <= 1 & x >= 0)) { : missing value where TRUE/FALSE needed This happened PipeOp surv.deepsurv.tuned's $train()
Many thanks in advance.
Expected Behaviour
resampling <- rsmp("cv", folds = 3) works
INFO [12:44:41.005] [bbotk] Finished optimizing after 10 evaluation(s) INFO [12:44:41.009] [bbotk] Result: INFO [12:44:41.013] [bbotk] dropout weight_decay learning_rate nodes k learner_param_vals x_domain surv.cindex INFO [12:44:41.013] [bbotk] 0.46898 0.051531 0.3204191 9 3 <list[8]> <list[4]> 0.6702235 INFO [12:44:42.529] [mlr3] Finished benchmark
Actual Behaviour
resampling <- rsmp("cv", folds = 5) did not work.
INFO [12:48:00.757] [bbotk] Evaluating 1 configuration(s) INFO [12:48:00.847] [mlr3] Running benchmark with 1 resampling iterations INFO [12:48:00.873] [mlr3] Applying learner 'surv.deepsurv' on task 'dataset' (iter 1/1) Error in if (!all(x <= 1 & x >= 0)) { : missing value where TRUE/FALSE needed This happened PipeOp surv.deepsurv.tuned's $train()
The complete code is as follows:
library(paradox)

## Hyper-parameter search space: dropout, weight decay and learning rate
## as doubles; layer width (`nodes`) and depth (`k`) as integers.
search_space <- ps(
  dropout = p_dbl(lower = 0, upper = 1),
  weight_decay = p_dbl(lower = 0, upper = 0.5),
  learning_rate = p_dbl(lower = 0, upper = 1),
  nodes = p_int(lower = 1, upper = 32),
  k = p_int(lower = 1, upper = 4)
)

## Expand (nodes, k) into pycox's `num_nodes` (k layers of `nodes` units
## each) and drop the two search-space-only helper entries.
search_space$trafo <- function(x, param_set) {
  x$num_nodes <- rep(x$nodes, x$k)
  x$nodes <- NULL
  x$k <- NULL
  x
}
library(mlr3tuning)

## Wrap a learner in an AutoTuner: random search (budget of 10
## evaluations), holdout inner resampling, Harrell's C-index measure.
create_autotuner <- function(learner) {
  AutoTuner$new(
    learner = learner,
    search_space = search_space,
    resampling = rsmp("holdout"),
    measure = msr("surv.cindex"),
    terminator = trm("evals", n_evals = 10),
    tuner = tnr("random_search")
  )
}

## DeepSurv with early stopping, 10 epochs, Adam optimizer
learners <- lrns(
  paste0("surv.", c("deepsurv")),
  frac = 0.3, early_stopping = TRUE, epochs = 10, optimizer = "adam"
)

## wrap each learner in an AutoTuner
learners <- lapply(learners, create_autotuner)
library(mlr3pipelines)

## Pipeline: one-hot encode factors -> scale features -> tuned learner
create_pipeops <- function(learner) {
  po("encode") %>>% po("scale") %>>% po("learner", learner)
}

learners <- lapply(learners, create_pipeops)

## Outer resampling: 5-fold cross-validation (the setting that fails)
resampling <- rsmp("cv", folds = 5)
design <- benchmark_grid(tasks, learners, resampling)
bm <- benchmark(design)

## Aggregate concordance and integrated Graf score
msrs <- msrs(c("surv.cindex", "surv.graf"))
bm$aggregate(msrs)[, c(3, 4, 7, 8)]