biomodhub / biomod2

BIOMOD is a computer platform for ensemble forecasting of species distributions, enabling the treatment of a range of methodological uncertainties in models and the examination of species-environment relationships.
89 stars 22 forks source link

Error in BIOMOD_xxx - [short error summary here] #534

Open chenyongpeng1 opened 2 weeks ago

chenyongpeng1 commented 2 weeks ago

Please make sure to close the issue once you consider it solved. Please use screenshots only when you cannot copy-paste the object, e.g. for figures or maps.

Error and context: The error message "task 1 failed - missing value where TRUE/FALSE needed" and the warnings about k-fold values being lowered because of missing observations suggest that there might be issues with the presence-absence data (e.g., the pseudo-absence or occurrence data).

Code used to get the error

library(biomod2) library(sf) library(blockCV) library(caret)

myBiomodData <- BIOMOD_FormatingData( expl.var = myExpl, resp.var = myResp_train, eval.resp.var = myResp_test, resp.name = "species", PA.nb.rep = 4, PA.nb.absences = c(rep(n_PA_3, 2), rep(40000, 2)), PA.strategy = 'random', filter.raster = TRUE, dir.name = getwd(), seed.val = 1234 )

print(myBiomodData)

PA_used <- colnames(myBiomodData@PA.table)

PA1_10 <- paste0("PA", 1:2) PA11_20 <- paste0("PA", 3:4)

FDA <- PA1_10 RF <- PA1_10 RFd <- PA1_10 XGBOOST <- PA1_10 GLM <- PA11_20 GAM <- PA11_20 MAXENT <- PA11_20 MAXNET <- PA11_20 GBM <- PA11_20

# 构建模型列表 models.pa <- list( FDA = FDA, GBM = GBM, RF = RF, RFd = RFd, XGBOOST = XGBOOST, GLM = GLM, GAM = GAM, MAXENT = MAXENT )

user.rf <- list('for_all_datasets' = list(type = 'classification', importance = TRUE, nodesize = 10, oob.prox = TRUE, ntree = 250, mtry = 2, maxnodes = 5)) user.maxent <- list('for_all_datasets' = list(visible = TRUE, beta_threshold = 0))

user.XGBOOST <- list('for_all_datasets' = list(objective = "binary:logistic", params = list(max_depth = 5, eta = 0.2, gamma = 1), nrounds = 20, subsample = 0.5))

form.GLM <- bm_MakeFormula(resp.name = myBiomodData@sp.name, expl.var = head(myBiomodData@data.env.var), type = 's_smoother', interaction.level = 0) user.GLM <- list('for_all_datasets' = list(formula = form.GLM))

form.GAM <- bm_MakeFormula(resp.name = myBiomodData@sp.name, expl.var = head(myBiomodData@data.env.var), type = 's_smoother', interaction.level = 0) user.GAM <- list('for_all_datasets' = list(algo = 'GAM.mgcv.gam'))

form.GBM <- bm_MakeFormula(resp.name = myBiomodData@sp.name, expl.var = head(myBiomodData@data.env.var), type = 'simple', interaction.level = 0)

user.GBM <- list('for_all_datasets' = list(formula = form.GBM))

form.FDA <- bm_MakeFormula(resp.name = myBiomodData@sp.name, expl.var = head(myBiomodData@data.env.var), type = 's_smoother', interaction.level = 0) user.FDA <- list('for_all_datasets' = list(formula = form.FDA))

user.val <- list( RF.binary.randomForest.randomForest = user.rf, MAXENT.binary.MAXENT.MAXENT = user.maxent, XGBOOST.binary.xgboost.xgboost = user.XGBOOST, GLM.binary.stats.glm= user.GLM, GAM.binary.mgcv.gam= user.GAM, GBM.binary.gbm.gbm = user.GBM, FDA.binary.mda.fda = user.FDA )

myBiomodOption <- bm_ModelingOptions( data.type = 'binary', models = allModels, strategy = "user.defined", user.base = 'bigboss', user.val = user.val, bm.format = myBiomodData )

myBiomodModelOut <- BIOMODModeling( bm.format = myBiomodData, OPT.user = myBiomodOption, modeling.id = as.character(format(Sys.time(), "%Y%m%d%H%M_%S")), models = allModels, models.pa = models.pa, CV.strategy = 'kfold', CV.nb.rep = 1, CV.k = 5, var.import = 3 )

print(myBiomodModelOut) `

Environment Information

``R version 4.4.2 (2024-10-31 ucrt) Platform: x86_64-w64-mingw32/x64 Running under: Windows 11 x64 (build 22631)

Matrix products: default

locale: [1] LC_COLLATE=Chinese (Simplified)_China.utf8 LC_CTYPE=Chinese (Simplified)_China.utf8
[3] LC_MONETARY=Chinese (Simplified)_China.utf8 LC_NUMERIC=C
[5] LC_TIME=Chinese (Simplified)_China.utf8

time zone: Asia/Shanghai tzcode source: internal

attached base packages: [1] splines grid stats graphics grDevices utils datasets methods base

other attached packages: [1] caret_6.0-94 lattice_0.22-6 blockCV_3.1-5 xgboost_1.7.8.1 randomForest_4.7-1.2 [6] maxnet_0.1.4 earth_5.3.4 plotmo_3.6.4 plotrix_3.8-4 Formula_1.2-5
[11] gbm_2.2.2 mgcv_1.9-1 nlme_3.1-166 gam_1.22-5 foreach_1.5.2
[16] mda_0.5-4 class_7.3-22 rpart_4.1.23 nnet_7.3-19 biomod2_4.2-6-1
[21] gridExtra_2.3 terra_1.7-83 dplyr_1.1.4 spThin_0.2.0 knitr_1.48
[26] fields_16.3 viridisLite_0.4.2 spam_2.11-0 ggplot2_3.5.1 spatialsample_0.6.0 [31] readr_2.1.5 sf_1.0-19

loaded via a namespace (and not attached): [1] DBI_1.2.3 pROC_1.18.5 s2_1.1.7 rlang_1.1.4
[5] magrittr_2.0.3 furrr_0.3.1 e1071_1.7-16 compiler_4.4.2
[9] vctrs_0.6.5 maps_3.4.2 reshape2_1.4.4 stringr_1.5.1
[13] pkgconfig_2.0.3 wk_0.9.4 crayon_1.5.3 labeling_0.4.3
[17] utf8_1.2.4 prodlim_2024.06.25 tzdb_0.4.0 purrr_1.0.2
[21] bit_4.5.0 xfun_0.49 jsonlite_1.8.9 PresenceAbsence_1.1.11 [25] recipes_1.1.0 reshape_0.8.9 parallel_4.4.2 R6_2.5.1
[29] stringi_1.8.4 rsample_1.2.1 parallelly_1.38.0 lubridate_1.9.3
[33] Rcpp_1.0.13-1 iterators_1.0.14 future.apply_1.11.3 timechange_0.3.0
[37] Matrix_1.7-1 tidyselect_1.2.1 rstudioapi_0.17.1 abind_1.4-8
[41] timeDate_4041.110 codetools_0.2-20 listenv_0.9.1 tibble_3.2.1
[45] plyr_1.8.9 withr_3.0.2 future_1.34.0 survival_3.7-0
[49] units_0.8-5 proxy_0.4-27 pillar_1.9.0 KernSmooth_2.23-24
[53] stats4_4.4.2 generics_0.1.3 vroom_1.6.5 sp_2.1-4
[57] hms_1.1.3 munsell_0.5.1 scales_1.3.0 globals_0.16.3
[61] glue_1.8.0 tools_4.4.2 data.table_1.16.2 ModelMetrics_1.2.2.2
[65] gower_1.0.1 dotCall64_1.2 tidyr_1.3.1 ipred_0.9-15
[69] colorspace_2.1-1 raster_3.6-30 cli_3.6.3 fansi_1.0.6
[73] lava_1.8.0 gtable_0.3.6 digest_0.6.37 classInt_0.4-10
[77] farver_2.1.2 lifecycle_1.0.4 hardhat_1.4.0 dismo_1.3-14
[81] MASS_7.3-61 bit64_4.5.2 `

Additional information `> myBiomodData

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.formated.data -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

dir.name = E:/apis_mellifera2024

sp.name = apis

 22908 presences,  0 true absences and  63 undefined points in dataset

 11 explanatory variables

 bio13           bio15            bio18            bio19             bio2             bio3      

Min. : 0.0 Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. : 3.636 Min. :15.58
1st Qu.: 79.0 1st Qu.: 18.61 1st Qu.: 163.0 1st Qu.: 115.0 1st Qu.: 8.261 1st Qu.:30.88
Median :106.0 Median : 30.42 Median : 224.0 Median : 180.0 Median :10.237 Median :36.36
Mean :125.2 Mean : 39.73 Mean : 249.6 Mean : 204.7 Mean :10.541 Mean :40.19
3rd Qu.:146.0 3rd Qu.: 54.41 3rd Qu.: 307.0 3rd Qu.: 257.0 3rd Qu.:12.411 3rd Qu.:46.79
Max. :729.0 Max. :171.53 Max. :1850.0 Max. :1487.0 Max. :20.299 Max. :93.63
bio5 bio8 bio9 elev hii_v2geo1
Min. : 8.072 Min. :-8.609 Min. :-23.37 Min. :-259.0 Min. :-128.00
1st Qu.:23.096 1st Qu.: 9.904 1st Qu.: 1.67 1st Qu.: 64.0 1st Qu.: 16.03
Median :27.320 Median :15.927 Median : 10.76 Median : 200.0 Median : 24.34
Mean :27.353 Mean :15.443 Mean : 10.20 Mean : 410.1 Mean : 20.55
3rd Qu.:31.308 3rd Qu.:21.081 3rd Qu.: 18.89 3rd Qu.: 463.0 3rd Qu.: 35.24
Max. :46.704 Max. :33.711 Max. : 37.10 Max. :5356.0 Max. : 62.74

Evaluation data :

 9908 presences,  19 true absences and  0 undefined points in dataset

 bio13           bio15             bio18            bio19             bio2             bio3      

Min. : 5.0 Min. : 5.415 Min. : 0.0 Min. : 0.0 Min. : 3.546 Min. :16.72
1st Qu.: 80.0 1st Qu.: 18.725 1st Qu.: 163.0 1st Qu.: 116.0 1st Qu.: 8.239 1st Qu.:31.01
Median :107.0 Median : 30.506 Median : 225.0 Median : 181.0 Median :10.150 Median :36.64
Mean :126.2 Mean : 39.846 Mean : 251.6 Mean : 206.4 Mean :10.524 Mean :40.25
3rd Qu.:148.0 3rd Qu.: 55.025 3rd Qu.: 309.0 3rd Qu.: 260.0 3rd Qu.:12.386 3rd Qu.:46.73
Max. :685.0 Max. :160.232 Max. :1658.0 Max. :1318.0 Max. :20.115 Max. :93.49
bio5 bio8 bio9 elev hii_v2geo1
Min. :11.69 Min. :-6.694 Min. :-24.831 Min. :-282.0 Min. :-128.00
1st Qu.:23.08 1st Qu.: 9.749 1st Qu.: 1.765 1st Qu.: 62.0 1st Qu.: 16.01
Median :27.31 Median :15.952 Median : 10.907 Median : 197.0 Median : 24.54
Mean :27.34 Mean :15.456 Mean : 10.247 Mean : 407.9 Mean : 20.57
3rd Qu.:31.34 3rd Qu.:21.226 3rd Qu.: 18.699 3rd Qu.: 464.0 3rd Qu.: 35.40
Max. :44.92 Max. :32.962 Max. : 35.332 Max. :4469.0 Max. : 59.78

4 Pseudo Absences dataset available ( PA1, PA2, PA3, PA4 ) with 259 (PA1, PA2, PA3, PA4) pseudo absences

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

myBiomodOption

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.models.options -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

>  FDA options (datatype: binary , package: mda , function: fda ) :
   ( dataset _allData_allRun )
    -  formula = apis ~ 1 + gam::s(bio13) + gam::s(bio15) + gam::s(bio18) + gam::s(bio19) +      gam::s(bio2) + gam::s(bio3) + gam::s(bio5) + gam::s(bio8) +      gam::s(bio9) + gam::s(elev) + gam::s(hii_v2geo1) <environment: 0x000001af5fb571b8>   (default: formula(data) )
    -  data = sys.frame(sys.parent())
    -  weights = 
    -  theta = 
    -  eps = .Machine$double.eps
    -  method = "mars"   (default: polyreg )

>  GAM options (datatype: binary , package: mgcv , function: gam ) :
   ( dataset _allData_allRun )
    -  formula = apis ~ 1 + gam::s(bio13) + gam::s(bio15) + gam::s(bio18) + gam::s(bio19) +      gam::s(bio2) + gam::s(bio3) + gam::s(bio5) + gam::s(bio8) +      gam::s(bio9) + gam::s(elev) + gam::s(hii_v2geo1) <environment: 0x000001aef60aec38>   (default:  )
    -  family =  Family: binomial  Link function: logit  
    -  data = list()
    -  na.action = 
    -  method = "GCV.Cp"
    -  optimizer = c("outer", "newton")
    -  control = $epsilon 1e-06  $trace FALSE  $maxit 100    (default: $nthreads 1  $ncv.threads 1  $irls.reg 0  $epsilon 1e-07  $maxit 200  $trace FALSE  $mgcv.tol 1e-07  $mgcv.half 15  $rank.tol 1.490116e-08  $nlm $nlm$ndigit 7  $nlm$gradtol 1e-06  $nlm$stepmax 2  $nlm$steptol 1e-04  $nlm$iterlim 200  $nlm$check.analyticals FALSE   $optim $optim$factr 1e+07   $newton $newton$conv.tol 1e-06  $newton$maxNstep 5  $newton$maxSstep 2  $newton$maxHalf 30  $newton$use.svd FALSE   $idLinksBases TRUE  $scalePenalty TRUE  $efs.lspmax 15  $efs.tol 0.1  $keepData FALSE  $scale.est "fletcher"  $edge.correct FALSE  )
    -  scale = 0
    -  select = FALSE
    -  gamma = 1
    -  fit = TRUE
    -  drop.unused.levels = TRUE
    -  discrete = FALSE
    -  algo = "GAM.mgcv.gam"   (default: NULL )

>  GBM options (datatype: binary , package: gbm , function: gbm ) :
   ( dataset _allData_allRun )
    -  formula = apis ~ 1 + bio13 + bio15 + bio18 + bio19 + bio2 + bio3 + bio5 +      bio8 + bio9 + elev + hii_v2geo1 <environment: 0x000001afa1cf6e50>   (default: formula(data) )
    -  distribution = "bernoulli"
    -  data = list()
    -  weights = 
    -  n.trees = 2500   (default: 100 )
    -  interaction.depth = 7   (default: 1 )
    -  n.minobsinnode = 5   (default: 10 )
    -  shrinkage = 0.001   (default: 0.1 )
    -  bag.fraction = 0.5
    -  train.fraction = 1
    -  cv.folds = 3   (default: 0 )
    -  keep.data = FALSE   (default: TRUE )
    -  verbose = FALSE
    -  n.cores = 1

>  GLM options (datatype: binary , package: stats , function: glm ) :
   ( dataset _allData_allRun )
    -  formula = apis ~ 1 + gam::s(bio13) + gam::s(bio15) + gam::s(bio18) + gam::s(bio19) +      gam::s(bio2) + gam::s(bio3) + gam::s(bio5) + gam::s(bio8) +      gam::s(bio9) + gam::s(elev) + gam::s(hii_v2geo1) <environment: 0x000001af637a0250>   (default:  )
    -  family =  Family: binomial  Link function: logit  
    -  data = 
    -  weights = 
    -  subset = 
    -  na.action = 
    -  etastart = 
    -  mustart = 0.5   (default:  )
    -  offset = 
    -  control = $epsilon 1e-08  $maxit 50  $trace FALSE    (default: list() )
    -  model = TRUE
    -  method = "glm.fit"
    -  x = FALSE
    -  y = TRUE
    -  singular.ok = TRUE

>  MAXENT options (datatype: binary , package: MAXENT , function: MAXENT ) :
   ( dataset _allData_allRun )
    -  path_to_maxent.jar = "."   (default: "E:/apis_mellifera2024" )
    -  memory_allocated = 512
    -  background_data_dir = "default"
    -  visible = TRUE   (default: FALSE )
    -  linear = TRUE
    -  quadratic = TRUE
    -  product = TRUE
    -  threshold = TRUE
    -  hinge = TRUE
    -  lq2lqptthreshold = 80
    -  l2lqthreshold = 10
    -  hingethreshold = 15
    -  beta_threshold = 0   (default: -1 )
    -  beta_categorical = -1
    -  beta_lqp = -1
    -  beta_hinge = -1
    -  betamultiplier = 1
    -  defaultprevalence = 0.5

>  RF options (datatype: binary , package: randomForest , function: randomForest ) :
   ( dataset _allData_allRun )
    -  mtry = 2   (default: 1 )
    -  type = "classification"
    -  ntree = 250   (default: NULL )
    -  strata = 0 1 Levels: 0 1   (default: NULL )
    -  nodesize = 10   (default: NULL )
    -  importance = TRUE   (default: NULL )
    -  oob.prox = TRUE   (default: NULL )
    -  maxnodes = 5   (default: NULL )

>  RFd options (datatype: binary , package: randomForest , function: randomForest ) :
   ( dataset _allData_allRun )
    -  mtry = 2   (default: 1 )
    -  type = "classification"
    -  ntree = 500   (default: NULL )
    -  strata = 0 1 Levels: 0 1   (default: NULL )
    -  nodesize = 5   (default: NULL )

>  XGBOOST options (datatype: binary , package: xgboost , function: xgboost ) :
   ( dataset _allData_allRun )
    -  missing = NA
    -  params = $max_depth 5  $eta 0.2  $gamma 1    (default: list() )
    -  nrounds = 20   (default: 4 )
    -  verbose = 1
    -  print_every_n = 1
    -  save_name = "xgboost.model"
    -  callbacks = list()
    -  nthread = 2   (default: NULL )
    -  objective = "binary:logistic"   (default: NULL )
    -  subsample = 0.5   (default: NULL )

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

allModels [1] "FDA" "GAM" "GBM" "GLM" "MAXENT" "RF" "RFd" "XGBOOST" models.pa $FDA [1] "PA1" "PA2"

$GBM [1] "PA3" "PA4"

$RF [1] "PA1" "PA2"

$RFd [1] "PA1" "PA2"

$XGBOOST [1] "PA1" "PA2"

$GLM [1] "PA3" "PA4"

$GAM [1] "PA3" "PA4"

$MAXENT [1] "PA3" "PA4"`

MayaGueguen commented 2 weeks ago

Hello Chenyongpeng 👋

I'm not sure I understand at which step / function the error arises, but from what I can see:

Here are some recommendations:

And if you have more details about which function is causing the error, please do not hesitate to send them 👀

Hope it helps, Maya