biomodhub / biomod2

BIOMOD is a computer platform for ensemble forecasting of species distributions, enabling the treatment of a range of methodological uncertainties in models and the examination of species-environment relationships.
91 stars 22 forks source link

Error in BIOMOD_4.2-6-1 - [The results of the ensemble model are negative and greater than 1000] #519

Open 0009a opened 1 month ago

0009a commented 1 month ago

Error and context: When I use biomod2 4.2-6-1 to predict species distribution, the ensemble model output contains negative values and values greater than 1000. Code used to get the error: rm(list=ls()) library(biomod2) library(terra) library(gbm3)

# ---- Load occurrence data and environmental layers ----
# Machine-specific working directory: every relative path below resolves
# against it.
setwd('E:/Data analysis/R/biomod2_xj_ca')
getwd()

speciesdata <- read.csv('./point.csv')
head(speciesdata)

# speciename has to be defined first: it selects the response column of
# speciesdata and is reused as resp.name below.
speciename <- 'ca'
specie.var <- as.numeric(speciesdata[, speciename])

speciexy <- speciesdata[, c("x", "y")]

# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end of the file name: only files ending in ".asc" are
# stacked. An unanchored ".asc" also matches any name that merely contains
# "asc" after some character (for example sidecar files named "*.asc.aux.xml").
envir.files <- list.files(
  path = "./model_envir/",
  pattern = "\\.asc$",
  full.names = TRUE
)
envir <- rast(envir.files)
envir
plot(envir)

# First pass: format the data with the response exactly as recorded.
mybiomoddata.first <- BIOMOD_FormatingData(
  resp.var = specie.var,
  expl.var = envir,
  resp.xy = speciexy,
  resp.name = speciename
)
mybiomoddata.first
summary(mybiomoddata.first)
plot(mybiomoddata.first)

# Keep presences (1) and turn every other record into NA, so that the recorded
# absences become candidates for pseudo-absence selection.
myResp.PA <- ifelse(specie.var == 1, 1, NA)

# Second pass: 3 pseudo-absence repetitions of 500 points, drawn at random.
mybiomoddata <- BIOMOD_FormatingData(
  resp.var = myResp.PA,
  expl.var = envir,
  resp.xy = speciexy,
  resp.name = speciename,
  PA.nb.rep = 3,
  PA.nb.absences = c(500),
  PA.strategy = 'random'
)
summary(mybiomoddata)
plot(mybiomoddata)

# Algorithms to fit.
all.models <- c(
  "ANN", "CTA", "FDA", "GAM", "GBM",
  "GLM", "MARS", "MAXENT", "RF", "SRE"
)

# Define MAXENT options
myMAXENToptions <- list(threshold = FALSE, path_to_maxent.jar = ".")

# The same option set is registered under each of the four dataset names.
user.MAXENT <- setNames(
  rep(list(myMAXENToptions), 4),
  c('_allData_allRun', '_PA1_allRun', '_PA2_allRun', '_PA3_allRun')
)

# Define RF options
myRFoptions <- list(nodesize = 10, maxnodes = 20)

# One copy of the option set per column of the PA table, plus one extra entry
# for "_allData_allRun".
user.RF <- setNames(
  rep(list(myRFoptions), ncol(mybiomoddata@PA.table) + 1),
  c(paste0("_", names(mybiomoddata@PA.table), "_allRun"), "_allData_allRun")
)

# Define GLM options
# The response is presence / pseudo-absence and the models are declared with
# data.type = "binary", so the GLM uses a binomial family with a logit link:
# fitted values are then probabilities bounded in [0, 1]. A gaussian family
# with an identity link is an unbounded linear predictor and can return
# predictions below 0 or above the top of the suitability scale.
myGLMoptions <- list(
  family = binomial(link = "logit"),
  control = glm.control(maxit = 200)
)
# One copy of the option set per column of the PA table, plus one extra entry
# for "_allData_allRun".
user.GLM <- rep(list(myGLMoptions), (ncol(mybiomoddata@PA.table) + 1))
names(user.GLM) <- c(
  paste0("_", names(mybiomoddata@PA.table), "_allRun"),
  "_allData_allRun"
)

# Define MARS options
# Same reasoning as for the GLM: the glm step fitted on top of the MARS basis
# uses a binomial family so that predictions stay within [0, 1].
myMARSoptions <- list(
  glm = list(family = binomial(link = "logit")),
  ncross = 0,
  nk = 20,
  penalty = 3,
  thresh = 0.01
)
user.MARS <- rep(list(myMARSoptions), (ncol(mybiomoddata@PA.table) + 1))
names(user.MARS) <- c(
  paste0("_", names(mybiomoddata@PA.table), "_allRun"),
  "_allData_allRun"
)

# Define user values
# Each entry pairs a model identifier with the per-dataset option lists built
# for that algorithm.
user.val <- list(
  RF.binary.randomForest.randomForest = user.RF,
  MAXENT.binary.MAXENT.MAXENT = user.MAXENT,
  GLM.binary.stats.glm = user.GLM,
  MARS.binary.earth.earth = user.MARS
)

# Set up modeling options
myBiomodOption <- bm_ModelingOptions(
  data.type = "binary",
  models = all.models,
  strategy = 'user.defined',
  user.val = user.val,
  bm.format = mybiomoddata,
  calib.lines = NULL
)

# Output modeling options
print(myBiomodOption)

# Fit the single models with the user-defined options: 10 random
# cross-validation repetitions with CV.perc = 0.7, evaluated with ROC and TSS.
myBiomodModelOut <- BIOMOD_Modeling(
  bm.format = mybiomoddata,
  models = all.models,
  OPT.strategy = 'user.defined',
  OPT.user = myBiomodOption,
  CV.strategy = "random",
  CV.nb.rep = 10,
  CV.perc = 0.7,
  metric.eval = c("ROC", "TSS"),
  var.import = 3,
  scale.models = FALSE,
  nb.cpu = 8,
  do.progress = TRUE,
  seed.val = 42
)
myBiomodModelOut

# Evaluation scores: plot them and export them to CSV.
mybiomodmodeleval <- get_evaluations(myBiomodModelOut)
gg1 <- bm_PlotEvalMean(bm.out = myBiomodModelOut)
bm_PlotEvalBoxplot(bm.out = myBiomodModelOut, group.by = c('algo', 'run'))
write.csv(mybiomodmodeleval, "./myBiomodModelEval.CSV")
dimnames(mybiomodmodeleval)

# Variable importance: plot it and export it to CSV.
bm_PlotVarImpBoxplot(
  bm.out = myBiomodModelOut,
  group.by = c('expl.var', 'algo', 'algo')
)
mybiomodmodelout_variables_importance <- get_variables_importance(myBiomodModelOut)
write.csv(
  mybiomodmodelout_variables_importance,
  "./variables_importance_mean.CSV"
)

# Build one mean ensemble (EMmean) across all single models, keeping only
# models selected on TSS with a threshold of 0.8.
# NOTE(review): prob.mean / prob.cv / committee.averaging / prob.mean.weight
# are passed alongside em.algo; they look like legacy switches, so confirm
# whether they still have any effect in this biomod2 version.
ensemble_models <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodModelOut,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmean'),
  metric.select = c('TSS'),
  metric.select.thresh = c(0.8),
  metric.eval = c('TSS', 'ROC'),
  seed.val = 42,
  var.import = 3,
  prob.mean = FALSE,
  prob.cv = TRUE,
  committee.averaging = TRUE,
  prob.mean.weight = TRUE,
  nb.cpu = 10
)

# Ensemble variable importance: print it and export it to CSV.
mybiomodmodelout_variables_importance_em <- get_variables_importance(ensemble_models)
mybiomodmodelout_variables_importance_em
write.csv(
  mybiomodmodelout_variables_importance_em,
  "./variables_importance_mean_em.CSV"
)

# Ensemble evaluation scores: print them and export them to CSV.
mybiomodmodeleval_em <- get_evaluations(ensemble_models)
mybiomodmodeleval_em
write.csv(mybiomodmodeleval_em, "./mybiomodmodeleval_em.CSV")

# Project every single model onto the layers used for calibration.
# The argument names follow the same convention as the
# BIOMOD_EnsembleForecasting() call below (models.chosen / metric.binary);
# the older spellings selected.models / binary.meth do not match it.
# NOTE(review): confirm against the installed biomod2 version that the old
# names are no longer recognised.
mybiomodprojection <- BIOMOD_Projection(
  bm.mod = myBiomodModelOut,
  new.env = envir,
  proj.name = 'current',
  models.chosen = 'all',
  metric.binary = c('TSS', 'ROC'),
  compress = FALSE,
  build.clamping.mask = FALSE,
  nb.cpu = 10
)

# Forecast the TSS-selected mean ensemble from the single-model projections.
myBiomodEMProj <- BIOMOD_EnsembleForecasting(
  bm.em = ensemble_models,
  bm.proj = mybiomodprojection,
  models.chosen = 'ca_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
  metric.binary = 'TSS',
  nb.cpu = 10,
  metric.filter = 'all'
)

myBiomodEMProj plot(myBiomodEMProj) Additional information: Snipaste_2024-10-12_15-27-49

HeleneBlt commented 1 month ago

Hello there,

We choose not to truncate the values returned by the different algorithms (except for EMci, which is a special case). In fact, we think this can give a lot of information if you look at where you have negative values, with which algorithms, etc.

Perhaps it would be a good idea to add an argument that lets the user choose whether or not to clean the predictions? Let me know if you are interested 🙂

Hélène