RetoSchmucki / rbms

A home for the latest rbms R package
https://retoschmucki.github.io/rbms/
Other
4 stars 4 forks source link

Updated code to use terra pakage and avoid raster and rasterVis #23

Open ptg732 opened 7 months ago

ptg732 commented 7 months ago

Hi

I did update the code from your workshop (https://butterfly-monitoring.github.io/bms_workshop/index.html) to use teh new terra package and discard the use of raster and rasterVis. I did also correct some small issues in your code taht were causing errors. Hope this is usefull

regards

if (!require("data.table")){ install.packages("data.table") library(data.table)} if (!require("speedglm")){ install.packages("speedglm") library(speedglm)} if (!require("devtools")){ install.packages("devtools") library(devtools)} if (!require("ggplot2")){ install.packages("ggplot2") library(ggplot2)} if (!require("reshape2")){ install.packages("reshape2") library(reshape2)} if (!require("plyr")){ install.packages("plyr") library(plyr)} if (!require("sf")){ install.packages("sf") library(sf)} if (!require("terra")) { install.packages("terra") library(terra)} if (!require("tmap")){ remotes::install_github('r-tmap/tmap')

install.packages("tmap")

    library(tmap)}

if (!require("DT")){ install.packages("DT") library(DT)}

rbms from GitHub

if(!requireNamespace("devtools")) { install.packages("devtools") devtools::install_github("RetoSchmucki/rbms") library(rbms)}

library(data.table)

load .rds data, we use the function readRDS()

b_count_sub <- readRDS("bms_workshop_data/work_count.rds") # species data with counts m_visit_sub <- readRDS("bms_workshop_data/work_visit.rds") # date and time of transects/stations m_site_sub <- readRDS("bms_workshop_data/work_site.rds") # km coordinates of sites

to read csv into a data.table we use fread()

bcz_class <- data.table::fread("bms_workshop_data/GEnS_v3_classification.csv") # Bioclimate classes b_count_sub

Extract columns, to store only 'bms_id', the reference to samples, teh year of the record and the full date

my_new_dt <- b_count_sub[ , .(bms_id, transect_id_serial, year, visit_date)] my_new_dt

Extract unique

unique(my_new_dt$bms_id) # store the unique 'bms_id' unique(my_new_dt[, .(bms_id, year)]) # store 'bms_id' and 'year'

Subset

my_new_dt[bms_id == "NLBMS", ]

Work with date

my_new_dt[ , month := month(visit_date)][ , c("day", "year") := .(mday(visit_date), year(visit_date))] my_new_dt

Count object

my_new_dt[ , .N, by = bms_id]

rename column

change names

setnames(b_count_sub, "transect_id_serial", "SITE_ID") setnames(b_count_sub, c("species_name", "bms_id"), c("SPECIES", "BMS_ID")) names(b_count_sub) <- toupper(names(b_count_sub)) b_count_sub

merge data set

merge species count data with site coordinate data based on a common identifier (SITE_ID),

demonstrating both inner and left joins.

setnames(m_site_sub, "transect_id_serial", "SITE_ID")

setkey(b_count_sub, SITE_ID) setkey(m_site_sub, SITE_ID)

merge(b_count_sub, m_site_sub[bms_id == "FRBMS" , .(SITE_ID, transect_lon_1km, transect_lat_1km)]) merge(b_count_sub, m_site_sub[bms_id == "FRBMS" , .(SITE_ID, transect_lon_1km, transect_lat_1km)], all.x = TRUE) data_m <- merge(b_count_sub, m_site_sub[ , .(SITE_ID, transect_lon_1km, transect_lat_1km)], all.x = TRUE) data_m

Mapping BMS sites

library(data.table) library(sf) library(tmap)

b_count_sub <- readRDS("bms_workshop_data/work_count.rds") m_visit_sub <- readRDS("bms_workshop_data/work_visit.rds") m_site_sub <- readRDS("bms_workshop_data/work_site.rds")

Mapping

bcz <- terra::rast("bms_workshop_data/metzger_bcz_genz3.grd") bb <- sf::st_read("bms_workshop_data/bbox.shp") bcz_class <- data.table::fread("bms_workshop_data/GEnS_v3_classification.csv")

sf transect location

site_sf <- st_as_sf(m_site_sub, coords = c("transect_lon_1km", "transect_lat_1km"), crs = 3035) mapview::mapview(site_sf)

To produce a map with the Bioclimate Region, we use the function levelplot() in the rasterVis package

plot point on raster

site_sf_4326 <- st_transform(site_sf, crs = 4326) bb_4326 <- st_transform(bb, crs = 4326)

Begin the tmap visualization

tm_layout(frame = FALSE) + # Optional: Adjust layout settings as needed tm_shape(bcz) + tm_raster(style = "cont", palette = "viridis", title = "Bioclimate") + tm_shape(site_sf_4326) + tm_dots(col = "cyan4", size = 0.1) + # Adjust size as needed tm_shape(bb_4326) + tm_borders(col = "magenta", lwd = 2, lty = 2)

plot point on raster using tmap

tm_shape(bcz) + tm_raster(palette = "viridis", alpha = 1) + # Example using a viridis palette tm_shape(st_geometry(st_transform(site_sf, 4326))) + tm_symbols(col = "dodgerblue4", size = 0.1, shape = 20) + tm_shape(st_geometry(st_transform(bb, 4326))) + tm_borders(lty = 2, col= 'magenta', lwd=2)

Extracting spatial information

site_dt <- data.table(site_sf_4326)

Convert 'site_sf' to 'data.table', ensuring to include all attributes

site_dt <- data.table::setDT(sf::st_drop_geometry(site_sf_4326)) print(names(site_sf_4326))

Assuming 'bcz' is a SpatRaster object loaded with terra

Transform 'site_sf' to match the CRS of 'bcz'

site_sf_4326 <- st_transform(site_sf_4326, crs = crs(bcz))

Assuming 'site_sf_4326' is correctly transformed

Extract values

extracted_values <- terra::extract(bcz, site_sf_4326, exact=TRUE)

Ensure extracted_values is correctly structured for assignment

If bcz has a single layer, extracted_values should be a vector or a single-column data.frame

if (is.data.frame(extracted_values)) {

Assuming we're interested in the first layer or there's only one

    site_dt$GEnZ_seq <- extracted_values[[1]]

} else {

If it's not a data.frame, it should be directly assignable, but this condition is just for illustration

    site_dt$GEnZ_seq <- extracted_values

}

----

setkey(site_dt, GEnZ_seq) setkey(bcz_class, GEnZ_seq) site_dt <- merge(site_dt, unique(bcz_class[, .(GEnZ_seq, GEnZname)]), all.x=TRUE)

After extraction and before merge

print(names(site_dt))

Exploring results

library(DT) datatable(site_dt[ ,.(bms_id, transect_id_serial, GEnZ_seq, GEnZname)], rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))

---

mapview::mapview(bcz) + mapview::mapview(st_transform(site_sf, 4326))

Phenology in BMS data - flight curve

if(!requireNamespace("devtools")) install.packages("devtools") devtools::install_github("RetoSchmucki/rbms") library(data.table) library(rbms)

b_count_sub <- readRDS("bms_workshop_data/work_count.rds") m_visit_sub <- readRDS("bms_workshop_data/work_visit.rds")

Explore the data

library(DT) datatable(b_count_sub[1:50, ], rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T)) datatable(m_visit_sub[1:50, ], rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T)) unique(b_count_sub[ , .(bms_id, species_name)]) unique(b_count_sub[order(year), year])

1. Select a single species

s_sp <- "Polyommatus icarus" region_bms <- c("NLBMS", "FRBMS") data(m_visit) data(m_count)

2. Arrange data set name to fit the functions requirements

setnames(m_visit_sub, c("transect_id_serial", "visit_date"), c("SITE_ID", "DATE")) names(m_visit_sub) <- toupper(names(m_visit_sub))

setnames(b_count_sub, c("transect_id_serial", "visit_date", "species_name"), c("SITE_ID", "DATE", "SPECIES")) names(b_count_sub) <- toupper(names(b_count_sub))

3. Prepare a full time-series to align count, visit, monitoring and flight curve estimate,

using the function ts_dwmy_table(), with 'InitYear', 'LastYear' and 'WeekDay1'

ts_date <- rbms::ts_dwmy_table(InitYear = 2008, LastYear = 2017, WeekDay1 = "monday")

4. Define the monitoring season with the function ts_monit_season()(), using ts_date, StartMonth, EndMonth, StartDay,

EndDay, CompltSeason, Anchor, AnchorLength, AnchorLag, TimeUnit (“d” or “w”) as arguments

ts_season <- rbms::ts_monit_season(ts_date, StartMonth = 4, EndMonth = 9, StartDay = 1, EndDay = NULL, CompltSeason = TRUE, Anchor = TRUE, AnchorLength = 2, AnchorLag = 2, TimeUnit = "w")

5. Align the site visit in the region of interest with your monitoring season

MY_visit_region <- m_visit_sub[BMS_ID %in% region_bms, ] ts_season_visit <- rbms::ts_monit_site(ts_season, MY_visit_region)

6. Align the butterfly counts recorded for the species and region of interest with your visit and monitoring season

MY_count_region <- b_count_sub[BMS_ID %in% region_bms, ] ts_season_count <- rbms::ts_monit_count_site(ts_season_visit, MY_count_region, sp = s_sp)

7. Estimate the flight curve for a species and region of interest,

using cubic spline smoother available in the mgcv package and considering

ts_flight_curve <- rbms::flight_curve(ts_season_count, NbrSample = 500, MinVisit = 3, MinOccur = 2, MinNbrSite = 5, MaxTrial = 4, GamFamily = "nb", SpeedGam = FALSE, CompltSeason = TRUE, SelectYear = NULL, TimeUnit = "w" ) saveRDS(ts_flight_curve, file.path("bms_workshopdata", paste(gsub(" ", "", s_sp), paste(regionbms, collapse=""), "pheno.rds", sep = "_")))

8. Extract and plot the estimated phenology

ts_flight_curve <- readRDS(file.path("bms_workshopdata", paste(gsub(" ", "", s_sp), paste(regionbms, collapse=""), "pheno.rds", sep = "_"))) datatable(ts_flight_curve$pheno[1:370, .(SPECIES, M_YEAR, MONTH, trimWEEKNO, WEEK_SINCE, ANCHOR, NM)], rownames = FALSE, filter = "top", options = list(pageLength = 5, scrollX = T))

Plot using base R

Extract phenology part

pheno <- ts_flight_curve$pheno

add the line of the first year

yr <- unique(pheno[order(M_YEAR), as.numeric(as.character(M_YEAR))])

if("trimWEEKNO" %in% names(pheno)){ plot(pheno[M_YEAR == yr[1], trimWEEKNO], pheno[M_YEAR == yr[1], NM], type = 'l', ylim = c(0, max(pheno[, NM])), xlab = 'Monitoring Week', ylab = 'Relative Abundance') } else { plot(pheno[M_YEAR == yr[1], trimDAYNO], pheno[M_YEAR == yr[1], NM], type = 'l', ylim = c(0, max(pheno[, NM])), xlab = 'Monitoring Day', ylab = 'Relative Abundance')

}

add individual curves for additional years

if(length(yr) > 1) { i <- 2 for(y in yr[-1]){ if("trimWEEKNO" %in% names(pheno)){ points(pheno[M_YEAR == y , trimWEEKNO], pheno[M_YEAR == y, NM], type = 'l', col = i) } else { points(pheno[M_YEAR == y, trimDAYNO], pheno[M_YEAR == y, NM], type = 'l', col = i) } i <- i + 1 } }

add legend

legend('topright', legend = c(yr), col = c(seq_along(c(yr))), lty = 1, bty = 'n')

Computing site and collated indices

Generalized Abundance Index

library(data.table) library(rbms) library(ggplot2)

s_sp <- "Maniola jurtina"

s_sp <- "Polyommatus icarus" region_bms <- c("NLBMS", "FRBMS")

We will use the count and visit data

b_count_sub <- readRDS("bms_workshop_data/work_count.rds") m_visit_sub <- readRDS("bms_workshop_data/work_visit.rds")

setnames(m_visit_sub, c("transect_id_serial", "visit_date"), c("SITE_ID", "DATE")) names(m_visit_sub) <- toupper(names(m_visit_sub))

setnames(b_count_sub, c("transect_id_serial", "visit_date", "species_name"), c("SITE_ID", "DATE", "SPECIES")) names(b_count_sub) <- toupper(names(b_count_sub))

ts_date <- rbms::ts_dwmy_table(InitYear = 2008, LastYear = 2017, WeekDay1 = "monday")

ts_season <- rbms::ts_monit_season(ts_date, StartMonth = 4, EndMonth = 9, StartDay = 1, EndDay = NULL, CompltSeason = TRUE, Anchor = TRUE, AnchorLength = 2, AnchorLag = 2, TimeUnit = "w")

MY_visit_region <- m_visit_sub[BMS_ID %in% region_bms, ] ts_season_visit <- rbms::ts_monit_site(ts_season, MY_visit_region)

MY_count_region <- b_count_sub[BMS_ID %in% region_bms, ] ts_season_count <- rbms::ts_monit_count_site(ts_season_visit, MY_count_region, sp = s_sp)

Impute & Site index

impute values for missing week counts

and adequately compute sites abundance index over the entire monitoring season

impt_counts <- rbms::impute_count(ts_season_count = ts_season_count, ts_flight_curve = pheno, YearLimit = NULL, TimeUnit = "w")

sindex <- rbms::site_index(butterfly_count = impt_counts, MinFC = 0.10)

saveRDS(sindex, file.path("bms_workshopdata", paste( gsub(" ", "", s_sp), paste(regionbms, collapse=""), "sindex.rds", sep="_")))

Collated index, using the collated_index function in rbms to estimate annual abundance indices for a wider region

we will focus on the sites monitored in the Netherlands (NLBMS) and compute a collated index for Polyommatus icarus

accross the site monitored in that region over the 10-year period.

bms_id <- "NLBMS" sindex <- readRDS(file.path("bms_workshopdata", paste( gsub(" ", "", s_sp), paste(regionbms, collapse=""), "sindex.rds", sep="_"))) sindex[substr(SITE_ID, 1, 5) == bms_id, ]

co_index <- collated_index(data = sindex[substr(SITE_ID, 1, 5) == bms_id, ], s_sp = s_sp, sindex_value = "SINDEX", glm_weights = TRUE, rm_zero = TRUE) co_index$col_index

Bootstrap CI

bms_id <- "NLBMS" set.seed(125361)

bootsample <- rbms::boot_sample(sindex[substr(SITE_ID, 1, 5) == bms_id, ], boot_n = 500) co_index <- list()

for progression bar, uncomment the following

pb <- txtProgressBar(min = 0, max = dim(bootsample$boot_ind)[1], initial = 0, char = "*", style = 3)

for (i in c(0, seq_len(dim(bootsample$boot_ind)[1]))) { co_index[[i + 1]] <- rbms::collated_index(data = sindex[substr(SITE_ID, 1, 5) == bms_id, ], s_sp = s_sp, sindex_value = "SINDEX", bootID = i, boot_ind = bootsample, glm_weights = TRUE, rm_zero = TRUE)

    ## for progression bar, uncomment the following
    setTxtProgressBar(pb, i)

}

collate and append all the result in a data.table format

co_index <- rbindlist(lapply(co_index, FUN = "[[", "col_index")) co_index$BMS_ID <- bms_id co_index$SPECIES <- s_sp

co_index_boot <- co_index saveRDS(co_index_boot, file.path("bms_workshopdata", paste( gsub(" ", "", s_sp), bms_id, "co_indexboot.rds", sep="")))

Figure with CI

bms_id <- "NLBMS"

co_index_boot <- readRDS(file.path("bms_workshopdata", paste( gsub(" ", "", s_sp), bms_id, "co_indexboot.rds", sep="")))

boot0 <- co_index_boot[ , mean(COL_INDEX, na.rm=TRUE), by = M_YEAR] boot0[, LOGDENSITY0 := log(V1) / log(10)] boot0[, meanlog0 := mean(LOGDENSITY0)]

Add mean of BOOTi == 0 (real data) to all data

boot_ests <- merge(co_index_boot, boot0[, .(M_YEAR, meanlog0)], by = c("M_YEAR") )

boot_ests[, LOGDENSITY := log(COL_INDEX) / log(10)] boot_ests[, TRMOBSCI := LOGDENSITY - meanlog0 + 2]

boot_ests[, TRMOBSCILOW := quantile(TRMOBSCI, 0.025), by = M_YEAR] boot_ests[, TRMOBSCIUPP := quantile(TRMOBSCI, 0.975), by = M_YEAR]

ylim_ <- c(min(floor(range(boot_ests[TRMOBSCI <= TRMOBSCIUPP & TRMOBSCI >= TRMOBSCILOW, TRMOBSCI]))), max(ceiling(range(boot_ests[TRMOBSCI <= TRMOBSCIUPP & TRMOBSCI >= TRMOBSCILOW, TRMOBSCI]))))

plot_col <- ggplot(data = boot_ests[TRMOBSCI <= TRMOBSCIUPP & TRMOBSCI >= TRMOBSCILOW, ], aes(x = M_YEAR, y = TRMOBSCI)) + geompoint(colour = "orange", alpha = 0.2) + ylim(ylim) + geom_line(data = boot_ests[TRMOBSCI <= TRMOBSCIUPP & TRMOBSCI >= TRMOBSCILOW, median(TRMOBSCI, na.rm=TRUE), by = M_YEAR], aes(x = M_YEAR, y = V1), linewidth = 1, colour = "dodgerblue4") + geom_hline(yintercept = 2, linetype = "dashed", color = "grey50") + scale_x_continuous(minor_breaks = waiver(), limits = c(2005, 2020), breaks = seq(2005, 2020, by = 5)) + xlab("Year") + ylab("Abundance Incices (Log/Log(10))") + labs(subtitle = paste(s_sp, bms_id, sep = " "))

plot_col

Calculating species trends

source("workshop_functions.R")

Read in collated indices for species and BMS of interest

Specify the species - note the underscore

spp <- "Maniola_jurtina"

Specify the BMS

bms <- "UKBMS"

Read in the bootstrapped collated indices

co_index <- readRDS(paste0("./bms_workshop_data/", spp, "_co_index_boot.rds"))

Filter to a single BMS to focus upon

co_index <- co_index[BMS_ID == bms] co_index

Convert to log 10 collated indices (TRMOBS)

Only use COL_INDEX larger then 0 for calculation or logdensity and trmobs

co_index[COL_INDEX > 0, LOGDENSITY:= log(COL_INDEX)/log(10)][, TRMOBS := LOGDENSITY - mean(LOGDENSITY) + 2, by = .(BOOTi)] co_index

Estimate and classify species trends with a confidence interval based on the bootstraps

sp_trend <- estimate_boot_trends(co_index) sp_trend

Plot the species log collated index with bootstrapped 95% confidence interval and linear trend line (in red)

Calculate mean log index for original data

co_index0_mean <- mean(co_index[BOOTi == 0]$LOGDENSITY, na.rm = TRUE)

Derive interval from quantiles

co_index_ci <- merge(co_index[BOOTi == 0, .(M_YEAR, TRMOBS, BMS_ID)], co_index[BOOTi != 0, .( LOWER = quantile(LOGDENSITY - co_index0_mean + 2, 0.025, na.rm = TRUE), UPPER = quantile(LOGDENSITY - co_index0_mean + 2, 0.975, na.rm = TRUE)), by = .(BMS_ID, M_YEAR)], by=c("BMS_ID","M_YEAR"))

ggplot(co_index_ci, aes(M_YEAR, TRMOBS))+ theme(text = element_text(size = 14))+ geom_line()+ geom_point()+ geom_ribbon(aes(ymin = LOWER, ymax = UPPER), alpha = .3)+ geomsmooth(method="lm", se=FALSE, color="red")+ xlab("Year")+ylab(expression('log '['(10)']*' Collated Index'))+ ggtitle(paste("Collated index for", gsub("", " ", spp), "in", bms))

Multi-species indicators

Read in collated indices for two species and filter to one BMS

bms <- "UKBMS" co_index <- rbind(readRDS("./bms_workshop_data/Maniola_jurtina_co_index_boot.rds"), readRDS("./bms_workshop_data/Polyommatus_icarus_co_index_boot.rds")) co_index <- co_index[BMS_ID == bms] co_index

Convert to log 10 collated indices (TRMOBS)

co_index[COL_INDEX > 0, LOGDENSITY:= log(COL_INDEX)/log(10)][,TRMOBS := LOGDENSITY - mean(LOGDENSITY) + 2, by = .(SPECIES, BOOTi)] co_index

calculate and plot the indicator, where the confidence interval is based on the bootstraps

First we generate the indicator for real data

msi <- produce_indicator0(co_index[BOOTi == 0]) msi

Then generate an indicator for each bootstrap

msi_boot <- produce_indicators_boot(co_index[BOOTi > 0]) str(msi_boot) msi_boot[,1:5]

add a confidence interval to the indicator based on quantiles from the bootstrapped indicators

msi <- add_indicator_CI(msi, msi_boot) msi

Plot the indicator

ggplot(msi, aes(year, indicator))+ theme(text = element_text(size = 14))+ geom_ribbon(aes(ymin = LOWsmooth1, ymax = UPPsmooth1), alpha=.2, fill = "blue")+ geom_point(color = "blue")+ geom_line(aes(y = SMOOTH), color = "blue")+ ylim(c(0, max(200,max(msi$UPPsmooth1))))+ ggtitle(paste("Example indicator for", bms, "based on two species"))

Calculate the indicator trend including confidence interval from the bootstraps

msi_trend <- estimate_ind_trends(msi, msi_boot) msi_trend

RetoSchmucki commented 6 months ago

@ptg732 Thank you for your help! I will update the website with this code once I tested everything. 😃 🚀

ptg732 commented 5 months ago

Hi Reto One question: the code uses data stored locally. Is it possible to add the ability to search the API to provide data stored directly in the database? Or is there a database limitation preventing this kind of use?

I'm a registered user of the EBMS and would like to explore the data that we are providing for further analysis

regards

Pedro

RetoSchmucki @.***> escreveu (segunda, 26/02/2024 à(s) 15:47):

@ptg732 https://github.com/ptg732 Thank you for your help! I will update the website with this code once I tested everything. 😃 🚀

— Reply to this email directly, view it on GitHub https://github.com/RetoSchmucki/rbms/issues/23#issuecomment-1964462427, or unsubscribe https://github.com/notifications/unsubscribe-auth/ABSXYPPNN2CUU6VPCXG546TYVSU7NAVCNFSM6AAAAABDNNTDWCVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSNRUGQ3DENBSG4 . You are receiving this because you were mentioned.Message ID: @.***>

RetoSchmucki commented 5 months ago

Hi Pedro, Unfortunately, we do not have an API to query the eBMS database. This is partly due to the data agreement we have with the National BMS partners (the data owners). Also, the eBMS online portal only covers some of the data available across the partner schemes. Many countries use their own system (database). We maintain a database that collates all the data up to 2021, but access to these data needs to be granted by the contributing schemes. A data request can be done through the website https://butterfly-monitoring.net/ebms-data%20access

ptg732 commented 5 months ago

Thank you for your answer. regards

Pedro

RetoSchmucki @.***> escreveu (quarta, 10/04/2024 à(s) 11:30):

Hi Pedro, Unfortunately, we do not have an API to query the eBMS database. This is partly due to the data agreement we have with the National BMS partners (the data owners). Also, the eBMS online portal only covers some of the data available across the partner schemes. Many countries use their own system (database). We maintain a database that collates all the data up to 2021, but access to these data needs to be granted by the contributing schemes. A data request can be done through the website https://butterfly-monitoring.net/ebms-data%20access

— Reply to this email directly, view it on GitHub https://github.com/RetoSchmucki/rbms/issues/23#issuecomment-2047164152, or unsubscribe https://github.com/notifications/unsubscribe-auth/ABSXYPNYWPFVF2INWWWXF53Y4UIFPAVCNFSM6AAAAABDNNTDWCVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMZDANBXGE3DIMJVGI . You are receiving this because you were mentioned.Message ID: @.***>