Open wildintellect opened 1 month ago
@abarenblitt I remembered one more thing.
- r-lme4
- r-arm
and cut install.R down to just optmatch (though now I see optmatch is also in the conda environment :thinking: )
I've gone through and separated out some of the loop to run in pieces as suggested. It seems the last part (the extract and cbind) is where the memory is getting clogged up:
# List the GEDI tile GeoPackages stored under the shared workspace prefix for
# each product (L2A, L4A, L2B) and keep the single tile currently under test.
# Each statement sits on its own line so that the trailing "#" comments no
# longer swallow the statements that follow them (the L2B listing used to be
# commented out by accident).
# NOTE(review): list_objects_v2() returns at most 1000 keys per call; paginate
# with ContinuationToken if a prefix ever holds more tiles than that.

# --- L2A tiles ---
results <- s3$list_objects_v2(
  Bucket = "maap-ops-workspace",
  Prefix = "shared/abarenblitt/GEDI_global_PA_v2/WDPA_gedi_L2A_tiles/"
)
# vapply() always yields a character vector, even when the prefix is empty.
all_gedil2_f <- vapply(results$Contents, function(x) x$Key, character(1))
pattern <- ".gpkg"
all_gedil2_f <- grep(pattern, all_gedil2_f, value = TRUE)
# Currently specifying working files (alternative subset: [4:6]).
all_gedil2_f <- basename(all_gedil2_f)[8]

# --- L4A tiles ---
results4 <- s3$list_objects_v2(
  Bucket = "maap-ops-workspace",
  Prefix = "shared/abarenblitt/GEDI_global_PA_v2/WDPA_gedi_L4A_tiles/"
)
all_gedil4_f <- vapply(results4$Contents, function(x) x$Key, character(1))
pattern4 <- ".gpkg"
all_gedil4_f <- grep(pattern4, all_gedil4_f, value = TRUE)
# Currently specifying working files (alternative subset: [4:6]).
all_gedil4_f <- basename(all_gedil4_f)[8]

# --- L2B tiles ---
results2b <- s3$list_objects_v2(
  Bucket = "maap-ops-workspace",
  Prefix = "shared/abarenblitt/GEDI_global_PA_v2/WDPA_gedi_L2B_tiles/"
)
all_gedil2b_f <- vapply(results2b$Contents, function(x) x$Key, character(1))
pattern <- ".gpkg"
all_gedil2b_f <- grep(pattern, all_gedil2b_f, value = TRUE)
# Currently specifying working files (alternative subset: [4:6]).
all_gedil2b_f <- basename(all_gedil2b_f[8])
#' Join the GEDI L2A, L4A and L2B tiles by shot number and save the result.
#'
#' Loops over the tile names in `all_gedil2_f`, reads the tile at the same
#' position from the L4A, L2A and L2B folders under `gedipath`, joins them on
#' `shot_number`, and writes the joined table to an RDS file under
#' `f.path3/GNB_extractStep1/`.
#'
#' Reads these variables from the calling environment: `all_gedil2_f`,
#' `all_gedil4_f`, `all_gedil2b_f`, `gedipath`, `iso3`, `id_pa`, `gediwk`,
#' `f.path3`.
#'
#' @param matched Not used by this step; kept so existing calls keep working.
#' @return `NULL`, invisibly. Called for its side effect of writing RDS files.
extract_gedi2b <- function(matched) {
  # NOTE(review): the three file vectors are indexed with the same position,
  # so they are assumed to list the same tiles in the same order -- confirm.
  for (this_csvid in seq_along(all_gedil2_f)) {
    cat("Reading in no. ", this_csvid, "csv of ", length(all_gedil2_f), "csvs for iso3", iso3, "\n")

    # Read GEDI L4A data
    gedil4_f_path <- paste0(gedipath, "WDPA_gedi_L4A_tiles/", all_gedil4_f[this_csvid])
    gedil4_f <- as.data.frame(st_read(gedil4_f_path))

    # Read GEDI L2A data
    gedil2_f_path <- paste0(gedipath, "WDPA_gedi_L2A_tiles/", all_gedil2_f[this_csvid])
    gedil2_f <- as.data.frame(st_read(gedil2_f_path))

    # Read GEDI L2B data and shorten the prefixed column names used below
    gedil2b_f_path <- paste0(gedipath, "WDPA_gedi_L2B_tiles/", all_gedil2b_f[this_csvid])
    gedil2b_f <- as.data.frame(st_read(gedil2b_f_path))
    names(gedil2b_f)[names(gedil2b_f) == "geolocation.lon_lowestmode"] <- "lon_lowestmode"
    names(gedil2b_f)[names(gedil2b_f) == "geolocation.lat_lowestmode"] <- "lat_lowestmode"
    names(gedil2b_f)[names(gedil2b_f) == "land_cover_data.landsat_treecover"] <- "landsat_treecover"

    # L2A + L4A: with no L4A rows, keep the L2A shots and fill the biomass
    # columns with NA so later steps still see the same column set.
    if (nrow(gedil4_f) < 1) {
      cat("Error: No data for GEDI L4A\n")
      gedi_l24 <- gedil2_f
      gedi_l24$agbd <- NA
      gedi_l24$agbd_se <- NA
      gedi_l24$agbd_t <- NA
      gedi_l24$agbd_t_se <- NA
    } else {
      # Select relevant columns from GEDI L4A
      gedi_l4_sub <- gedil4_f %>%
        st_drop_geometry() %>%
        dplyr::select(shot_number, agbd, agbd_se, agbd_t, agbd_t_se)
      # Join with GEDI L2A data
      gedi_l24 <- inner_join(gedil2_f, gedi_l4_sub, by = "shot_number")
    }

    # (L2A + L4A) + L2B: same fallback when the L2B tile has no rows.
    if (nrow(gedil2b_f) < 1) {
      # This branch is about L2B; the message used to name L4A by mistake.
      cat("Error: No data for GEDI L2B\n")
      gedi_l24b <- gedi_l24
      gedi_l24b$landsat_treecover <- NA
      gedi_l24b$pai <- NA
      gedi_l24b$fhd_normal <- NA
    } else {
      # Select relevant columns from GEDI L2B
      gedi_l2b_sub <- gedil2b_f %>%
        st_drop_geometry() %>%
        dplyr::select(shot_number, landsat_treecover, pai, fhd_normal)
      # Join with the combined L2A/L4A table
      gedi_l24b <- inner_join(gedi_l24, gedi_l2b_sub, by = "shot_number")
    }

    # Save the joined table as RDS.
    # NOTE(review): the file name has no tile component, so processing more
    # than one tile overwrites the same file -- confirm whether the tile id
    # should be part of the name.
    saveRDS(
      gedi_l24b,
      file = paste0(
        f.path3, "GNB_extractStep1/", iso3, "_pa_", id_pa,
        "_gedi_wk_", gediwk, ".RDS"
      )
    )
    cat(id_pa, "in", iso3, "results are written to directory\n")
  }
  invisible(NULL)
}
# Run step 1 (writes the joined GEDI table to RDS), then collect the RDS files
# found in the step-1 output folder. The two statements must be on separate
# lines to parse; the pattern is anchored so only names ending in ".RDS" match.
iso_test <- extract_gedi2b(matched = matched)
extracted <- list.files(
  paste0(f.path3, "GNB_extractStep1/"),
  pattern = "\\.RDS$",
  full.names = TRUE
)
# NOTE: the part below uses up too much memory.
#' Sample the matched raster stack at every GEDI shot and combine all tiles.
#'
#' For each RDS file listed in the variable `extracted` (read from the calling
#' environment), builds points from `lon_lowestmode` / `lat_lowestmode`
#' (EPSG:4326), reprojects them to EPSG:6933, extracts the values of `mras` at
#' those points, binds them to the GEDI columns, drops rows whose `status` is
#' NA, and passes the result through `convertFactor()`.
#'
#' Per-tile results are collected in a list and bound once at the end, instead
#' of growing a data frame with `rbind()` on every iteration. The points carry
#' coordinates only, so the GEDI table is not duplicated inside a spatial
#' object.
#'
#' @param matched Passed to `convertFactor()` as `matched0`.
#' @param mras Raster stack passed to `terra::extract()`; may be `NULL`.
#' @return The combined data frame, or `NULL` when `mras` is `NULL` or no
#'   tile contained any rows.
extract_gediPart2 <- function(matched, mras) {
  if (is.null(mras)) {
    cat("No raster stack supplied; skipping GEDI extraction\n")
    return(NULL)
  }

  results_list <- vector("list", length(extracted))
  total_rows <- 0L

  for (this_csvid in seq_along(extracted)) {
    gedi_l24b <- readRDS(extracted[this_csvid])
    if (nrow(gedi_l24b) < 1) {
      next
    }

    # Coordinates-only points: attributes are re-attached with cbind() below.
    shot_xy <- as.matrix(gedi_l24b[, c("lon_lowestmode", "lat_lowestmode")])
    shot_pts <- terra::project(
      terra::vect(shot_xy, type = "points", crs = "EPSG:4326"),
      "EPSG:6933"
    )

    # The cbind() below relies on extract() returning one row per point, in
    # point order.
    matched_gedi <- terra::extract(mras, shot_pts, df = TRUE)
    matched_gedi_metrics <- cbind(matched_gedi, gedi_l24b)

    tile_df <- convertFactor(
      matched0 = matched,
      exgedi = dplyr::filter(matched_gedi_metrics, !is.na(status))
    )
    results_list[[this_csvid]] <- tile_df

    # Progress: cumulative rows kept so far and the column count.
    total_rows <- total_rows + nrow(tile_df)
    print(c(total_rows, ncol(tile_df)))
  }

  results_list <- Filter(Negate(is.null), results_list)
  iso_matched_gedi_df <- if (length(results_list) > 0) {
    # rev() keeps the row order of the old "prepend newest tile" rbind().
    do.call(rbind, rev(results_list))
  } else {
    NULL
  }

  cat("Done GEDI processing\n")
  iso_matched_gedi_df
}
# Convert the matched data to a raster stack. If the conversion fails, report
# the error and fall back to NULL instead of aborting the script.
mras <- tryCatch(
  matched2ras(matched),
  error = function(e) {
    cat("Error converting matched data to raster stack for PA", id_pa, ":", conditionMessage(e), "\n")
    NULL
  }
)

# Only run the extraction when a raster stack is actually available.
iso_test2 <- if (is.null(mras)) {
  NULL
} else {
  extract_gediPart2(matched = matched, mras = mras)
}
These are links to the necessary variables for testing:
@abarenblitt can you put all those files in your shared-bucket
, extract and mras.
I still can't test it properly because I don't have an L2A gpkg that corresponds to an mras, but the following code seems pretty memory-efficient and fast:
library(terra)

# Rewrite an s3:// URI as a GDAL /vsis3/ path so terra can open it directly.
s3_to_vsis3 <- function(uri) {
  gsub("s3://", "/vsis3/", uri)
}

gedi_l2a <- "s3://maap-ops-workspace/shared/abarenblitt/GEDI_global_PA_v2/WDPA_gedi_L2A_tiles/tile_num_29780_L2A.gpkg"
mras_path <- "s3://maap-ops-workspace/shared/abarenblitt/GEDI_global_PA_v2/GNBmras_pa_342659_gedi_wk_24.tif"

# Open the raster first so the points can be reprojected to it as they load.
mras <- terra::rast(s3_to_vsis3(mras_path))
points <- terra::project(terra::vect(s3_to_vsis3(gedi_l2a)), mras)

# Sample the raster at the reprojected GEDI shots.
matched_gedi <- terra::extract(mras, points, id = FALSE, df = TRUE)
I would do this inside a function and before matching the tables.
@abarenblitt https://github.com/GEDI-PA/vl_GEDI-PA_2024/blob/main/matching_func_2024.r
https://github.com/rspatial/terra/issues/38#issuecomment-633094236
I didn't have the `matched` or `mras` objects, so I couldn't fully test everything. I mostly tested the loading and joining of the tabular data. Testing code: