NIEHS / chopin

Scalable GIS methods for environmental and climate data analysis
https://niehs.github.io/chopin/
Other
6 stars 2 forks source link

Quick development and performance comparison #3

Closed sigmafelix closed 10 months ago

sigmafelix commented 11 months ago

Objective

List of quick development/comparison tasks

sigmafelix commented 10 months ago
Code ``` r ######## pacman initialization ######## if (!require(pacman)) { install.packages('pacman') library(pacman) } #> Loading required package: pacman p_load(sf, terra, stars, exactextractr, tictoc, bench) ## NC, ~300 random points nc = st_read(system.file("shape/nc.shp", package="sf")) #> Reading layer `nc' from data source #> `/home/felix/R/x86_64-pc-linux-gnu-library/4.3/sf/shape/nc.shp' #> using driver `ESRI Shapefile' #> Simple feature collection with 100 features and 14 fields #> Geometry type: MULTIPOLYGON #> Dimension: XY #> Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 #> Geodetic CRS: NAD27 nc = st_transform(nc, 5070) g = st_make_grid(nc, n = c(10, 5)) pp = st_sample(nc, type = "Thomas", kappa = 5e-10, mu = 5, scale = 15000) pp[["id"]] = seq(1,nrow(pp)) st_crs(pp) = "EPSG:5070" ppb = st_buffer(pp, nQuadSegs=180, dist = units::set_units(20, 'km')) nrow(pp) #> [1] 365 # Take mean values across six variables tic() aw_interpolated = sf::st_interpolate_aw(nc[,9:14], ppb, extensive = FALSE) #> Warning in st_interpolate_aw.sf(nc[, 9:14], ppb, extensive = FALSE): #> st_interpolate_aw assumes attributes are constant or uniform over areas of x toc() #> 0.668 sec elapsed ## merra2 merra_terra = terra::rast("/home/felix/Documents/climate/MERRA2_400.inst3_3d_asm_Np.20230627.nc4") # consumed 30+ GB of memory and returned dimension error in "PHIS" layer # merra_stars = stars::read_stars("../../../../../Documents/climate/MERRA2_400.inst3_3d_asm_Np.20230627.nc4") terra::crs(merra_terra) = "EPSG:4326" # sf::st_crs(merra_stars) = "EPSG:4326" # Extract at points pp_merra = sf::st_transform(pp, sf::st_crs(4326)) # pp_extract_ee = exactextractr::exact_extract(merra_terra, pp_merra) pp_extract_tr = terra::extract(merra_terra, pp_merra) # Buffer extract pp_merra_buffer = sf::st_buffer(pp_merra, units::set_units(30, "km")) pp_merra_buffer_sv = terra::vect(pp_merra_buffer) merra_terra_clip = terra::crop(merra_terra, terra::ext(pp_merra_buffer_sv)) tic() pp_extract_buffer_tr = terra::extract(merra_terra_clip, pp_merra_buffer_sv, fun = mean) toc() #> 0.529 sec elapsed tic() pp_extract_buffer_trw = terra::extract(merra_terra_clip, pp_merra_buffer_sv, fun = mean, weights = TRUE, exact = TRUE) toc() #> 0.776 sec elapsed tic() pp_extract_buffer_ee = exactextractr::exact_extract(merra_terra_clip, pp_merra_buffer, fun = "mean") #> | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100% toc() #> 0.829 sec elapsed tic() pp_extract_buffer_eew = exactextractr::exact_extract(merra_terra_clip, pp_merra_buffer, fun = "mean", weights = "area") #> | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100% toc() #> 1.181 sec elapsed ## nc road if (!file.exists("./testdata/tl_2020_37_prisecroads.zip")) { download.file("https://www2.census.gov/geo/tiger/TIGER2020/PRISECROADS/tl_2020_37_prisecroads.zip", "tl_2020_37_prisecroads.zip") unzip("tl_2020_37_prisecroads.zip", exdir = "./testdata") } nc_road = sf::st_read("./testdata/tl_2020_37_prisecroads.shp") %>% sf::st_transform(5070) #> Reading layer `tl_2020_37_prisecroads' from data source #> `/tmp/Rtmp8P7pfR/reprex-39228259db041-piny-loon/testdata/tl_2020_37_prisecroads.shp' #> using driver `ESRI Shapefile' #> Simple feature collection with 8020 features and 4 fields #> Geometry type: LINESTRING #> Dimension: XY #> Bounding box: xmin: -84.31763 ymin: 33.87275 xmax: -75.46554 ymax: 36.578 #> Geodetic CRS: NAD83 nrow(pp) #> [1] 365 nrow(nc_road) #> [1] 8020 tic() nc_road_nn = nngeo::st_nn(pp, nc_road, sparse = F) #> lines or polygons #> | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================ | 24% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================ | 64% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |========================================================== | 84% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100% toc() #> 47.233 sec elapsed tic() nc_road_nf = sf::st_nearest_feature(pp, nc_road) toc() #> 0.165 sec elapsed tic() nc_road_nt = terra::nearest(vect(pp), vect(nc_road)) toc() #> 1.488 sec elapsed # What if the data is converted in advance? pptv = vect(pp) nc_road_tv = vect(nc_road) tic() nc_road_nt = terra::nearest(pptv, nc_road_tv) toc() #> 0.717 sec elapsed ``` Created on 2023-08-30 with [reprex v2.0.2](https://reprex.tidyverse.org)

Key takeaways

TODO

sigmafelix commented 10 months ago

Update

Takeaways

Code

Click to unfold ``` r ######## pacman initialization ######## if (!require(pacman)) { install.packages('pacman') library(pacman) } #> Loading required package: pacman p_load(sf, terra, stars, exactextractr, tictoc, bench, tigris) majority = \(x) names(sort(table(x), decreasing = TRUE))[1] tigris_cache_dir("/home/felix/tigris_shps_cache/") #> Your new tigris cache directory is /home/felix/tigris_shps_cache/. #> To use now, restart R or run `readRenviron('~/.Renviron')` ncbg20 = tigris::block_groups(state = "NC", cb = TRUE, year = 2020) # ncbg20 = sf::st_transform(ncbg20, "EPSG:4326") ## NC, ~300 random points nc = st_read(system.file("shape/nc.shp", package="sf")) #> Reading layer `nc' from data source #> `/home/felix/R/x86_64-pc-linux-gnu-library/4.3/sf/shape/nc.shp' #> using driver `ESRI Shapefile' #> Simple feature collection with 100 features and 14 fields #> Geometry type: MULTIPOLYGON #> Dimension: XY #> Bounding box: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965 #> Geodetic CRS: NAD27 nc = st_transform(nc, 5070) g = st_make_grid(nc, n = c(10, 5)) pp = st_sample(nc, type = "Thomas", kappa = 5e-10, mu = 5, scale = 15000) pp[["id"]] = seq(1,nrow(pp)) st_crs(pp) = "EPSG:5070" ppb = st_buffer(pp, nQuadSegs=180, dist = units::set_units(20, 'km')) nrow(pp) #> [1] 292 plot(nc$geometry) plot(pp$geom, col = 'red', pch = 19, cex = 0.6, add = TRUE) ``` ![](https://i.imgur.com/DsxMVWK.png) ``` r # Take mean values across six variables bench::mark({aw_interpolated = sf::st_interpolate_aw(nc[,9:14], ppb, extensive = FALSE)}, min_iterations = 5, min_time = 10) #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { aw_interpolated = sf::st_interpol… 379ms 380ms 2.60 30.2MB 23.4 ## merra2 merra_terra = terra::rast("/home/felix/Documents/climate/MERRA2_400.inst3_3d_asm_Np.20230627.nc4") # proxy = TRUE option consumes the very small amount of memory and returned dimension error in "PHIS" layer merra_stars = stars::read_stars("/home/felix/Documents/climate/MERRA2_400.inst3_3d_asm_Np.20230627.nc4", proxy = TRUE) #> EPV, H, O3, OMEGA, PHIS, PS, QI, QL, QV, RH, SLP, T, U, V, #> Error in attr(x, "dimensions")[[along]]: subscript out of bounds terra::crs(merra_terra) = "EPSG:4326" # sf::st_crs(merra_stars) = "EPSG:4326" # Extract at points pp_merra = sf::st_transform(pp, sf::st_crs(4326)) # pp_extract_ee = exactextractr::exact_extract(merra_terra, pp_merra) bench::mark({ pp_extract_tr = terra::extract(merra_terra, pp_merra) }, iterations = 10) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_tr = terra::extract(me… 2.38s 2.61s 0.376 86.4MB 0.715 # Buffer extract pp_merra_buffer = sf::st_buffer(pp_merra, units::set_units(30, "km")) pp_merra_buffer_sv = terra::vect(pp_merra_buffer) merra_terra_clip = terra::crop(merra_terra, terra::ext(pp_merra_buffer_sv)) pp_merra_bg = terra::vect(ncbg20) bench::mark({ pp_extract_buffer_tr = terra::extract(merra_terra_clip, pp_merra_buffer_sv, fun = mean) }, iterations = 10) #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_buffer_tr = terra::ext… 277ms 318ms 3.21 29.8MB 0.804 bench::mark({ pp_extract_buffer_trw = terra::extract(merra_terra_clip, pp_merra_buffer_sv, fun = mean, weights = TRUE, exact = TRUE) }, iterations = 10) #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_buffer_trw = terra::ex… 740ms 842ms 1.21 29.8MB 1.21 bench::mark({ pp_extract_buffer_ee = exactextractr::exact_extract(merra_terra_clip, pp_merra_buffer, fun = "mean") }, iterations = 10) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_buffer_ee = exactextra… 756ms 781ms 1.20 167MB 4.08 bench::mark({ pp_extract_buffer_eew = exactextractr::exact_extract(merra_terra_clip, pp_merra_buffer, fun = "mean", weights = "area") }, iterations = 10) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_buffer_eew = exactextr… 796ms 821ms 1.14 167MB 4.23 # small area extract bench::mark({ pp_extract_blockgroup_tr = terra::extract(merra_terra_clip, pp_merra_bg, fun = mean) }, iterations = 10) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_blockgroup_tr = terra:… 5.09s 5.21s 0.190 705MB 0.571 bench::mark({ pp_extract_blockgroup = terra::extract(merra_terra_clip, pp_merra_bg, fun = mean, weights = TRUE, exact = TRUE) }, iterations = 10) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_blockgroup = terra::ex… 5.1s 5.29s 0.187 705MB 0.300 bench::mark({ pp_extract_blockgroup_ee = exactextractr::exact_extract(merra_terra_clip, ncbg20, fun = "mean") }, iterations = 10) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_blockgroup_ee = exacte… 16.7s 17.7s 0.0559 3.56GB 1.52 bench::mark({ pp_extract_blockgroup_eew = exactextractr::exact_extract(merra_terra_clip, ncbg20, fun = "mean", weights = "area") }, iterations = 10) #> Warning in .local(x, y, ...): Polygons transformed to raster CRS (EPSG:4326) #> Warning: Some expressions had a GC in every iteration; so filtering is #> disabled. #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { pp_extract_blockgroup_eew = exact… 18.2s 20.7s 0.0490 3.56GB 1.25 # small area extract: 1000 block groups in NC nlcd21t = terra::rast("/home/felix/Downloads/nlcd_2021_land_cover_l48_20230630/nlcd_2021_land_cover_l48_20230630.img") ncbg20st = terra::vect(ncbg20) ncbg20st = terra::project(ncbg20st, terra::crs(nlcd21t)) ncbg20stb = ncbg20st[1:1000,] plot(ncbg20stb) ``` ![](https://i.imgur.com/ctldVul.png) ``` r # Crop nlcd to the extent of block groups nlcd21t_crop = terra::crop(nlcd21t, terra::ext(ncbg20stb)) #> |---------|---------|---------|---------|========================================= tic() ncbg_nlcd20_terra = terra::extract(nlcd21t_crop, ncbg20stb, fun = majority) toc() #> 113.501 sec elapsed ## nc road if (!file.exists("./testdata/tl_2020_37_prisecroads.zip")) { download.file("https://www2.census.gov/geo/tiger/TIGER2020/PRISECROADS/tl_2020_37_prisecroads.zip", "tl_2020_37_prisecroads.zip") unzip("tl_2020_37_prisecroads.zip", exdir = "./testdata") } nc_road = sf::st_read("./testdata/tl_2020_37_prisecroads.shp") %>% sf::st_transform(5070) #> Reading layer `tl_2020_37_prisecroads' from data source #> `/tmp/RtmpXEOs7f/reprex-507c437bca775-ash-crane/testdata/tl_2020_37_prisecroads.shp' #> using driver `ESRI Shapefile' #> Simple feature collection with 8020 features and 4 fields #> Geometry type: LINESTRING #> Dimension: XY #> Bounding box: xmin: -84.31763 ymin: 33.87275 xmax: -75.46554 ymax: 36.578 #> Geodetic CRS: NAD83 nrow(pp) #> [1] 292 nrow(nc_road) #> [1] 8020 # bench::mark({ # nc_road_nn = nngeo::st_nn(pp, nc_road, sparse = F) # }, min_time = 0.1, min_iterations = 5) bench::mark({ nc_road_nf = sf::st_nearest_feature(pp, nc_road) }, iterations = 10) #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { nc_road_nf = sf::st_nearest_featu… 168ms 172ms 5.67 15.3MB 0 bench::mark({ nc_road_nt = terra::nearest(vect(pp), vect(nc_road)) }, iterations = 10) #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { nc_road_nt = terra::nearest(vect(… 1.33s 1.53s 0.645 64.2MB 0.0717 pptv = vect(pp) nc_road_tv = vect(nc_road) bench::mark({ nc_road_nt = terra::nearest(pptv, nc_road_tv) }, iterations = 10) #> # A tibble: 1 × 6 #> expression min median `itr/sec` mem_alloc `gc/sec` #> #> 1 { nc_road_nt = terra::nearest(pptv,… 350ms 368ms 2.66 316KB 0 ``` Created on 2023-09-01 with [reprex v2.0.2](https://reprex.tidyverse.org)
sigmafelix commented 10 months ago

09/03/2023 update

Objective and description

Code

Click to unfold ```r if (!require(pacman)) { install.packages('pacman') library(pacman) } p_load(scomps, future.batchtools, sf, terra, tigris, spatstat.random, dplyr, tictoc) tigris::tigris_cache_dir("~/tigris_shps_cache") # note that the numbers are in kilometers. BUFFERS = c(5, 10, 20) NQUADSEGS = 90 # random point generation set.seed(2023) usstates20 = tigris::states(year = 2020) carolinas = sf::st_union(usstates20[which(usstates20$GEOID %in% c(37, 45)),]) |> sf::st_transform(5070) points_carolinas = sf::st_sample(carolinas, size = 1e4, type = "Thomas", kappa = 9.8e-10, mu = 50, scale = 3e4, exact = TRUE) grids_carolinas = scomps::sp_index_grid(points_carolinas, 8, 5) # assign CGRIDID to each point points_cuts = st_join(points_carolinas, grids_carolinas) make_cutlist = function(rastpath, feats, extrusion, uidfield = "CGRIDID") { crs_ref = terra::crs(terra::rast(rastpath)) failsafe = ifelse(st_is_longlat(feats), 0.01, 100) feats_conv = terra::project(terra::vect(feats), crs_ref) extlist = split(feats_conv, unlist(feats_conv[[uidfield]])) extlist2 = Map(\(x) terra::buffer(terra::convHull(x), extrusion * 1000, quadsegs = NQUADSEGS, capstyle = "square"), extlist) extlist3 = Map(\(x) terra::ext(x) + failsafe, extlist2) return(extlist3) } chunking = function(input, chunk_size) { nin = nrow(input) if (nin <= chunk_size) { indx_begin = 1 indx_end = nin } else { indx_begin = seq(0, (nin %/% chunk_size), 1) * chunk_size + 1 indx_end = seq(chunk_size, nin, chunk_size) chunk_last = nin %% chunk_size indx_end[length(indx_end) + 1] = ifelse(chunk_last != 0, max(indx_end) + chunk_last, NULL) } mat_indx = data.frame(indx_begin, indx_end) return(mat_indx) } # return extents mcuts = make_cutlist("/home/felix/Documents/NLCD/nlcd_2021_land_cover_l48_20230630.img", feats = grids_carolinas, BUFFERS[3]) # virtually define SpatRaster dataset with win argument in terra::rast nlcdcuts = lapply(mcuts, \(x) terra::rast(NLCDPATH, win = x)) points_cutslist = split(x = st_transform(points_cuts, crs(nlcdcuts[[1]])), f = points_cuts$CGRIDID) ## main test with 12 threads future::plan(multicore, workers = 12) tic() nlcd_extr_mean_12core = future.apply::future_mapply( \(ras, pnt) { ras = terra::crop(ras, terra::ext(ras)) pnts = terra::vect(pnt)[,2] buffsubs = terra::buffer(pnts, width = BUFFERS[3]*1000, quadsegs = NQUADSEGS) terra::extract(ras, buffsubs, fun = mean) }, nlcdcuts, points_cutslist, SIMPLIFY = FALSE, future.seed = TRUE) toc() #> 4645.785 sec elapsed ### - does 'chunking'--making small subsets--reduces processing time? ### we use 5 km radius for this test. ### 1. chunking by 100 tic() nlcd_extr_mean_12core_5k = future.apply::future_mapply( \(ras, pnt) { ras = terra::crop(ras, terra::ext(ras)) pnts = terra::vect(pnt)[,2] buffsubs = terra::buffer(pnts, width = BUFFERS[1]*1000, quadsegs = NQUADSEGS) gochunk = chunking(buffsubs, 100) gochunk = tapply(gochunk, seq(1, nrow(gochunk)), \(x) x) Map(\(q) terra::extract(ras, buffsubs[seq(q[,1], q[,2]),], fun = mean), gochunk) |> Reduce(f = rbind, x = _) }, nlcdcuts, points_cutslist, SIMPLIFY = FALSE, future.seed = TRUE) toc() #> 282.762 sec elapsed ### no chunking tic() nlcd_extr_mean_12core_5k_nochunk = future.apply::future_mapply( \(ras, pnt) { ras = terra::crop(ras, terra::ext(ras)) pnts = terra::vect(pnt)[,2] buffsubs = terra::buffer(pnts, width = BUFFERS[1]*1000, quadsegs = NQUADSEGS) # gochunk = chunking(buffsubs, 200) # gochunk = tapply(gochunk, seq(1, nrow(gochunk)), \(x) x) terra::extract(ras, buffsubs, fun = mean) }, nlcdcuts, points_cutslist, SIMPLIFY = FALSE, future.seed = TRUE) toc() #> 282.509 sec elapsed ```

Summary

sigmafelix commented 10 months ago

I will close this issue for the time being. A test on HPC will be addressed in another issue.