tidymodels / spatialsample

Create and summarize spatial resampling objects 🗺
https://spatialsample.tidymodels.org
Other
71 stars 5 forks source link

`spatial_block_cv()` duplicates observations on boundaries #150

Closed mikemahoney218 closed 11 months ago

mikemahoney218 commented 11 months ago

By way of: https://stackoverflow.com/a/77399241/9625040

spatial_block_cv() duplicates observations on fold boundaries:

# Reprex data: 60 rows with x/y coordinates and four numeric covariates
# (ntl, pop, agbh, nir), written as a `structure()` data.frame literal.
drought <- structure(
  list(
    x = c(
      995494.2549, 995924.2549, 996354.2549, 996784.2549, 997214.2549,
      997644.2549, 998074.2549, 998504.2549, 998934.2549, 999364.2549,
      999794.2549, 1000224.2549, 1000654.2549, 1001084.2549, 1001514.2549,
      1001944.2549, 1002374.2549, 1002804.2549, 1003234.2549, 1003664.2549,
      1004094.2549, 1004524.2549, 1004954.2549, 1005384.2549, 1005814.2549,
      1006244.2549, 1006674.2549, 1007104.2549, 1007534.2549, 1007964.2549,
      1008394.2549, 1008824.2549, 1009254.2549, 1009684.2549, 1010114.2549,
      1010544.2549, 1010974.2549, 1011404.2549, 1011834.2549, 1012264.2549,
      1012694.2549, 1013124.2549, 1013554.2549, 1013984.2549, 1014414.2549,
      1014844.2549, 1015274.2549, 1015704.2549, 1016134.2549, 1016564.2549,
      1016994.2549, 1017424.2549, 1017854.2549, 1018284.2549, 1018714.2549,
      995494.2549, 995924.2549, 996354.2549, 996784.2549, 997214.2549
    ),
    # The first 55 rows share one y value; the remaining 5 use a second one.
    y = rep(c(1019851.5842, 1019421.5842), times = c(55L, 5L)),
    ntl = c(
      9.14866638183594, 15.3856477737427,
      16.3302040100098, 12.454291343689, 10.4823837280273, 11.394606590271,
      8.1963529586792, 4.50725030899048, 3.95374751091003, 5.73203563690186,
      14.3955335617065, 17.0745468139648, 14.2944135665894, 10.333722114563,
      9.80743503570557, 12.5352020263672, 19.8813304901123, 29.2410221099854,
      32.8321876525879, 29.575023651123, 28.5894374847412, 36.4911346435547,
      49.4252128601074, 61.3118171691895, 58.6104736328125, 43.0437355041504,
      28.096061706543, 34.8003845214844, 32.1936340332031, 28.3407783508301,
      22.5178966522217, 17.0638084411621, 20.7549228668213, 18.3547439575195,
      10.2983675003052, 7.3524694442749, 7.17788362503052, 7.06999540328979,
      8.03957176208496, 12.6783542633057, 18.7537479400635, 26.1656856536865,
      36.539493560791, 41.0569839477539, 25.5366401672363, 15.7820110321045,
      9.87918758392334, 7.65169858932495, 6.96318626403809, 8.69833087921143,
      12.1393032073975, 15.151198387146, 14.5944147109985, 9.46016979217529,
      4.53868055343628, 12.8388118743896, 21.1265335083008, 19.3046970367432,
      10.5719947814941, 8.08844661712646
    ),
    pop = c(
      31.2753772735596,
      55.8289375305176, 56.4003105163574, 33.795223236084, 31.0511913299561,
      30.5730743408203, 13.667106628418, 7.08161020278931, 6.89333772659302,
      13.9001550674438, 35.5272178649902, 42.4625587463379, 32.9688529968262,
      21.4302787780762, 12.6151924133301, 17.4939270019531, 38.1474113464355,
      60.8120536804199, 65.3665008544922, 53.8765907287598, 46.2705993652344,
      61.42333984375, 70.8307113647461, 53.3152236938477, 31.4083557128906,
      24.9810562133789, 38.3716621398926, 56.114860534668, 67.1656036376953,
      60.8404235839844, 33.7796592712402, 29.8311328887939, 44.3309173583984,
      31.9606342315674, 16.7053775787354, 10.1427822113037, 11.4020376205444,
      10.7794933319092, 18.2773151397705, 34.2912216186523, 50.6655197143555,
      52.1081962585449, 53.0502471923828, 59.4989013671875, 48.5897750854492,
      41.188159942627, 27.0699615478516, 11.5318984985352, 9.09538650512695,
      14.2379903793335, 24.8153190612793, 29.3468627929688, 30.5861835479736,
      15.3130531311035, 9.47307205200195, 37.2332077026367, 94.2268676757812,
      73.2485733032227, 26.8748569488525, 26.8519401550293
    ),
    agbh = c(
      0.124395661056042,
      0.543155550956726, 0.930405616760254, 0.176615670323372, 0.122252210974693,
      1.86410081386566, 0.201039269566536, 0.00215102708898485, 0.00524011626839638,
      0.0221506990492344, 1.75632297992706, 0.954743504524231, 0.373224049806595,
      0.0127956680953503, 0.0007417316082865, 0.0123716788366437, 0.279229581356049,
      2.30779552459717, 2.58910322189331, 1.23243260383606, 0.819948613643646,
      1.74025285243988, 4.03071403503418, 2.78268098831177, 2.00978517532349,
      0.700970351696014, 0.196071043610573, 2.19463133811951, 4.83159875869751,
      2.20620393753052, 0.321354597806931, 0.00308413081802428, 1.737912774086,
      0.468539208173752, 0.0156131321564317, 0.00116395147051662, 0.0145542966201901,
      0.000892410753294826, 0.0419198162853718, 2.84171080589294, 3.22121715545654,
      2.73401832580566, 2.47091150283813, 2.10038590431213, 1.15651941299438,
      0.490403175354004, 0.0419915802776814, 0.101970501244068, 0.00181114906445146,
      0.0132269319146872, 0.212756171822548, 0.111757233738899, 1.2169703245163,
      0.129767879843712, 0, 0.582266986370087, 2.96843385696411, 1.16728830337524,
      0.0494964420795441, 0.0664984136819839
    ),
    nir = c(
      0.261590600013733,
      0.250058531761169, 0.238313049077988, 0.246726274490356, 0.241509333252907,
      0.215491861104965, 0.25552836060524, 0.26755028963089, 0.283316373825073,
      0.2645283639431, 0.2347122579813, 0.250579416751862, 0.272739976644516,
      0.26601967215538, 0.260071456432343, 0.283827364444733, 0.270996034145355,
      0.229571804404259, 0.228905484080315, 0.240774929523468, 0.22843000292778,
      0.201068416237831, 0.174168020486832, 0.187955036759377, 0.235188364982605,
      0.226306527853012, 0.197943985462189, 0.192345812916756, 0.18694880604744,
      0.203041225671768, 0.24348683655262, 0.264572501182556, 0.234625786542892,
      0.252681404352188, 0.252072751522064, 0.241365790367126, 0.228045880794525,
      0.252986639738083, 0.261032313108444, 0.233464851975441, 0.235829710960388,
      0.235184907913208, 0.212146639823914, 0.204127430915833, 0.216947212815285,
      0.225598230957985, 0.231632620096207, 0.224976778030396, 0.219116434454918,
      0.255260914564133, 0.241265594959259, 0.237798929214478, 0.241482153534889,
      0.240964710712433, 0.252938002347946, 0.258243441581726, 0.211435839533806,
      0.217503502964973, 0.237074509263039, 0.237700119614601
    )
  ),
  row.names = c(NA, 60L),
  class = "data.frame"
)

# Attach the packages used by the rest of the reprex.
library(tidymodels)
library(spatialsample)
library(sf)
#> Linking to GEOS 3.11.1, GDAL 3.6.4, PROJ 9.1.1; sf_use_s2() is TRUE

# Build an sf object from the x/y columns of `drought`, tagged with the
# coordinate reference system named in `proj_ref_sys`.
proj_ref_sys <- "EPSG:7760"
drought_sf <- st_as_sf(
  drought,
  coords = c("x", "y"),
  crs = proj_ref_sys
)

# Total number of assessment-set rows, summed over every fold in `folds`.
#
# `folds` is the resampling object returned by spatial_block_cv(). If each
# observation is assessed exactly once, the total equals the number of rows
# in the input data (60 for `drought_sf`); a larger total means at least one
# observation landed in more than one assessment set.
count_assessment_rows <- function(folds) {
  rows_per_fold <- vapply(
    seq_len(nrow(folds)),
    function(i) nrow(assessment(get_rsplit(folds, i))),
    numeric(1)
  )
  sum(rows_per_fold)
}

set.seed(123)
# Default grid: 61 assessment observations in total, one more than the
# 60 rows in the data
folds <- spatial_block_cv(drought_sf, v = 3)
count_assessment_rows(folds)
#> [1] 61

set.seed(123)
# With a tiny offset: 60 observations as we'd expect
folds <- spatial_block_cv(
  drought_sf,
  v = 3,
  # This is the change: move our grid by 1 meter
  offset = st_bbox(drought_sf)[c("xmin", "ymin")] - 1
)
count_assessment_rows(folds)
#> [1] 60

Created on 2023-10-31 with reprex v2.0.2

Fast fix: if there are more assessment observations across all folds than expected, throw an error and recommend an offset.

Potential fix: do that, but first try automatically building the grid from a very slightly buffered bounding box.

mikemahoney218 commented 11 months ago

Note for future Mike: generate_folds_from_blocks() is probably the place to address this, because the number of observations in assessment needs to be inspected before buffers and radii are applied.

github-actions[bot] commented 11 months ago

This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this issue.