r-spatial / sf

Simple Features for R
https://r-spatial.github.io/sf/
Other
1.35k stars 299 forks source link

Aggregate.sf and aggregate.stars fails #2375

Closed alexyshr closed 7 months ago

alexyshr commented 7 months ago

In the code below, if I use the line:

xy2 <- expand.grid(x = seq(0.5, 9.5, 2), y = seq(9.5, 0.5, -2))

instead of

xy2 <- expand.grid(x = seq(0.5, 12.5, 3), y = seq(12.5, 0.5, -3))

the aggregate with two sf objects (points, polygons), and the aggregate with two stars objects (pointsstars, xy2stars) works without errors (the errors below disappear). Nonetheless, the aggregate with stars that is working is not so useful because the resulting dimension packages all the 3 values of the personalized function.

Is the issue related to polygons without intersecting points? or is the problem related to returning multiple values in aggregate?

Thank you!

P.S. I just installed sf from GitHub.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(sf)
#> Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
library(stars)
#> Loading required package: abind
library(ggplot2)

##################################
###create points
#x and y sequences with names
x <- seq(from = 0, to = 10, by = 1)
y <- seq(from = 10, to = 0, by = -1)
xy <- expand.grid(x = x, y = y)
xy$u <- xy$x + xy$y^2

#randomly select 20% of the u values and set them to NA
#using ifelse, runif and dplyr
the_condition <- runif(nrow(xy)) < 0.2
xy <- xy %>%
  dplyr::mutate(u = ifelse(the_condition, NA, u))

#convert the dataframe xy to sf
points <- sf::st_as_sf(xy, coords = c("x", "y"), crs = 4326)

##################################
###create polygons
xy2 <- expand.grid(x = seq(0.5, 12.5, 3), y = seq(12.5, 0.5, -3))
####
####cCHANGE PREVIOUS LINE FOR NEXT ONE
####
#xy2 <- expand.grid(x = seq(0.5, 9.5, 2), y = seq(9.5, 0.5, -2))
xy2 <- dplyr::mutate(xy2, id = seq_len(nrow(xy2)), u2 = x + y^2)

#convert the dataframe xy to stars
#coming without crs
xy2stars <- stars::st_as_stars(xy2, dims = c("x", "y"),
                               crs = st_crs(4326),
                               coords = c("x", "y"))
st_crs(xy2stars) <- 4326
polygons <- sf::st_as_sf(xy2stars, as_points = FALSE,
                         crs = st_crs(4326))

ggplot() +
  geom_sf(data = points, aes(fill = u)) +
  geom_sf(data = polygons, fill = NA, color = "red") +
  geom_sf_text(data = polygons, aes(label = id), size = 3) +
  labs(title = "", fill = "U")
#> Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
#> give correct results for longitude/latitude data


#aggregate returning one value works fine
stats::aggregate(points[, "u"],
  st_geometry(polygons),
  FUN = function(x) sum(x, na.rm = TRUE)
)
#> Simple feature collection with 25 features and 1 field
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -1 ymin: -1 xmax: 14 ymax: 14
#> Geodetic CRS:  WGS 84
#> First 10 features:
#>      u                       geometry
#> 1   NA POLYGON ((-1 14, 2 14, 2 11...
#> 2   NA POLYGON ((2 14, 5 14, 5 11,...
#> 3   NA POLYGON ((5 14, 8 14, 8 11,...
#> 4   NA POLYGON ((8 14, 11 14, 11 1...
#> 5   NA POLYGON ((11 14, 14 14, 14 ...
#> 6  248 POLYGON ((-1 11, 2 11, 2 8,...
#> 7  627 POLYGON ((2 11, 5 11, 5 8, ...
#> 8  494 POLYGON ((5 11, 8 11, 8 8, ...
#> 9  289 POLYGON ((8 11, 11 11, 11 8...
#> 10  NA POLYGON ((11 11, 14 11, 14 ...

#ERROR
#aggregate returning multiple values does not work
stats::aggregate(points[, "u"],
  st_geometry(polygons),
  FUN = function(x) {
    c("sum" = sum(x, na.rm = TRUE),
      "min" = min(x, na.rm = TRUE))
  }
)
#> Error in xj[i, , drop = FALSE]: (subscript) logical subscript too long

ggplot() +
  geom_stars(data = xy2stars["u2"], fill = "blue", alpha = 0.2) +
  geom_sf(data = polygons, fill = NA, color = "red") +
  geom_sf(data = points, aes(fill = u)) +
  geom_sf_text(data = polygons, aes(label = id), size = 3) +
  labs(title = "", fill = "U")
#> Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
#> give correct results for longitude/latitude data

#convert points to stars
pointsstars <- stars::st_as_stars(points)
#aggregte with stars always works
#but the results in packed in one dimension
xystarsaggn <- aggregate(pointsstars, xy2stars,
  FUN = function(x) {
    c(
      "sum" = sum(x, na.rm = TRUE),
      "min" = min(x, na.rm = TRUE),
      "max" = max(x, na.rm = TRUE)
    )
  }
)
#> Error in aggregate.stars(pointsstars, xy2stars, FUN = function(x) {: unexpected array size: does FUN return a consistent number of values?

xystarsaggn
#> Error in eval(expr, envir, enclos): object 'xystarsaggn' not found

Created on 2024-04-06 with reprex v2.0.2

Session info ``` r sessioninfo::session_info() #> - Session info --------------------------------------------------------------- #> setting value #> version R version 4.3.3 (2024-02-29 ucrt) #> os Windows 10 x64 (build 19045) #> system x86_64, mingw32 #> ui RTerm #> language (EN) #> collate English_United States.utf8 #> ctype English_United States.utf8 #> tz America/New_York #> date 2024-04-06 #> pandoc 2.17.1.1 @ C:/Users/500596~1/AppData/Local/Pandoc/ (via rmarkdown) #> #> - Packages ------------------------------------------------------------------- #> package * version date (UTC) lib source #> abind * 1.4-5 2016-07-21 [1] CRAN (R 4.3.0) #> class 7.3-22 2023-05-03 [2] CRAN (R 4.3.3) #> classInt 0.4-10 2023-09-05 [1] CRAN (R 4.3.1) #> cli 3.6.2 2023-12-11 [2] CRAN (R 4.3.3) #> colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.1) #> curl 5.2.1 2024-03-01 [2] CRAN (R 4.3.3) #> DBI 1.2.2 2024-02-16 [1] CRAN (R 4.3.3) #> digest 0.6.33 2023-07-07 [1] CRAN (R 4.3.1) #> dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.3.3) #> e1071 1.7-14 2023-12-06 [2] CRAN (R 4.3.3) #> evaluate 0.21 2023-05-05 [1] CRAN (R 4.3.1) #> fansi 1.0.6 2023-12-08 [1] CRAN (R 4.3.3) #> farver 2.1.1 2022-07-06 [1] CRAN (R 4.3.1) #> fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.1) #> fs 1.6.3 2023-07-20 [1] CRAN (R 4.3.1) #> generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.1) #> ggplot2 * 3.4.3 2023-08-14 [1] CRAN (R 4.3.1) #> glue 1.7.0 2024-01-09 [2] CRAN (R 4.3.3) #> gtable 0.3.4 2023-08-21 [1] CRAN (R 4.3.1) #> htmltools 0.5.6 2023-08-10 [1] CRAN (R 4.3.1) #> KernSmooth 2.23-22 2023-07-10 [1] CRAN (R 4.3.1) #> knitr 1.44 2023-09-11 [1] CRAN (R 4.3.1) #> labeling 0.4.3 2023-08-29 [1] CRAN (R 4.3.1) #> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.3.3) #> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.1) #> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.3.1) #> pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.1) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.1) #> proxy 0.4-27 2022-06-09 [1] CRAN (R 4.3.1) #> purrr 1.0.2 2023-08-10 [1] CRAN (R 4.3.1) #> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.3.2) #> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.3.1) #> R.oo 1.26.0 2024-01-24 [1] CRAN (R 4.3.2) #> R.utils 2.12.3 2023-11-18 [1] CRAN (R 4.3.2) #> R6 2.5.1 2021-08-19 [1] CRAN (R 4.3.1) #> Rcpp 1.0.12 2024-01-09 [1] CRAN (R 4.3.3) #> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.3.1) #> rlang 1.1.3 2024-01-10 [2] CRAN (R 4.3.3) #> rmarkdown 2.25 2023-09-18 [1] CRAN (R 4.3.1) #> s2 1.1.6 2023-12-19 [1] CRAN (R 4.3.3) #> scales 1.2.1 2022-08-20 [1] CRAN (R 4.3.1) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.1) #> sf * 1.0-17 2024-04-06 [1] Github (r-spatial/sf@2867dd2) #> stars * 0.6-5 2024-03-26 [1] Github (r-spatial/stars@ec9e76f) #> styler 1.10.2 2023-08-29 [1] CRAN (R 4.3.2) #> tibble 3.2.1 2023-03-20 [1] CRAN (R 4.3.1) #> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.3.3) #> units 0.8-5 2023-11-28 [2] CRAN (R 4.3.3) #> utf8 1.2.4 2023-10-22 [1] CRAN (R 4.3.3) #> vctrs 0.6.5 2023-12-01 [2] CRAN (R 4.3.3) #> withr 3.0.0 2024-01-16 [1] CRAN (R 4.3.3) #> wk 0.9.1 2023-11-29 [1] CRAN (R 4.3.2) #> xfun 0.40 2023-08-09 [1] CRAN (R 4.3.1) #> xml2 1.3.5 2023-07-06 [1] CRAN (R 4.3.1) #> yaml 2.3.7 2023-01-23 [1] CRAN (R 4.3.0) #> #> [1] C:/Users/500596972/AppData/Local/R/win-library/4.3 #> [2] C:/Program Files/R/R-4.3.3/library #> #> ------------------------------------------------------------------------------ ```
edzer commented 7 months ago

Thanks! I simplified the last part to

f  = function(x) {
    c(
      "sum" = sum(x, na.rm = TRUE),
      "min" = min(x, na.rm = TRUE),
      "max" = max(x, na.rm = TRUE)
    )
  }
xystarsaggn <- aggregate(pointsstars, xy2stars, FUN = f)
xystarsaggn
# stars object with 2 dimensions and 1 attribute
# attribute(s):
#    Min. 1st Qu. Median     Mean 3rd Qu. Max. NA's
# u     0    17.5   61.5 110.1042  104.75  651   27
# dimension(s):
#          from to refsys point
# f           1  3     NA    NA
# geometry    1 25 WGS 84 FALSE
#                                                                 values
# f                                                        sum, min, max
# geometry POLYGON ((-1 14, 2 14, 2 ...,...,POLYGON ((11 2, 14 2, 14 ...
plot(aperm(xystarsaggn))

x

alexyshr commented 7 months ago

It is just amazing!

Thanks Dr. Edzer!