appelmar / gdalcubes_cpp

Earth observation data cubes from GDAL image collections
MIT License
74 stars 7 forks source link

Maintain date format in cube-view attributes when using monthly aggregation #41

Closed carostolle closed 2 years ago

carostolle commented 2 years ago

Hi! We produced a set of pipeable functions out of the gdalcubes image acquisition workflow. In this we generate the STAC query from the cube-view object. Recently we used monthly aggregation for the first time and discovered that t0 and t1 of the cube-view get shortened to only include year and month. This causes the successive STAC search to crash as it requires the format YYYY-mm-dd (see error message).

Therefore our question: Is this shortening necessary? Or could the time period be kept as it is specified by the user like e.g. when using “P30D”?

Thanks!

#reprex

ext = list(4403205, 2961338, 4403379, 2961503)
names(ext) = c("xmin", "ymin", "xmax", "ymax")

## output list
cube_opts = list(
  "dx" = 10
  , "dy" = 10
  , "dt" = "P1M"
  , "aggregation" = "first"
  # , "resampling" = resampling
  , "period" = c("2021-01-01", "2021-08-15")
)

## define cube view
cv_args = list(
  srs = "EPSG:3035"
  , dx = cube_opts$dx
  , dy = cube_opts$dy
  , dt = cube_opts$dt
  , aggregation = cube_opts$aggregation
  # , resampling = cube_opts$resampling
  , extent = list(
    t0 = as.character(cube_opts$period[1])
    , t1 = as.character(cube_opts$period[2])
    , left = ext[["xmin"]]
    , right = ext[["xmax"]]
    , top = ext[["ymax"]]
    , bottom = ext[["ymin"]]
  )
)

cv = do.call(gdalcubes::cube_view, args = cv_args)

str(cv)
#> List of 4
#>  $ space      :List of 9
#>   ..$ right : num 4403382
#>   ..$ left  : num 4403202
#>   ..$ top   : num 2961506
#>   ..$ bottom: num 2961336
#>   ..$ nx    : num 18
#>   ..$ ny    : num 17
#>   ..$ srs   : chr "EPSG:3035"
#>   ..$ dx    : num 10
#>   ..$ dy    : num 10
#>  $ time       :List of 4
#>   ..$ t0: chr "2021-01"
#>   ..$ t1: chr "2021-08"
#>   ..$ dt: chr "P1M"
#>   ..$ nt: num 8
#>  $ aggregation: chr "first"
#>  $ resampling : chr "near"
#>  - attr(*, "class")= chr [1:2] "cube_view" "list"
## create stac query from cube view

bbox = sf::st_bbox(
  c(
    xmin = cv$space$left
    , xmax = cv$space$right
    , ymax = cv$space$top
    , ymin = cv$space$bottom
  )
  , crs = sf::st_crs(cv$space$srs)
)
bbox = sf::st_as_sfc(bbox)
bbox = sf::st_transform(bbox, crs = 4326)
bbox = sf::st_bbox(bbox, crs = 4326)

stacURL = "https://earth-search.aws.element84.com/v0"
collections = "sentinel-s2-l2a-cogs"

query = rstac::stac(base_url = stacURL)
query = rstac::stac_search(
  q = query,
  collections = collections
  , bbox = as.numeric(bbox)
  , datetime = paste0(cv$time$t0, "/", cv$time$t1)
  , limit = 500)
#> Error: The date time provided not follow the RFC 3339 format,please check the RFC 3339 rules.
# items = rstac::post_request(q = query)

Created on 2022-03-29 by the reprex package (v2.0.1)

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.1.3 (2022-03-10) #> os Ubuntu 20.04.4 LTS #> system x86_64, linux-gnu #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz Europe/Berlin #> date 2022-03-29 #> pandoc 2.11.4 @ /usr/lib/rstudio/bin/pandoc/ (via rmarkdown) #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date (UTC) lib source #> assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.1.0) #> class 7.3-20 2022-01-13 [4] CRAN (R 4.1.2) #> classInt 0.4-3 2020-04-07 [2] CRAN (R 4.1.0) #> cli 3.2.0 2022-02-14 [1] CRAN (R 4.1.2) #> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.1.3) #> DBI 1.1.2 2021-12-20 [1] CRAN (R 4.1.2) #> digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.2) #> dplyr 1.0.7 2021-06-18 [2] CRAN (R 4.1.0) #> e1071 1.7-9 2021-09-16 [1] CRAN (R 4.1.2) #> ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.1.0) #> evaluate 0.15 2022-02-18 [1] CRAN (R 4.1.2) #> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.1.3) #> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.1) #> fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.2) #> gdalcubes 0.6.1 2022-03-23 [1] CRAN (R 4.1.3) #> generics 0.1.2 2022-01-31 [1] CRAN (R 4.1.2) #> glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.3) #> highr 0.9 2021-04-16 [2] CRAN (R 4.1.0) #> htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.1) #> httr 1.4.2 2020-07-20 [2] CRAN (R 4.1.0) #> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.1.3) #> KernSmooth 2.23-20 2021-05-03 [4] CRAN (R 4.0.5) #> knitr 1.38 2022-03-25 [1] CRAN (R 4.1.3) #> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.1) #> magrittr 2.0.2 2022-01-26 [1] CRAN (R 4.1.2) #> ncdf4 1.19 2021-12-15 [1] CRAN (R 4.1.2) #> pillar 1.7.0 2022-02-01 [1] CRAN (R 4.1.2) #> pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.1.0) #> proxy 0.4-26 2021-06-07 [2] CRAN (R 4.1.0) #> purrr 0.3.4 2020-04-17 [2] CRAN (R 4.1.0) #> R6 2.5.1 2021-08-19 [2] CRAN (R 4.1.1) #> Rcpp 1.0.8.3 2022-03-17 [1] CRAN (R 4.1.3) #> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.2) #> rlang 1.0.2 2022-03-04 [1] CRAN (R 4.1.3) #> rmarkdown 2.13 2022-03-10 [1] CRAN (R 4.1.3) #> rstac 0.9.1-5 2021-10-31 [1] CRAN (R 4.1.1) #> rstudioapi 0.13 2020-11-12 [2] CRAN (R 4.1.0) #> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.2) #> sf 1.0-7 2022-03-07 [1] CRAN (R 4.1.3) #> stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.2) #> stringr 1.4.0 2019-02-10 [2] CRAN (R 4.1.0) #> tibble 3.1.6 2021-11-07 [1] CRAN (R 4.1.2) #> tidyselect 1.1.1 2021-04-30 [2] CRAN (R 4.1.0) #> units 0.8-0 2022-02-05 [1] CRAN (R 4.1.2) #> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.2) #> vctrs 0.3.8 2021-04-29 [2] CRAN (R 4.1.0) #> withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.3) #> xfun 0.30 2022-03-02 [1] CRAN (R 4.1.3) #> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.1.2) #> #> [1] /home/carola/R/x86_64-pc-linux-gnu-library/4.1 #> [2] /usr/local/lib/R/site-library #> [3] /usr/lib/R/site-library #> [4] /usr/lib/R/library #> #> ────────────────────────────────────────────────────────────────────────────── ```
appelmar commented 2 years ago

Thanks for the reprex, very useful. I think the question is in general what to do if t0 / t1 and dt have different datetime units. In the example, the cube view would currently be extended to include the entire August (until "2021-08-31"), because the data cube must have equally sized pixels.

However, I agree that the current implementation is problematic because dropping days (and months for yearly aggregation) leads to incompatibilities e.g. with rstac (though it seems to be ISO 8601 compliant).

My suggestion would be to extend the time axis as in the current implementation, but simply return YYYY-mm-dd strings even for monthly or yearly aggregated cubes. In the example, the cube view function would then return t0 = "2021-01-01" and t1 = "2021-08-31". Does this make sense to you?

carostolle commented 2 years ago

Thanks for the quick reply! Yes, your suggestion makes perfect sense, the extension of the time axis was expected and is not an issue at all for our use cases.

appelmar commented 2 years ago

This should work now (with https://github.com/appelmar/gdalcubes_R/commit/703e947eb27d1ebab3f2d9fa8e9fe04785659be5):

ext = list(4403205, 2961338, 4403379, 2961503)
names(ext) = c("xmin", "ymin", "xmax", "ymax")

## output list
cube_opts = list(
  "dx" = 10
  , "dy" = 10
  , "dt" = "P1M"
  , "aggregation" = "first"
  # , "resampling" = resampling
  , "period" = c("2021-01-01", "2021-08-15")
)

## define cube view
cv_args = list(
  srs = "EPSG:3035"
  , dx = cube_opts$dx
  , dy = cube_opts$dy
  , dt = cube_opts$dt
  , aggregation = cube_opts$aggregation
  # , resampling = cube_opts$resampling
  , extent = list(
    t0 = as.character(cube_opts$period[1])
    , t1 = as.character(cube_opts$period[2])
    , left = ext[["xmin"]]
    , right = ext[["xmax"]]
    , top = ext[["ymax"]]
    , bottom = ext[["ymin"]]
  )
)

cv = do.call(gdalcubes::cube_view, args = cv_args)
str(cv)
#> List of 4
#>  $ space      :List of 9
#>   ..$ right : num 4403382
#>   ..$ left  : num 4403202
#>   ..$ top   : num 2961506
#>   ..$ bottom: num 2961336
#>   ..$ nx    : num 18
#>   ..$ ny    : num 17
#>   ..$ srs   : chr "EPSG:3035"
#>   ..$ dx    : num 10
#>   ..$ dy    : num 10
#>  $ time       :List of 4
#>   ..$ t0: chr "2021-01-01"
#>   ..$ t1: chr "2021-08-31"
#>   ..$ dt: chr "P1M"
#>   ..$ nt: num 8
#>  $ aggregation: chr "first"
#>  $ resampling : chr "near"
#>  - attr(*, "class")= chr [1:2] "cube_view" "list"

bbox = sf::st_bbox(
  c(
    xmin = cv$space$left
    , xmax = cv$space$right
    , ymax = cv$space$top
    , ymin = cv$space$bottom
  )
  , crs = sf::st_crs(cv$space$srs)
)
bbox = sf::st_as_sfc(bbox)
bbox = sf::st_transform(bbox, crs = 4326)
bbox = sf::st_bbox(bbox, crs = 4326)

stacURL = "https://earth-search.aws.element84.com/v0"
collections = "sentinel-s2-l2a-cogs"

query = rstac::stac(base_url = stacURL)
query = rstac::stac_search(
  q = query,
  collections = collections
  , bbox = as.numeric(bbox)
  , datetime = paste0(cv$time$t0, "/", cv$time$t1)
  , limit = 500)

Created on 2022-03-30 by the reprex package (v2.0.1)

appelmar commented 2 years ago

I'll close the issue for now, feel free to re-open if anything does not work as expected.