r-spatial / rgee

Google Earth Engine for R
https://r-spatial.github.io/rgee/
Other
692 stars 148 forks source link

ee_extract() function error #367

Closed sooyoung1021 closed 4 months ago

sooyoung1021 commented 4 months ago

When submitting an issue, please attach the following information about your rgee session:

library(rgee)

# Start the rgee session, then initialize the Earth Engine client against
# the reporter's Cloud project.
ee_Initialize()
ee$Initialize(project = "ee-sooyoung1021") # works

# Fetch and print the metadata of the SRTM DEM image.
print(ee$Image("USGS/SRTMGL1_003")$getInfo())

$type [1] "Image"

$bands $bands[[1]] $bands[[1]]$id [1] "elevation"

$bands[[1]]$data_type $bands[[1]]$data_type$type [1] "PixelType"

$bands[[1]]$data_type$precision [1] "int"

$bands[[1]]$data_type$min [1] -32768

$bands[[1]]$data_type$max [1] 32767

$bands[[1]]$dimensions [1] 1296001 417601

$bands[[1]]$crs [1] "EPSG:4326"

$bands[[1]]$crs_transform $bands[[1]]$crs_transform[[1]] [1] 0.0002777778

$bands[[1]]$crs_transform[[2]] [1] 0

$bands[[1]]$crs_transform[[3]] [1] -180.0001

$bands[[1]]$crs_transform[[4]] [1] 0

$bands[[1]]$crs_transform[[5]] [1] -0.0002777778

$bands[[1]]$crs_transform[[6]] [1] 60.00014

$version [1] -1

$id [1] "USGS/SRTMGL1_003"

$properties $properties$system:visualization_0_min [1] "0.0"

$properties$type_name [1] "Image"

$properties$keywords [1] "dem" "elevation" "geophysical" "nasa" "srtm" "topography" "usgs"

$properties$thumb [1] "https://mw1.google.com/ges/dd/images/SRTM90_V4_thumb.png"

$properties$description [1] "

The Shuttle Radar Topography Mission (SRTM, see <a href=\"https://onlinelibrary.wiley.com/doi/10.1029/2005RG000183/full\">Farr\net al. 2007)\ndigital elevation data is an international research effort that\nobtained digital elevation models on a near-global scale. This\nSRTM V3 product (SRTM Plus) is provided by NASA JPL\nat a resolution of 1 arc-second (approximately 30m).

This dataset has undergone a void-filling process using open-source data\n(ASTER GDEM2, GMTED2010, and NED), as opposed to other versions that\ncontain voids or have been void-filled with commercial sources.\nFor more information on the different versions see the\n<a href=\"https://lpdaac.usgs.gov/documents/13/SRTM_Quick_Guide.pdf\">SRTM Quick Guide.

Documentation:

Provider: <a href=\"https://cmr.earthdata.nasa.gov/search/concepts/C1000000240-LPDAAC_ECS.html\">NASA / USGS / JPL-Caltech

Bands<table class=\"eecat\"><th scope=\"col\">Name<th scope=\"col\">Descriptionelevation

Elevation

Terms of Use

Unless otherwise noted, images and video on JPL public\nweb sites (public sites ending with a jpl.nasa.gov address) may\nbe used for any purpose without prior permission. For more information\nand exceptions visit the <a href=\"https://www.jpl.nasa.gov/imagepolicy/\">JPL Image Use Policy site.

Suggested citation(s)

"

$properties$source_tags [1] "nasa" "usgs"

$properties$visualization_0_max [1] "6000.0"

$properties$title [1] "NASA SRTM Digital Elevation 30m"

$properties$product_tags [1] "srtm" "elevation" "topography" "dem" "geophysical"

$properties$provider [1] "NASA / USGS / JPL-Caltech"

$properties$visualization_0_min [1] "0.0"

$properties$visualization_0_name [1] "Elevation"

$properties$date_range [1] -1 -1

$properties$system:visualization_0_gamma [1] "1.6"

$properties$period [1] 0

$properties$system:visualization_0_bands [1] "elevation"

$properties$provider_url [1] "https://cmr.earthdata.nasa.gov/search/concepts/C1000000240-LPDAAC_ECS.html"

$properties$visualization_0_gamma [1] "1.6"

$properties$sample [1] "https://mw1.google.com/ges/dd/images/SRTM90_V4_sample.png"

$properties$tags [1] "dem" "elevation" "geophysical" "nasa" "srtm" "topography" "usgs"

$properties$system:visualization_0_max [1] "6000.0"

$properties$system:visualization_0_name [1] "Elevation"

$properties$system:asset_size [1] -1

$properties$visualization_0_bands [1] "elevation"

Attach your Python (reticulate) configuration:

library(reticulate)
py_config()

python:         C:/Users/SooyoungKim/AppData/Local/r-miniconda/envs/rgee/python.exe
libpython:      C:/Users/SooyoungKim/AppData/Local/r-miniconda/envs/rgee/python38.dll
pythonhome:     C:/Users/SooyoungKim/AppData/Local/r-miniconda/envs/rgee
version:        3.8.19 | packaged by conda-forge | (default, Mar 20 2024, 12:38:07) [MSC v.1929 64 bit (AMD64)]
Architecture:   64bit
numpy:          C:/Users/SooyoungKim/AppData/Local/r-miniconda/envs/rgee/Lib/site-packages/numpy
numpy_version:  1.24.4
ee:             C:\Users\SOOYOU~1\AppData\Local\R-MINI~1\envs\rgee\lib\site-packages\ee\__init__.py

NOTE: Python version was forced by RETICULATE_PYTHON

Description

I am trying to extract daily climate variables per administrative area ("ADM2_EN") in the shapefile from the ERA5_LAND/DAILY_AGGR image collection between May 1 and May 30, 2024. The following code used to work perfectly in the past (until about a month ago), but now it produces an error. It seems to have to do with the lines within ee_extract() that apply create_tripplets() to the image collection. I can't get my head around why this happened. Help!

What I Did

start <- "2024-05-01"
end <- "2024-05-30"

# Load the ERA5 image collection.
# NOTE(review): Earth Engine's filterDate() documents the end date as
# exclusive, so images dated 2024-05-30 are presumably not included here;
# confirm this matches the intended "May 1 - 30" window.
era5 <- ee$ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")$filterDate(start, end)
# print(era5)

# Select the variables of interest
era5_dat <- era5 %>% # date range
  ee$ImageCollection$map(function(x) {
    x$select(c(
      "temperature_2m_min",
      "temperature_2m_max",
      "temperature_2m",
      "total_precipitation_sum"
    ))
  }) %>%
  ee$ImageCollection$toBands() # from ImageCollection to Image

# bandNames <- era5_dat$bandNames()
# cat("Band names: ", paste(bandNames$getInfo(), collapse = ","))

# Load nigeria shapefile as sf
nigeria <- read_sf(dsn = here::here(
  "nga_adm_osgof_20190417",
  "nga_admbnda_adm2_osgof_20190417.shp"
))
names(nigeria)
plot(nigeria)

crs(nigeria) # WGS 84

# Extract the time series from the Image Collection using the shapefile.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ee_ng_era <- ee_extract(
  x = era5_dat,
  y = nigeria["ADM2_EN"],
  sf = FALSE
)
Error in ee$Image$reduceRegions(image = img, collection = ee_y, reducer = fun, : RuntimeError: unused argument (image = img) Run `reticulate::py_last_error()` for details.
![Screenshot 2024-07-16 133022](https://github.com/user-attachments/assets/1759c076-93b4-4618-8e84-3675b95b8ac6)

Where I think the error happened (within ee_extract()):

```r
create_tripplets <- function(img) {
  img_reduce_regions <- ee$Image$reduceRegions(image = img, collection = ee_y, reducer = fun, scale = scale, ...)
  ee$FeatureCollection$map(img_reduce_regions, function(f) {
    ee$Feature$set(f, "imageId", ee$Image$get(img, "system:index"))
  })
}
triplets <- x_ic %>%
  ee$ImageCollection$map(create_tripplets) %>%
  ee$ImageCollection$flatten()
```

ambarja commented 4 months ago

@sooyoung1021 please here I have edited some of your script 👇

library(rgee)
library(sf)
ee_Initialize()

# Date range of the data to extract
start <- "2024-05-01"
end <- "2024-05-30"

# Bands of the ERA5-Land daily aggregate to extract
variables <- c(
  "temperature_2m_min",
  "temperature_2m_max",
  "temperature_2m",
  "total_precipitation_sum"
)

# Define the dataset restricted to the date range and the selected variables
era5 <- ee$ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")$
  filterDate(start, end)$
  select(variables)

# Load nigeria shapefile as sf (https://data.humdata.org/dataset/nigeria-admin-level-2)
nigeria <- read_sf("C:/Users/USER/Downloads/nigeria_admin_level_2/Nigeria_Admin_Level_2.shp")

# Extract the time series from the Image Collection using the shapefile.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ee_ng_era <- ee_extract(
  x = era5,
  y = nigeria["StateCode"],
  sf = FALSE
)

image

sooyoung1021 commented 4 months ago

Same error still happens. Any idea why? :/ Screenshot 2024-07-18 105619

sooyoung1021 commented 4 months ago

I asked my other colleague to try and same error happens for him too.

ambarja commented 4 months ago

I asked my other colleague to try and same error happens for him too.

@sooyoung1021 Could you please share a reproducible script with example data?

sooyoung1021 commented 4 months ago

I copy-pasted your script and downloaded the shapefile you used in your example and ran the code. That's when the error got produced.

ambarja commented 4 months ago

I'm closing this issue because there is no clear reproducible example.

sooyoung1021 commented 4 months ago

Well I think it's clearly a bug that happens to some people and since it doesn't happen on your end, I don't know how else to provide examples or proof. If you'd like to see the live demo of it happening, would be more than happy to connect virtually and show you.

see24 commented 1 day ago

I can also reproduce this with a slightly modified version of @ambarja's code to download the boundary data programmatically.

library(rgee)
library(sf)
ee_Initialize()

# Date range of the data to extract
start <- "2024-05-01"
end <- "2024-05-30"

# Bands of the ERA5-Land daily aggregate to extract
variables <- c(
  "temperature_2m_min",
  "temperature_2m_max",
  "temperature_2m",
  "total_precipitation_sum"
)

# Define the dataset restricted to the date range and the selected variables
era5 <- ee$ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")$
  filterDate(start, end)$
  select(variables)

# Download the Nigerian administrative boundaries programmatically and
# convert them to an sf object.
nigeria <- geodata::gadm("NGA", resolution = 2, path = ".")

nigeria <- st_as_sf(nigeria)

# Extract the time series from the Image Collection using the boundaries.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ee_ng_era <- ee_extract(
  x = era5,
  y = nigeria,
  sf = FALSE
)

I am also having the same issue in my own separate example. @sooyoung1021 has provided detailed information on their installation above. Just because it works for you doesn't mean there is no reproducible example. Please let us know what further information you would need to reproduce this issue.

see24 commented 1 day ago

@sooyoung1021 I have created a workaround by overwriting ee_extract() in my global environment with the version below. All I did was replace

    img_reduce_regions <- ee$Image$reduceRegions(image = img, 
      collection = ee_y, reducer = fun, scale = scale, 
      ...)

with

    img_reduce_regions <- img$reduceRegions(collection = ee_y, reducer = fun, scale = scale,  ...)

which seems to work for the example shown above. I have no idea how this package works in general though so I am not sure if this is a reasonable change.

The full function is below. Note that I also had to add `rgee:::` in several spots where internal functions are used.

# Workaround copy of rgee's ee_extract(), intended to be defined outside the
# rgee namespace (e.g. in the global environment). It reduces every band of
# `x` over every feature of `y` and returns one row per feature.
#
# Arguments:
#   x         An ee$Image or ee$ImageCollection. A collection is flattened
#             into a single multi-band image with toBands().
#   y         An sf or sfc object, or an Earth Engine table object
#             (ee$Geometry, ee$Feature or ee$FeatureCollection).
#   fun       Earth Engine reducer applied per feature
#             (default ee$Reducer$mean()).
#   scale     Scale forwarded to reduceRegions(). When NULL it is set to 1000.
#   sf        If TRUE, the getInfo result is returned as an sf object carrying
#             the geometry of `y`; otherwise the geometry is dropped.
#   via       "getInfo" (default), "drive" or "gcs". Any value other than
#             "drive"/"gcs" takes the getInfo route.
#   container Forwarded to the Drive / GCS export task initialisers.
#   lazy      For "drive"/"gcs" only: if TRUE, the download is wrapped in a
#             lazy future which is returned instead of the data.
#   quiet     If TRUE, suppresses informative messages.
#   ...       Forwarded to reduceRegions().
#
# Every rgee-internal helper is referenced through `rgee:::` so that the
# function does not depend on being evaluated inside the rgee namespace.
ee_extract <- function(x, y, fun = ee$Reducer$mean(), scale = NULL, sf = FALSE,
                       via = "getInfo", container = "rgee_backup", lazy = FALSE,
                       quiet = FALSE, ...) {
  rgee:::ee_check_packages("ee_extract", c("geojsonio", "sf"))

  # Apply the default scale whenever none is supplied. Previously the default
  # was only set when `quiet` was FALSE, so silencing messages also changed
  # the scale used for the reduction.
  if (is.null(scale)) {
    scale <- 1000
    if (!quiet) {
      message(sprintf("The image scale is set to %s.", scale))
    }
  }

  if (!inherits(x, rgee:::ee_get_spatial_objects("i+ic"))) {
    stop("x is neither an ee$Image nor ee$ImageCollection")
  }
  if (inherits(x, "ee.imagecollection.ImageCollection")) {
    x <- ee$ImageCollection$toBands(x)
  }

  # Load the Python helper shipped with rgee that formats the result table.
  extract_py_path <- system.file("python/ee_extract.py", package = "rgee")
  extract_py <- rgee:::ee_source_python(extract_py_path)

  # Build both a local (sf) and a server-side (ee) representation of `y`.
  sp_objects <- rgee:::ee_get_spatial_objects("Table")
  if (!inherits(y, c("sf", "sfc", sp_objects))) {
    stop("y is not a sf, sfc, ee$Geometry, ee$Feature or ee$FeatureCollection object.")
  }
  if (inherits(y, "sf")) {
    sf_y <- y
    ee_y <- sf_as_ee(y[[attr(y, "sf_column")]], quiet = TRUE)
  } else if (inherits(y, "sfc")) {
    sf_y <- sf::st_sf(id = seq_along(y), geometry = y)
    ee_y <- sf_as_ee(y, quiet = TRUE)
  } else {
    # Only Earth Engine table objects can reach this branch because of the
    # class check above.
    ee_y <- ee$FeatureCollection(y)
    sf_y <- tryCatch(
      expr = ee_as_sf(y, quiet = FALSE, maxFeatures = 10000),
      error = function(e) {
        stop("The ee$FeatureCollection (y) must be not higher than 10 000.")
      }
    )
  }

  # Copy each feature's system:index into an "ee_ID" property; it is used
  # below as the row key of the result table.
  ee_add_rows <- function(f) {
    f_prop <- ee$Feature$get(f, "system:index")
    ee$Feature(ee$Feature$set(f, "ee_ID", f_prop))
  }
  ee_y <- ee$FeatureCollection(ee_y) %>%
    ee$FeatureCollection$map(ee_add_rows)

  fun_name <- gsub("Reducer.", "", (ee$Reducer$getInfo(fun))[["type"]])
  x_ic <- rgee:::bands_to_image_collection(x)

  # Reduce one image over all features and tag every resulting feature with
  # the image's system:index. reduceRegions() is called as a method of `img`
  # because the unbound form, ee$Image$reduceRegions(image = img, ...), fails
  # with "unused argument (image = img)".
  create_tripplets <- function(img) {
    img_reduce_regions <- img$reduceRegions(
      collection = ee_y,
      reducer = fun,
      scale = scale,
      ...
    )
    ee$FeatureCollection$map(img_reduce_regions, function(f) {
      ee$Feature$set(f, "imageId", ee$Image$get(img, "system:index"))
    })
  }
  triplets <- x_ic %>%
    ee$ImageCollection$map(create_tripplets) %>%
    ee$ImageCollection$flatten()

  # Format the triplets with the Python helper and strip the geometries.
  # Named `ee_table` to avoid shadowing base::table().
  ee_table <- extract_py$table_format(
    triplets, "ee_ID", "imageId", fun_name
  )$map(function(feature) {
    ee$Feature$setGeometry(feature, NULL)
  })

  if (via == "drive") {
    table_id <- basename(tempfile("rgee_file_"))
    ee_user <- rgee:::ee_exist_credentials()
    dsn <- sprintf("%s/%s.csv", tempdir(), table_id)
    table_task <- rgee:::ee_init_task_drive_fc(
      x_fc = ee_table, dsn = dsn,
      container = container, table_id = table_id, ee_user = ee_user,
      selectors = NULL, timePrefix = TRUE, quiet = quiet
    )
    if (lazy) {
      # Temporarily switch to a sequential plan; the caller's plan is
      # restored when this function exits.
      prev_plan <- future::plan(future::sequential, .skip = TRUE)
      on.exit(future::plan(prev_plan, .skip = TRUE), add = TRUE)
      future::future({
        rgee:::ee_extract_to_lazy_exp_drive(table_task, dsn, quiet, sf, sf_y)
      }, lazy = TRUE)
    } else {
      rgee:::ee_extract_to_lazy_exp_drive(table_task, dsn, quiet, sf, sf_y)
    }
  } else if (via == "gcs") {
    table_id <- basename(tempfile("rgee_file_"))
    ee_user <- rgee:::ee_exist_credentials()
    dsn <- sprintf("%s/%s.csv", tempdir(), table_id)
    table_task <- rgee:::ee_init_task_gcs_fc(
      x_fc = ee_table, dsn = dsn,
      container = container, table_id = table_id, ee_user = ee_user,
      selectors = NULL, timePrefix = TRUE, quiet = quiet
    )
    if (lazy) {
      prev_plan <- future::plan(future::sequential, .skip = TRUE)
      on.exit(future::plan(prev_plan, .skip = TRUE), add = TRUE)
      future::future({
        rgee:::ee_extract_to_lazy_exp_gcs(table_task, dsn, quiet, sf, sf_y)
      }, lazy = TRUE)
    } else {
      rgee:::ee_extract_to_lazy_exp_gcs(table_task, dsn, quiet, sf, sf_y)
    }
  } else {
    # getInfo route: fetch the table, convert it to sf, drop the geometry and
    # bookkeeping columns, then bind it to the attributes of `y`.
    table_geojson <- ee_table %>%
      ee$FeatureCollection$getInfo() %>%
      ee_utils_py_to_r()
    class(table_geojson) <- "geo_list"
    table_sf <- geojsonio::geojson_sf(table_geojson)
    sf::st_geometry(table_sf) <- NULL
    table_sf <- table_sf[, order(names(table_sf))]
    table_sf["id"] <- NULL
    table_sf["ee_ID"] <- NULL
    if (isTRUE(sf)) {
      table_geometry <- sf::st_geometry(sf_y)
      table_sf <- sf_y %>%
        sf::st_drop_geometry() %>%
        cbind(table_sf) %>%
        sf::st_sf(geometry = table_geometry)
    } else {
      table_sf <- sf_y %>%
        sf::st_drop_geometry() %>%
        cbind(table_sf)
    }
    table_sf
  }
}