Closed andreatitolo closed 4 years ago
Unfortunately, I've never been able to replicate this bug, but I'm sure it happens in the last mile of ee_monitoring. Could you run the following lines to help me better understand why this error occurs?
# Attach rgee and initialise the Earth Engine session with drive = TRUE and
# gcs = FALSE (presumably: use Google Drive credentials, skip Cloud Storage).
library(rgee)
ee_Initialize(drive = TRUE, gcs = FALSE)
#' Diagnostic variant of ee_monitoring: poll a task until it stops
#'
#' Polls `task` every 5 seconds while it is active and its state is not
#' "CANCEL_REQUESTED", then reports the final state. The classes of the
#' final state and of the task id are emitted with message() so that their
#' R types can be inspected while debugging.
#'
#' @param task Task object exposing `active()`, `status()`, `state` and `id`.
#'   When missing, the first element of `ee$batch$Task$list()` is used.
#' @param eeTaskList Not used by this function; kept so the signature is
#'   unchanged.
#' @param quiet Logical. If TRUE, the polling and "State:" lines are not
#'   printed (the class diagnostics are still emitted).
#' @return `NULL`, invisibly. Stops with an error when the final state is
#'   not "COMPLETED".
ee_monitoring_test <- function(task, eeTaskList = FALSE, quiet = FALSE) {
  if (missing(task)) {
    tasks <- ee$batch$Task$list()
    # Fail with a clear message instead of "subscript out of bounds".
    if (length(tasks) == 0) {
      stop("No Earth Engine task found to monitor")
    }
    task <- tasks[[1]]
  }

  # `&&` is the scalar, short-circuiting operator: the state is only read
  # while the task is still active.
  while (task$active() && task$state != "CANCEL_REQUESTED") {
    if (!quiet) {
      cat(sprintf("Polling for task (id: %s).\n", task$id))
    }
    Sys.sleep(5)
  }

  # Query the status once and reuse it, instead of calling task$status()
  # again for every field that is reported below.
  final_status <- task$status()
  final_state <- final_status$state

  if (!quiet) {
    cat(sprintf("State: %s\n", final_state))
  }

  # Debugging aid: show the R classes of the state and id fields.
  message(class(final_state))
  message(class(task$id))

  if (final_state != "COMPLETED") {
    message(
      "ERROR in Earth Engine servers: ",
      final_status$error_message
    )
    stop("ee_monitoring was forced to stop before getting results")
  }
  invisible(NULL)
}
# Four-corner polygon used below as the filter and export region.
# Pairs are presumably (longitude, latitude) -- confirm against the EE docs.
geometry <- ee$Geometry$Polygon(
  coords = list(
    c(45.90741759102099, 41.79760334740121), # first corner
    c(45.90741759102099, 41.68411789893928), # same x, lower y
    c(46.05847960273974, 41.68411789893928), # higher x, lower y
    c(46.05847960273974, 41.79760334740121)  # higher x, same y as first
  )
)
# Update the mask of `image` so that only pixels whose "QA60" band value is
# lower than 1 remain (presumably the cloud-free pixels -- see the name).
maskcloud1 <- function(image) {
  qa_band <- image$select(list("QA60"))
  keep_mask <- qa_band$lt(1)
  image$updateMask(keep_mask)
}
# Sentinel-2 collection filtered by date, by the CLOUDY_PIXEL_PERCENTAGE
# property (<= 20) and by the region; maskcloud1 is mapped over every image
# and only bands B4, B3 and B2 are kept.
S2 <- ee$ImageCollection("COPERNICUS/S2")$
  filterDate("2015-01-01", "2016-10-31")$
  filter(ee$Filter$lte("CLOUDY_PIXEL_PERCENTAGE", 20))$
  filterBounds(geometry)$
  map(maskcloud1)$
  select(c("B4", "B3", "B2"))

# Sort on system:time_start (FALSE presumably means descending) and keep
# at most 5 images.
ExpColl <- S2$sort("system:time_start", FALSE)$limit(5)

# Turn the collection into a list so single images can be picked by position.
count <- ExpColl$size()$getInfo()
ExpColl_list <- ExpColl$toList(count)

# Only the first image is exported here; `get()` is called with index - 1.
index <- 1
image <- ee$Image(ExpColl_list$get(index - 1))
name <- image$get("system:index")$getInfo()
print(name)

# Create and start the Drive export task for that image.
task <- ee$batch$Export$image$toDrive(
  image,
  name,
  scale = 10,
  maxPixels = 1e9,
  folder = "rgee_backup",
  region = geometry
)
task$start()

# Called without `task`, so the function falls back to the first task
# returned by ee$batch$Task$list().
ee_monitoring_test()
Hi, thanks for answering, I ran the code you provided and I get the same error, but if I understood correctly, you were right about where it happens. This is the error:
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
Polling for task (id: UP6SLQAQFNNZZC34SPXAVYET).
State: COMPLETED
Error in py_get_attr_impl(x, name, silent) :
OverflowError: Python int too large to convert to C long
And the traceback:
8. stop(structure(list(message = "OverflowError: Python int too large to convert to C long",
call = py_get_attr_impl(x, name, silent), cppstack = NULL), class = c("Rcpp::exception", "C++Error", "error", "condition")))
6. py_get_attr(x, name)
5. py_get_attr_or_item(x, name, TRUE)
4. `$.python.builtin.object`(task, "status")
3. task$status
2. message(class(task$status()$state))
1. ee_monitoring_test()
@andreatitolo the traceback helps a lot! Could you confirm that ee_monitoring_test works perfectly now?
# Attach rgee and initialise the Earth Engine session with drive = TRUE and
# gcs = FALSE (presumably: use Google Drive credentials, skip Cloud Storage).
library(rgee)
ee_Initialize(drive = TRUE, gcs = FALSE)
#' Poll an Earth Engine task until it stops and check that it completed
#'
#' Polls `task` every 5 seconds while it is active and its state is not
#' "CANCEL_REQUESTED". The final state is passed through
#' `ee_utils_py_to_r()` (presumably converting the Python value to a plain
#' R value) before it is printed and compared.
#'
#' @param task Task object exposing `active()`, `status()`, `state` and `id`.
#'   When missing, the first element of `ee$batch$Task$list()` is used.
#' @param eeTaskList Not used by this function; kept so the signature is
#'   unchanged.
#' @param quiet Logical. If TRUE, nothing is printed while polling or when
#'   the final state is reached.
#' @return `NULL`, invisibly. Stops with an error when the final state is
#'   not "COMPLETED".
ee_monitoring_test <- function(task, eeTaskList = FALSE, quiet = FALSE) {
  if (missing(task)) {
    tasks <- ee$batch$Task$list()
    # Fail with a clear message instead of "subscript out of bounds".
    if (length(tasks) == 0) {
      stop("No Earth Engine task found to monitor")
    }
    task <- tasks[[1]]
  }

  # `&&` is the scalar, short-circuiting operator: the state is only read
  # while the task is still active.
  while (task$active() && task$state != "CANCEL_REQUESTED") {
    if (!quiet) {
      cat(sprintf("Polling for task (id: %s).\n", task$id))
    }
    Sys.sleep(5)
  }

  # Query the status once; the same record supplies both the state and,
  # on failure, the error message, so no second status request is needed.
  final_status <- task$status()
  task_state <- ee_utils_py_to_r(final_status$state)

  if (!quiet) {
    cat(sprintf("State: %s\n", task_state))
  }

  if (task_state != "COMPLETED") {
    message(
      "ERROR in Earth Engine servers: ",
      final_status$error_message
    )
    stop("ee_monitoring was forced to stop before getting results")
  }
  invisible(NULL)
}
# Four-corner polygon used below as the filter and export region.
# Pairs are presumably (longitude, latitude) -- confirm against the EE docs.
geometry <- ee$Geometry$Polygon(
  coords = list(
    c(45.90741759102099, 41.79760334740121), # first corner
    c(45.90741759102099, 41.68411789893928), # same x, lower y
    c(46.05847960273974, 41.68411789893928), # higher x, lower y
    c(46.05847960273974, 41.79760334740121)  # higher x, same y as first
  )
)
# Update the mask of `image` so that only pixels whose "QA60" band value is
# lower than 1 remain (presumably the cloud-free pixels -- see the name).
maskcloud1 <- function(image) {
  qa_band <- image$select(list("QA60"))
  keep_mask <- qa_band$lt(1)
  image$updateMask(keep_mask)
}
# Sentinel-2 collection filtered by date, by the CLOUDY_PIXEL_PERCENTAGE
# property (<= 20) and by the region; maskcloud1 is mapped over every image
# and only bands B4, B3 and B2 are kept.
S2 <- ee$ImageCollection("COPERNICUS/S2")$
  filterDate("2015-01-01", "2016-10-31")$
  filter(ee$Filter$lte("CLOUDY_PIXEL_PERCENTAGE", 20))$
  filterBounds(geometry)$
  map(maskcloud1)$
  select(c("B4", "B3", "B2"))

# Sort on system:time_start (FALSE presumably means descending) and keep
# at most 5 images.
ExpColl <- S2$sort("system:time_start", FALSE)$limit(5)

# Turn the collection into a list so single images can be picked by position.
count <- ExpColl$size()$getInfo()
ExpColl_list <- ExpColl$toList(count)

# Only the first image is exported here; `get()` is called with index - 1.
index <- 1
image <- ee$Image(ExpColl_list$get(index - 1))
name <- image$get("system:index")$getInfo()
print(name)

# Create and start the Drive export task for that image.
task <- ee$batch$Export$image$toDrive(
  image,
  name,
  scale = 10,
  maxPixels = 1e9,
  folder = "rgee_backup",
  region = geometry
)
task$start()

# Called without `task`, so the function falls back to the first task
# returned by ee$batch$Task$list().
ee_monitoring_test()
Please also attach the following information:
# Status record of `task`.
task$status()
# Run a Python snippet through reticulate that prints sys.maxsize.
reticulate::py_run_string("import sys\nprint(sys.maxsize)")
# R version, platform and attached/loaded packages.
sessionInfo()
@csaybar thanks! I can confirm that ee_monitoring_test()
now works perfectly and the image is exported correctly. I have also tested it quickly on the for
loop I posted at the beginning, and I can confirm it works with that too, thanks a lot for your help!
Polling for task (id: PW2BUPR4PSYN2VUTAVQ2Q3FW).
Polling for task (id: PW2BUPR4PSYN2VUTAVQ2Q3FW).
Polling for task (id: PW2BUPR4PSYN2VUTAVQ2Q3FW).
Polling for task (id: PW2BUPR4PSYN2VUTAVQ2Q3FW).
Polling for task (id: PW2BUPR4PSYN2VUTAVQ2Q3FW).
Polling for task (id: PW2BUPR4PSYN2VUTAVQ2Q3FW).
Polling for task (id: PW2BUPR4PSYN2VUTAVQ2Q3FW).
Polling for task (id: PW2BUPR4PSYN2VUTAVQ2Q3FW).
State: COMPLETED
The information you requested:
task$status()
$state
[1] "COMPLETED"
$description
[1] "20160914T074612_20160914T114000_T38TNM"
$creation_timestamp_ms
[1] -1
$update_timestamp_ms
[1] -1
$start_timestamp_ms
[1] -1
$task_type
[1] "EXPORT_IMAGE"
$destination_uris
[1] "https://drive.google.com/#folders/1moEbFRH7Wuti-EFm3OVMfZnKguz-VODi"
$id
[1] "PW2BUPR4PSYN2VUTAVQ2Q3FW"
$name
[1] "projects/earthengine-legacy/operations/PW2BUPR4PSYN2VUTAVQ2Q3FW"
reticulate::py_run_string("import sys\nprint(sys.maxsize)")
9223372036854775807
sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19041)
Matrix products: default
locale:
[1] LC_COLLATE=English_United Kingdom.1252 LC_CTYPE=English_United Kingdom.1252
[3] LC_MONETARY=English_United Kingdom.1252 LC_NUMERIC=C
[5] LC_TIME=English_United Kingdom.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] rgee_1.0.0
loaded via a namespace (and not attached):
[1] Rcpp_1.0.5 rstudioapi_0.11 magrittr_1.5 getPass_0.2-2 tidyselect_1.1.0 lattice_0.20-41
[7] R6_2.4.1 rlang_0.4.7 fansi_0.4.1 httr_1.4.1 dplyr_1.0.0 tools_3.6.3
[13] grid_3.6.3 cli_2.0.2 askpass_1.1 ellipsis_0.3.1 openssl_1.4.2 assertthat_0.2.1
[19] gargle_0.5.0 tibble_3.0.3 lifecycle_0.2.0 crayon_1.3.4 Matrix_1.2-18 purrr_0.3.4
[25] fs_1.4.2 vctrs_0.3.1 curl_4.3 glue_1.4.1 compiler_3.6.3 pillar_1.4.6
[31] generics_0.0.2 googledrive_1.0.1 reticulate_1.16 jsonlite_1.7.0 pkgconfig_2.0.3
Finally, I understand this bug a little better. Apparently, it is caused by the presence of large numeric dates (see Considerations.Rmd). With rgee you can export ImageCollections directly from Earth Engine to your local environment. Could you please reinstall rgee (I updated the ee_monitoring function), run the following code, and tell us if an error occurs? Big thanks!
# Reinstall rgee from GitHub *before* attaching it: reinstalling a package
# that is already loaded can fail (its files may be locked, notably on
# Windows) and the running session would keep using the old code anyway.
# Run this in a fresh R session.
devtools::install_github("r-spatial/rgee")
library(rgee)
ee_Initialize(email = "titoloandrea@gmail.com", drive = TRUE, gcs = FALSE)
# Four-corner polygon used below as the filter and download region.
# Pairs are presumably (longitude, latitude) -- confirm against the EE docs.
geometry <- ee$Geometry$Polygon(
  coords = list(
    c(45.90741759102099, 41.79760334740121), # first corner
    c(45.90741759102099, 41.68411789893928), # same x, lower y
    c(46.05847960273974, 41.68411789893928), # higher x, lower y
    c(46.05847960273974, 41.79760334740121)  # higher x, same y as first
  )
)
# Update the mask of `image` so that only pixels whose "QA60" band value is
# lower than 1 remain (presumably the cloud-free pixels -- see the name).
maskcloud1 <- function(image) {
  qa_band <- image$select(list("QA60"))
  keep_mask <- qa_band$lt(1)
  image$updateMask(keep_mask)
}
# Sentinel-2 collection filtered by date, by the CLOUDY_PIXEL_PERCENTAGE
# property (<= 20) and by the region; maskcloud1 is mapped over every image
# and only bands B4, B3 and B2 are kept.
S2 <- ee$ImageCollection("COPERNICUS/S2")$
  filterDate("2015-01-01", "2016-10-31")$
  filter(ee$Filter$lte("CLOUDY_PIXEL_PERCENTAGE", 20))$
  filterBounds(geometry)$
  map(maskcloud1)$
  select(c("B4", "B3", "B2"))

# Sort on system:time_start (FALSE presumably means descending) and keep
# at most 5 images.
ExpColl <- S2$sort("system:time_start", FALSE)$limit(5)

# Hand the whole collection to rgee's collection downloader, going through
# Google Drive (via = "drive").
ee_imagecollection_to_local(
  ic = ExpColl,
  region = geometry,
  via = "drive",
  scale = 10,
  maxPixels = 1e9
)
I see, no problem, I followed your instructions and ran the code above, no errors at all:
I should mention that I had already tried to save a collection locally before today's update, and the export worked with no error.
------------------------------------- Downloading ImageCollection - via drive --- region parameters
WKT : POLYGON ((45.90742 41.68412, 46.05848 41.68412, 46.05848 41.7976, 45.90742 41.7976, 45.90742 41.68412))
CRS : 4326
geodesic : TRUE
evenOdd : TRUE
Downloading: 20160914T074612_20160914T114000_T38TNM.tifAuto-refreshing stale OAuth token.
Downloading: 20160914T074612_20160914T075524_T38TNM.tif
Downloading: 20160825T074612_20160825T113505_T38TNM.tif
Downloading: 20160825T074612_20160825T075350_T38TNM.tif
Downloading: 20160805T074612_20160805T114309_T38TNM.tif
--------------------------------------------------------------------------------[1] "20160914T074612_20160914T114000_T38TNM.tif" "20160914T074612_20160914T075524_T38TNM.tif"
[3] "20160825T074612_20160825T113505_T38TNM.tif" "20160825T074612_20160825T075350_T38TNM.tif"
[5] "20160805T074612_20160805T114309_T38TNM.tif"
@andreatitolo have you tried the same ImageCollection? This strange bug only occurs on some systems, apparently when creation_timestamp_ms, update_timestamp_ms, or start_timestamp_ms exceeds the 32-bit integer limit (although I'm not really sure); Anaconda users probably see this problem more often (see https://github.com/rstudio/reticulate/issues/154). I'm going to close this issue, but if you continue having problems we will be really happy to help!
When submitting an issue, please attach the following information about your rgee session:
[X] You have the Python API installed (from terminal):
[X] You can find the credentials file on your system:
[X] You can run a simple EE command from R:
Attach your Python (reticulate) configuration:
Earth Engine API, Python, and credentials
Description
Hi Cesar, thank you so much for this package; I discovered it only recently, and it is truly amazing. I am running into an issue: I was trying to export an image collection using
ee$batch$Export$image$toDrive
following the export_ImageCollection.R example, using Method 01. If, as per the example, I use ee_monitoring()
without any arguments, I get the "Python int too large to convert to C long" error, and even though the task is reported as "COMPLETED", it exports only one image instead of all the images in the collection. However, if I use
ee_monitoring(quiet = TRUE)
or if I remove ee_monitoring() entirely, the images are all exported correctly, as expected. I can do everything else with rgee correctly and the installation seems fine to me. Am I doing something wrong, or is this expected behaviour? This issue might be similar to #47; however, whereas in that case the error was negligible because the task succeeded anyway, my task is reported as completed even though it exported only one image.
What I Did
The result is:
with
ee_monitoring(quiet = TRUE)
or when removing ee_monitoring() entirely, the result is:
and all the images are exported correctly.