hypertidy / ncmeta

Tidy NetCDF metadata
https://hypertidy.github.io/ncmeta/
11 stars 5 forks source link

sources without recognizable variables give bad attribute tables #29

Closed mdsumner closed 5 years ago

mdsumner commented 5 years ago

Compare

f <- c("/rdsi/PUBLIC/raad/data/oceandata.sci.gsfc.nasa.gov/MODISA/L3BIN/2012/044/A2012044.L3b_DAY_RRS.nc" , "/rdsi/PUBLIC/raad/data/podaac-ftp.jpl.nasa.gov/allData/smap/L3/RSS/V3/8day_running/SCI/40KM/2015/185/RSS_smap_SSS_L3_8day_running_40km_2015_189_FNL_v03.0.nc"  )

ncmeta::nc_meta(f[1])$attribute
# A tibble: 1 x 6
     id name      type          ndims natts dim_coord
  <dbl> <chr>     <chr>         <dbl> <dbl> <lgl>    
1    -1 NC_GLOBAL NA_character_    NA    48 FALSE    
Warning message:
In nc_atts_internal(x, inq$ngatts, vars) : no variables recognizable
> ncmeta::nc_meta(f[2])$attribute
# A tibble: 124 x 3
   attribute     variable value    
   <chr>         <chr>    <list>   
 1 standard_name lon      <chr [1]>
 2 axis          lon      <chr [1]>
 3 long_name     lon      <chr [1]>
 4 units         lon      <chr [1]>
 5 valid_min     lon      <dbl [1]>
 6 valid_max     lon      <dbl [1]>
 7 standard_name lat      <chr [1]>
 8 axis          lat      <chr [1]>
 9 long_name     lat      <chr [1]>
10 units         lat      <chr [1]>
# … with 114 more rows

The attribute table should probably include the ID, but we should at least make sure that the dummy NC_GLOBAL result is consistent:

> ncmeta::nc_atts(f[2])
# A tibble: 124 x 4
      id name          variable value    
   <dbl> <chr>         <chr>    <list>   
 1     0 standard_name lon      <chr [1]>
 2     1 axis          lon      <chr [1]>
 3     2 long_name     lon      <chr [1]>
 4     3 units         lon      <chr [1]>
 5     4 valid_min     lon      <dbl [1]>
 6     5 valid_max     lon      <dbl [1]>
 7     0 standard_name lat      <chr [1]>
 8     1 axis          lat      <chr [1]>
 9     2 long_name     lat      <chr [1]>
10     3 units         lat      <chr [1]>
# … with 114 more rows
mdsumner commented 5 years ago

Saved this set of meta objects into many_meta.rds

f <- raadfiles::get_raad_filenames() %>% dplyr::filter(stringr::str_detect(file, "nc$")) %>% 
  group_by((dirname(dirname(file)))) %>% slice(1) %>% ungroup() %>% sample_frac(1) %>% transmute(fullname = file.path(root, file))
dim(f)
f <- f[!grepl("RRS.nc$",basename(f$fullname)), ]
dim(f)
safefun <- purrr::safely(function(x) ncmeta::nc_meta(x))
system.time(a <- furrr::future_map(f$fullname, safefun))
#get all attribute tables
##d <- purrr::map_dfr(a, ~if (is.null(.x$error)) .x$result$attribute else NULL, .id = "ch")
mdsumner commented 5 years ago

Another problem is that $attribute sometimes has 0 rows (and becomes NULL in tidync):

ncmeta::nc_atts("~/Git/rasterwise/extdata/high-dim/test-1.nc")
# A tibble: 0 x 0
mdsumner commented 5 years ago

Now fixed:

nc_atts(f[1])
# A tibble: 48 x 4
      id name               variable  value    
   <dbl> <chr>              <chr>     <list>   
 1    -1 product_name       NC_GLOBAL <chr [1]>
 2    -1 title              NC_GLOBAL <chr [1]>
 3    -1 instrument         NC_GLOBAL <chr [1]>
 4    -1 platform           NC_GLOBAL <chr [1]>
 5    -1 temporal_range     NC_GLOBAL <chr [1]>
 6    -1 start_orbit_number NC_GLOBAL <dbl [1]>
 7    -1 end_orbit_number   NC_GLOBAL <dbl [1]>
 8    -1 date_created       NC_GLOBAL <chr [1]>
 9    -1 processing_version NC_GLOBAL <chr [1]>
10    -1 history            NC_GLOBAL <chr [1]>
# … with 38 more rows
Warning message:
In nc_atts.NetCDF(nc, variable = variable) : no variables recognizable
> nc_atts(f[2])
# A tibble: 124 x 4
      id name          variable value    
   <dbl> <chr>         <chr>    <list>   
 1     0 standard_name lon      <chr [1]>

 ncmeta::nc_atts("~/Git/rasterwise/extdata/high-dim/test-1.nc")
# A tibble: 0 x 4
# … with 4 variables: id <dbl>, name <chr>, variable <chr>, value <list>