enram / vptstools

Python library to transfer and convert vertical profile time series data
https://enram.github.io/vptstools/
MIT License
3 stars 1 forks source link

Comparison of hdf5/daily/monthly files #35

Closed peterdesmet closed 1 year ago

peterdesmet commented 1 year ago

HDF5

library(dplyr)
library(readr)
library(bioRad)
# Read every vertical profile (vp) file found in the local HDF5 directory.
files <- list.files("~/Downloads/bejab/aloft/hdf5", full.names = TRUE)
vp <- bioRad::read_vpfiles(files)

# Combine the profiles into one time series, flatten it to a data frame
# (geo = TRUE, suntime = FALSE) and sort the rows by datetime, then height.
vpts <- bioRad::bind_into_vpts(vp)
vpts <- as.data.frame(vpts, geo = TRUE, suntime = FALSE)
vpts <- dplyr::arrange(vpts, datetime, height)

# Writing the CSV turns NaN into NA; those values were manually set back to
# NaN in the table shown below.
readr::write_csv(vpts, "vpts.csv")
nrow(vpts)
# 35575
radar datetime ff dbz dens u v gap w n_dbz dd n DBZH height n_dbz_all eta sd_vvp n_all lat lon height_antenna
bejab 2023-02-02T00:00:00Z 0 NaN -2.578521966934204 18.094850540161133 NaN NaN 1 NaN 2237 NaN 227 25.863487243652344 8821 199.04335021972656 2.0527188777923584 436 51.191700000000004 3.0642000000000005 50
bejab 2023-02-02T00:00:00Z 0 NaN -3.3399300575256348 15.184979438781738 NaN NaN 1 NaN 1568 NaN 163 24.80197525024414 8828 167.03477478027344 2.5198092460632324 456 51.191700000000004 3.0642000000000005 50
bejab 2023-02-02T00:00:00Z 0 NaN -3.988534688949585 13.078372955322266 NaN NaN 1 NaN 2122 NaN 223 23.728849411010742 8862 143.8621063232422 2.4304895401000977 444 51.191700000000004 3.0642000000000005 50
bejab 2023-02-02T00:00:00Z 200 3.0408787727355957 -7.179384708404541 6.272905349731445 2.805347204208374 -1.1734440326690674 0 -21.234346389770508 19771 112.69898986816406 740 16.758867263793945 22850 69.00196075439453 2.9160335063934326 873 51.191700000000004 3.0642000000000005 50
bejab 2023-02-02T00:00:00Z 200 3.2996182441711426 -7.66164493560791 5.613616466522217 2.8031976222991943 -1.740564227104187 0 5.670511245727539 18076 121.8370132446289 568 18.90127182006836 22860 61.749778747558594 2.4480631351470947 866 51.191700000000004 3.0642000000000005 50

Daily

# Daily vpts CSV files to compare, one per day.
daily_files <- c(
  "~/Downloads/aloft/bejab/bejab_vpts_20230202.csv",
  "~/Downloads/aloft/bejab/bejab_vpts_20230203.csv",
  "~/Downloads/aloft/bejab/bejab_vpts_20230213.csv",
  "~/Downloads/aloft/bejab/bejab_vpts_20230214.csv",
  "~/Downloads/aloft/bejab/bejab_vpts_20230215.csv"
)

# Read each file separately and stack the results in the order listed above.
# bind_rows() is namespace-qualified so this snippet does not depend on
# library(dplyr) having been attached by an earlier snippet.
daily <- dplyr::bind_rows(lapply(daily_files, readr::read_csv))
radar datetime height u v w ff dd sd_vvp gap eta dens dbz dbz_all n n_dbz n_all n_dbz_all rcs sd_vvp_threshold vcp radar_latitude radar_longitude radar_height radar_wavelength source_file
bejab 2023-02-02T00:00:00Z 0 NaN NaN NaN NaN NaN 2.0527188777923584 TRUE 199.04335021972656 18.094850540161133 -2.578521966934204 25.863487243652344 227 2237 436 8821 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T000000Z_0x9.h5
bejab 2023-02-02T00:00:00Z 0 NaN NaN NaN NaN NaN 2.5198092460632324 TRUE 167.03477478027344 15.184979438781738 -3.3399300575256348 24.80197525024414 163 1568 456 8828 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T000500Z_0x9.h5
bejab 2023-02-02T00:00:00Z 0 NaN NaN NaN NaN NaN 2.4304895401000977 TRUE 143.8621063232422 13.078372955322266 -3.988534688949585 23.728849411010742 223 2122 444 8862 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T001000Z_0x9.h5
bejab 2023-02-02T00:00:00Z 200 2.805347204208374 -1.1734440326690674 -21.234346389770508 3.0408787727355957 112.69898986816406 2.9160335063934326 FALSE 69.00196075439453 6.272905349731445 -7.179384708404541 16.758867263793945 740 19771 873 22850 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T000000Z_0x9.h5
bejab 2023-02-02T00:00:00Z 200 2.8031976222991943 -1.740564227104187 5.670511245727539 3.2996182441711426 121.8370132446289 2.4480631351470947 FALSE 61.749778747558594 5.613616466522217 -7.66164493560791 18.90127182006836 568 18076 866 22860 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T000500Z_0x9.h5

Monthly

# Read the gzipped monthly vpts CSV (file name indicates radar bejab, 202302).
monthly <- readr::read_csv("~/Downloads/aloft/bejab_vpts_202302.csv.gz")
radar datetime height u v w ff dd sd_vvp gap eta dens dbz dbz_all n n_dbz n_all n_dbz_all rcs sd_vvp_threshold vcp radar_latitude radar_longitude radar_height radar_wavelength source_file
bejab 2023-02-02T00:00:00Z 0 2.0527188777923584 TRUE 199.04335021972656 18.094850540161133 -2.578521966934204 25.863487243652344 227 2237 436 8821 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T000000Z_0x9.h5
bejab 2023-02-02T00:00:00Z 0 2.5198092460632324 TRUE 167.03477478027344 15.184979438781738 -3.3399300575256348 24.80197525024414 163 1568 456 8828 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T000500Z_0x9.h5
bejab 2023-02-02T00:00:00Z 0 2.4304895401000977 TRUE 143.8621063232422 13.078372955322266 -3.988534688949585 23.728849411010742 223 2122 444 8862 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T001000Z_0x9.h5
bejab 2023-02-02T00:00:00Z 200 2.805347204208374 -1.1734440326690674 -21.234346389770508 3.0408787727355957 112.69898986816406 2.9160335063934326 FALSE 69.00196075439453 6.272905349731445 -7.179384708404541 16.758867263793945 740 19771 873 22850 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T000000Z_0x9.h5
bejab 2023-02-02T00:00:00Z 200 2.8031976222991943 -1.740564227104187 5.670511245727539 3.2996182441711426 121.8370132446289 2.4480631351470947 FALSE 61.749778747558594 5.613616466522217 -7.66164493560791 18.90127182006836 568 18076 866 22860 11.0 2.0 51.1917 3.0642 50 5.3 bejab_vp_20230202T000500Z_0x9.h5
# Check that the combined daily files and the monthly file hold the same data.
# NOTE(review): `daily` is built from five daily files only; presumably the
# monthly file covers exactly the same days - confirm.
testthat::expect_equal(daily, monthly)
peterdesmet commented 1 year ago
peterdesmet commented 1 year ago

FYI, I have been wondering if the monthly data product offers enough benefits over the daily one (i.e. why not download daily files via bioRad?). Comparing the current directories, the monthly one is 5 times smaller than the daily one (411.189.534 vs 2.216.069.061 bytes). This is mainly caused by gzipping the data.

I think the size difference is significant enough to maintain this product: