HARPgroup / model_meteorology

0 stars 0 forks source link

Raster Data Inventory and Analysis #50

Open rburghol opened 3 weeks ago

rburghol commented 3 weeks ago

Concepts

Get a simple (fast) inventory

\set varkey 'nldas2_obs_hourly'
\set band '1'
select extract(year from to_timestamp(met.tsendtime)) as year,
  min(to_timestamp(met.tsendtime)) as start_date,
  max(to_timestamp(met.tsendtime)) as end_date,
  count(*)
from (
  select met.tsendtime,
    (ST_SummaryStatsAgg(met.rast, :band, TRUE)).mean as precip_in
  from dh_feature as mcov
  left outer join dh_variabledefinition as v
  on (
    v.varkey = :'varkey'
  )
  left outer join dh_timeseries_weather as met
  on (
    mcov.hydroid = met.featureid and met.varid = v.hydroid
    and met.entity_type = 'dh_feature'
  )
  where mcov.hydrocode = 'cbp6_met_coverage'
  and met.rast is not null
  group by met.tsendtime
) as met
group by extract(year from to_timestamp(met.tsendtime))
order by extract(year from to_timestamp(met.tsendtime));

Get Number of Bands in rasters

\set varkey 'nldas2_obs_hourly'
SELECT tid, to_timestamp(tsendtime), ST_NumBands(rast) As numbands 
from dh_feature as mcov
left outer join dh_variabledefinition as v
on (
  v.varkey = :'varkey'
)
left outer join dh_timeseries_weather as met
on (
  mcov.hydroid = met.featureid and met.varid = v.hydroid
  and met.entity_type = 'dh_feature'
)
where mcov.hydrocode = 'cbp6_met_coverage'
and extract(year from to_timestamp(met.tsendtime)) = 2022
;

Get Inventory and average value of available data.

-- \set varkey 'prism_mod_daily'
-- \set varkey 'prism_mod_daily'
\set varkey 'nldas2_obs_hourly'
select extract(year from to_timestamp(met.tsendtime)) as year,
  min(to_timestamp(met.tsendtime)) as start_date,
  max(to_timestamp(met.tsendtime)) as end_date,
  count(*) as num_records,
  round(0.0393701 * sum(precip_in)::numeric,1) as precip_in
from (
  select met.tstime, met.tsendtime,
    (ST_SummaryStatsAgg(met.rast, :band, TRUE)).mean as precip_in
  from dh_feature as mcov
  left outer join dh_variabledefinition as v
  on (
    v.varkey = :'varkey'
  )
  left outer join dh_timeseries_weather as met
  on (
    mcov.hydroid = met.featureid and met.varid = v.hydroid
    and met.entity_type = 'dh_feature'
  )
  where mcov.hydrocode = 'cbp6_met_coverage'
  group by met.tstime, met.tsendtime
) as met
group by extract(year from to_timestamp(met.tsendtime))
order by extract(year from to_timestamp(met.tsendtime));

Summary over coverage

\set varkey 'nldas2_obs_hourly'
\set hydrocode  'usgs_ws_01634000'
\set band '1'
select extract(year from to_timestamp(met.tsendtime)) as year,
  min(to_timestamp(met.tsendtime)) as start_date,
  max(to_timestamp(met.tsendtime)) as end_date,
  count(*) as num_records,
  round(sum(precip_in)::numeric,1) as precip_in
from (
  select met.featureid, met.tstime,
  met.tsendtime,
  (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm,
  0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in
from dh_feature as f
left outer join field_data_dh_geofield as fgeo
on (
  fgeo.entity_id = f.hydroid
  and fgeo.entity_type = 'dh_feature'
)
left outer join dh_variabledefinition as v
on (
  v.varkey = :'varkey'
)
left outer join dh_feature as mcov 
on (
  mcov.hydrocode = 'cbp6_met_coverage'
)
left outer join dh_timeseries_weather as met
on (
  mcov.hydroid = met.featureid and met.varid = v.hydroid
)
where f.hydrocode = :'hydrocode'
order by met.tsendtime
) as met
group by extract(year from to_timestamp(met.tsendtime))
order by extract(year from to_timestamp(met.tsendtime));

Daily Records for a Specified Period of Time over a coverage

\set varkey 'nldas2_obs_hourly'
\set hydrocode  'usgs_ws_01634000'
\set band '1'

select met.featureid, to_timestamp(met.tsendtime) as obs_date,
  extract(month from to_timestamp(met.tsendtime)) as mo,
  (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm,
  0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in
from dh_feature as f
left outer join field_data_dh_geofield as fgeo
on (
  fgeo.entity_id = f.hydroid
  and fgeo.entity_type = 'dh_feature'
)
left outer join dh_variabledefinition as v
on (
  v.varkey = :'varkey'
)
left outer join dh_feature as mcov
on (
  mcov.hydrocode = 'cbp6_met_coverage'
)
left outer join dh_timeseries_weather as met
on (
  mcov.hydroid = met.featureid and met.varid = v.hydroid
)
where f.hydrocode = :'hydrocode' and met.tsendtime >= extract(epoch from '2021-01-01'::date) and met.tsendtime <= extract(epoch from '2021-12-31'::date)

All Daily Records over coverage

\set varkey 'nldas2_obs_hourly'
\set hydrocode  'usgs_ws_01634000'
\set band '1'

select met.featureid, to_timestamp(met.tsendtime) as obs_date,
  extract(month from to_timestamp(met.tsendtime)) as mo,
  (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm,
  0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in
from dh_feature as f
left outer join field_data_dh_geofield as fgeo
on (
  fgeo.entity_id = f.hydroid
  and fgeo.entity_type = 'dh_feature'
)
left outer join dh_variabledefinition as v
on (
  v.varkey = :'varkey'
)
left outer join dh_feature as mcov 
on (
  mcov.hydrocode = 'cbp6_met_coverage'
)
left outer join dh_timeseries_weather as met
on (
  mcov.hydroid = met.featureid and met.varid = v.hydroid
)
where f.hydrocode = :'hydrocode'
order by met.tsendtime;

Daily records Copy to File


\set varkey 'prism_mod_daily'
\set hydrocode  'usgs_ws_01634000'
\set band '1'
\set fname '/tmp/usgs_ws_01634000-prism-all.csv'

copy (
  select met.featureid, to_timestamp(met.tsendtime) as obs_date,
    extract(month from to_timestamp(met.tsendtime)) as mo,
    (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm,
    0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in
  from dh_feature as f
  left outer join field_data_dh_geofield as fgeo
  on (
    fgeo.entity_id = f.hydroid
    and fgeo.entity_type = 'dh_feature'
  )
  left outer join dh_variabledefinition as v
  on (
    v.varkey = :'varkey'
  )
  left outer join dh_feature as mcov
  on (
    mcov.hydrocode = 'cbp6_met_coverage'
  )
  left outer join dh_timeseries_weather as met
  on (
    mcov.hydroid = met.featureid and met.varid = v.hydroid
  )
  where f.hydrocode = :'hydrocode'
  order by met.tsendtime
) to :'fname' WITH HEADER CSV;

Find Rasters with Wrong Number of Bands

copy ( SELECT to_timestamp(tsendtime) from dh_feature as mcov left outer join dh_variabledefinition as v on ( v.varkey = :'varkey' ) left outer join dh_timeseries_weather as met on ( mcov.hydroid = met.featureid and met.varid = v.hydroid and met.entity_type = 'dh_feature' ) where mcov.hydrocode = 'cbp6_met_coverage' and ST_NumBands(rast) > 1 ) to '/tmp/redo_nldas2_obs_hourly_bands.txt' CSV;

rburghol commented 2 weeks ago
\set band '1'
\set varkey 'prism_mod_daily'
\set hydrocode  'usgs_ws_01665500'
\set fname '/tmp/usgs_ws_01665500-prism-all.csv'

copy (
  select met.featureid, to_timestamp(met.tsendtime) as obs_date,
    extract(month from to_timestamp(met.tsendtime)) as mo,
    (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm,
    0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in
  from dh_feature as f
  left outer join field_data_dh_geofield as fgeo
  on (
    fgeo.entity_id = f.hydroid
    and fgeo.entity_type = 'dh_feature'
  )
  left outer join dh_variabledefinition as v
  on (
    v.varkey = :'varkey'
  )
  left outer join dh_feature as mcov
  on (
    mcov.hydrocode = 'cbp6_met_coverage'
  )
  left outer join dh_timeseries_weather as met
  on (
    mcov.hydroid = met.featureid and met.varid = v.hydroid
  )
  where f.hydrocode = :'hydrocode'
  order by met.tsendtime
) to :'fname' WITH HEADER CSV;
COBrogan commented 2 weeks ago

@rburghol Thus far, daymet has underperformed compared to PRISM. This surprised me as nothing I looked through made me think we'd see large scale differences. So I ran a basic test on our sample data sets (based on the data in access-file.R, so you'll need to run a few lines to get the datasets from therewithin). First, let's just compare all the precip data for the Culpepper gage. This is a bad relationship, creating an R^2 of 0.05...

comparePrecip <- lm(comp_data$daymet_p_cfs ~ comp_data$prism_p_cfs)
summary(comparePrecip)$adj.r.squared
plot(comp_data$daymet_p_cfs ~ comp_data$prism_p_cfs)

Briefly looking at the data, it really seemed like there was a date issue. Daymet always seemed like it was one day behind. So, I tried the following comparing today's daymet with tomorrow's PRISM. This yields an R^2 of 0.90. So, I'd say with some confidence that my assumption that the daymet day ENDED on the timestamp downloaded was incorrect. We assumed it was the same as PRISM e.g. downloading May 24th yields the day that is May 23rd 7AM - May 24th 7AM. But, instead, it seeems like daymet does the opposite such that downloading May 24th results in the day May 24th 7AM - May 25th 7AM. Probably warrants more testing before I jump to conclusions and hopefully the daymet team emails us back asap.

compareNextDayPRISM <- lm(comp_data$daymet_p_cfs[1:(nrow(comp_data) - 1)] ~ 
                        comp_data$prism_p_cfs[2:nrow(comp_data)])
summary(compareNextDayPRISM)$adj.r.squared
plot(comp_data$daymet_p_cfs[1:(nrow(comp_data) - 1)] ~ 
       comp_data$prism_p_cfs[2:nrow(comp_data)])