Open rburghol opened 3 months ago
Returning to raster index testing. Looking into NLDAS rasters, we see:
gdalinfo /backup/meteorology/2023/070/NLDAS_FORA0125_H.A20230311.2300.002.grb_CBP.gtiff
Driver: GTiff/GeoTIFF
Files: /backup/meteorology/2023/070/NLDAS_FORA0125_H.A20230311.2300.002.grb_CBP.gtiff
Size is 75, 65
Coordinate System is:
GEOGCRS["WGS 84"...
Band 1 Block=75x13 Type=Float64, ColorInterp=Gray
So, we are storing float64 data. To effectively tile this raster, we'd want to keep each tile to 2048 bytes per tile. float64 is 8 bytes per pixel, so the maximum size of our tile should be 16x16 per the recommendations here. So,
create temp table ts_rast_tile as
select a.tid, a.tstime, a.tsendtime, b.hydroid as varid, a.featureid, a.entity_type, st_tile(a.rast, 1, 16, 16) as rast
from dh_timeseries_weather as a
left outer join dh_variabledefinition as b
on (a.varid = b.hydroid)
where extract(year from to_timestamp(a.tstime)) = 2023
and b.varkey='nldas2_obs_hourly';
-- create an index
CREATE INDEX trast_ix ON ts_rast_tile USING GIST(ST_ConvexHull(rast));
--Add all constraints
SELECT AddRasterConstraints('ts_rast_tile'::name, 'rast'::name);
Let's evaluate the constraints on our rasters to see if these make sense. My current understanding is that constraints can speed up certain processes by ensuring postGIS knows all rasters in the tables fall in the bounds of the constraint. Notice that dh_timeseries_weather has no constrains nor a spatial index. The lack of constraints makes sense given that the data is representative of many different varykeys. But, it should have a spatial index.
select r_table_catalog,r_table_schema,r_table_name, r_raster_column,srid,scale_x,scale_y,blocksize_x,blocksize_y,same_alignment,regular_blocking,num_bands,pixel_types, nodata_values ,out_db, spatial_index--,extent
from raster_columns;
r_table_name | r_raster_column | srid | scale_x | scale_y | blocksize_x | blocksize_y | same_alignment | regular_blocking | num_bands | pixel_types | nodata_values | out_db | spatial_index |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
dh_timeseries_weather | rast | 0 | f | f | f | ||||||||
tmp_precipgrid | rast | 0 | f | f | f | ||||||||
ts_rast_tile | rast | 4326 | 0.125 | -0.125 | 16 | 16 | t | f | 1 | {64BF} | {9999} | {f} | t |
Let's give dh_timeseries_weather a spatial index:
CREATE INDEX trast_ix ON dh_timeseries_weather USING GIST(ST_Envelope(rast));
From here, we should be able to query our temporary table ts_rast_tile
to test if the tile raster has improved our performance at all.
WITH usgs_features AS (
SELECT *
FROM dh_feature
WHERE hydrocode = 'usgs_ws_01668000'
)
Select met.featureid, to_timestamp(met.tsendtime) as obs_date,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
(ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_mm,
0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_in
FROM usgs_features as f
left outer join field_data_dh_geofield as fgeo
on (
fgeo.entity_id = f.hydroid
and fgeo.entity_type = 'dh_feature'
)
JOIN(
select *
from ts_rast_tile as met
left outer join dh_feature as mcov
on (
mcov.hydrocode = 'cbp6_met_coverage'
)
left outer join dh_variabledefinition as v
on (
v.varkey = 'nldas2_obs_hourly'--'nldas2_precip_hourly_tiled'
)
where mcov.hydroid = met.featureid
and met.varid = v.hydroid
AND extract(year from to_timestamp(met.tsendtime)) = '2023'
) AS met
ON ST_Intersects(ST_ConvexHull(met.rast),fgeo.dh_geofield_geom)
WHERE f.hydrocode = 'usgs_ws_01668000'
order by met.tsendtime;
Time: 36921.604 ms (00:36.922)
One year summarized in 36 seconds. When comparing to dh_timeseries_weather:
WITH usgs_features AS (
SELECT *
FROM dh_feature
WHERE hydrocode = 'usgs_ws_01668000'
)
Select met.featureid, to_timestamp(met.tsendtime) as obs_date,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
(ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_mm,
0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_in
FROM usgs_features as f
left outer join field_data_dh_geofield as fgeo
on (
fgeo.entity_id = f.hydroid
and fgeo.entity_type = 'dh_feature'
)
JOIN(
select *
from dh_timeseries_weather as met
left outer join dh_variabledefinition as b
on (met.varid = b.hydroid)
where extract(year from to_timestamp(met.tstime)) = 2023
and b.varkey='nldas2_obs_hourly'
) AS met
ON ST_Intersects(ST_ConvexHull(met.rast),fgeo.dh_geofield_geom)
WHERE f.hydrocode = 'usgs_ws_01668000'
order by met.tsendtime;
Time: 29694.877 ms (00:29.695)
Interestingly not a big gain, but this method now seems faster than previously. The spatial index likely helped improve the speed. From dh_timeserires_weather
, the above query took about 26 minutes to run for the full 40-year period for usgs_ws_01668000
. calc_raster_ts
took 49 minutes. Using the tiled temporary table above without constraints took 35 minutes.
Confirm the number of tiles we get make sense:
COPY(
WITH usgs_features AS (
SELECT *
FROM dh_feature
WHERE hydrocode = 'usgs_ws_01668000'
)
Select st_astext(st_convexhull(met.rast))
FROM usgs_features as f
left outer join field_data_dh_geofield as fgeo
on (
fgeo.entity_id = f.hydroid
and fgeo.entity_type = 'dh_feature'
)
JOIN(
select *
from ts_rast_tile as met
left outer join dh_variabledefinition as b
on (met.varid = b.hydroid)
where extract(year from to_timestamp(met.tstime)) = 2023
and b.varkey='nldas2_obs_hourly'
and extract(month from to_timestamp(met.tstime)) = 12
and extract(day from to_timestamp(met.tstime)) = 12
) AS met
ON ST_Intersects(ST_ConvexHull(met.rast),fgeo.dh_geofield_geom)
WHERE f.hydrocode = 'usgs_ws_01668000'
) to '/tmp/getRasterTiles.csv' CSV;
Switching to 4x4 tiles results in one year summaries taking 1 minute, nearly twice the time as the 16x16 tiled raster. However, the below query ran extremely quickly. The only difference is no clipping is used here, so this is the sum of all tiles (which is bigger than our boundary by a lot).
WITH usgs_features AS (
SELECT *
FROM dh_feature
WHERE hydrocode = 'usgs_ws_01668000'
)
Select met.featureid, to_timestamp(met.tsendtime) as obs_date,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
sum((ST_summarystats(met.rast, 1, TRUE)).mean)as precip_mm,
sum(0.0393701 * (ST_summarystats(met.rast, 1, TRUE)).mean) as precip_in
FROM usgs_features as f
left outer join field_data_dh_geofield as fgeo
on (
fgeo.entity_id = f.hydroid
and fgeo.entity_type = 'dh_feature'
)
JOIN(
select *
from ts_rast_tile as met
left outer join dh_feature as mcov
on (
mcov.hydrocode = 'cbp6_met_coverage'
)
left outer join dh_variabledefinition as v
on (
v.varkey = 'nldas2_obs_hourly'--'nldas2_precip_hourly_tiled'
)
where mcov.hydroid = met.featureid
and met.varid = v.hydroid
AND extract(year from to_timestamp(met.tsendtime)) = '2023'
--and extract(month from to_timestamp(met.tstime)) = 12
--and extract(day from to_timestamp(met.tstime)) = 12
) AS met
ON ST_Intersects(ST_ConvexHull(met.rast),fgeo.dh_geofield_geom)
WHERE f.hydrocode = 'usgs_ws_01668000'
GROUP BY met.featureid,met.tsendtime
order by met.tsendtime;
Time: 2665.869 ms (00:02.666)
Insert 4x4 tiles into db and repeat for 16x16 via 'nldas2_precip_hourly_tiled_16x16
varkey
'nldas2_precip_hourly_tiled_4by4' as varkey,
v.datatype,
'nldas2_precip_hourly_tiled_4by4' as varcode,
v.timestep,
v.timeunits,
v.nodataval,
'NLDAS_tiled4by4' as varabbrev,
v.multiplicity
from dh_variabledefinition as v
where v.varkey='nldas2_obs_hourly';
insert into dh_timeseries_weather(tstime,tsendtime, varid, featureid, entity_type, rast)
select a.tstime, a.tsendtime, c.hydroid as varid, a.featureid, a.entity_type, st_tile(a.rast, 1, 4, 4) as rast
from dh_timeseries_weather as a
left outer join dh_variabledefinition as b
on (a.varid = b.hydroid)
left outer join dh_variabledefinition as c
on (c.varkey = 'nldas2_precip_hourly_tiled_4by4')
where b.varkey='nldas2_obs_hourly'
-- comment this to get them all
and extract(year from to_timestamp(a.tstime)) = 2023
and c.varkey is not null;
INSERT 0 4051680
Use the new tiled data set to confirm results above:
WITH usgs_features AS (
SELECT *
FROM dh_feature
WHERE hydrocode = 'usgs_ws_01668000'
)
Select met.featureid, to_timestamp(met.tsendtime) as obs_date,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
sum((ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean) as precip_mm,
sum(0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean) as precip_in
FROM usgs_features as f
left outer join field_data_dh_geofield as fgeo
on (
fgeo.entity_id = f.hydroid
and fgeo.entity_type = 'dh_feature'
)
JOIN(
select *
from dh_timeseries_weather as met
left outer join dh_variabledefinition as b
on (met.varid = b.hydroid)
where extract(year from to_timestamp(met.tstime)) = 2023
and b.varkey='nldas2_precip_hourly_tiled_4by4'
) AS met
ON ST_Intersects(ST_ConvexHull(met.rast),fgeo.dh_geofield_geom)
WHERE f.hydrocode = 'usgs_ws_01668000'
GROUP BY met.featureid,met.tsendtime
order by met.tsendtime;
We can restructure the query to use union. So, the query would first get the USGS feature, then find the tiles that overlap with the feature, Union them into a single raster, and then proceed with clipping and summary. Removing one of the st_summarystats
calls speeds up performance by half. Overall, this query took 16 seconds. Which is still one second slower than the dh_timeseries_weather equivalent. The 16x16 tiles were about on par with dh_timeseries_weather
WITH usgs_features AS (
SELECT *
FROM dh_feature
WHERE hydrocode = 'usgs_ws_01668000'
),
metUnion as (
Select met.featureid, met.tsendtime,
st_union(met.rast) as rast
FROM usgs_features as f
left outer join field_data_dh_geofield as fgeo
on (
fgeo.entity_id = f.hydroid
and fgeo.entity_type = 'dh_feature'
)
JOIN(
select *
from dh_timeseries_weather as met
left outer join dh_variabledefinition as b
on (met.varid = b.hydroid)
where extract(year from to_timestamp(met.tstime)) = 2023
and b.varkey='nldas2_precip_hourly_tiled_4by4'
) AS met
ON ST_Intersects(ST_ConvexHull(met.rast),fgeo.dh_geofield_geom)
group by met.featureid, met.tsendtime
)
Select met.featureid, to_timestamp(met.tsendtime) as obs_date,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
sum((ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean) as precip_mm
FROM usgs_features as f
left outer join field_data_dh_geofield as fgeo
on (
fgeo.entity_id = f.hydroid
and fgeo.entity_type = 'dh_feature'
)
JOIN metUnion AS met
ON ST_Intersects(ST_ConvexHull(met.rast),fgeo.dh_geofield_geom)
GROUP BY met.featureid,met.tsendtime
order by met.tsendtime;
Just check the overlap:
\set band '1'
\set ftype 'cbp6_landseg'
\set varkey 'nldas2_obs_hourly'
\set resample_varkey 'daymet_mod_daily'
\set hydrocode 'N51137'
\set fname '/tmp/N51137-nldas2-all.csv'
\timing ON
select met.featureid, to_timestamp(met.tsendtime) as obs_date,
met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
extract(hour from to_timestamp(met.tsendtime)) as hr,
count(met.*) as num_tiles
from dh_feature as f
left outer join field_data_dh_geofield as fgeo
on ( fgeo.entity_id = f.hydroid and fgeo.entity_type = 'dh_feature' )
left outer join dh_variabledefinition as v
on ( v.varkey = :'varkey' )
left outer join raster_templates as rt
on ( rt.varkey = :'resample_varkey' )
left outer join dh_feature as mcov
on ( mcov.hydrocode = 'cbp6_met_coverage' )
left outer join dh_timeseries_weather as met
on ( mcov.hydroid = met.featureid and met.varid = v.hydroid and fgeo.dh_geofield_geom && met.rast )
where f.hydrocode = :'hydrocode'
and f.ftype = :'ftype'
and ( (met.tstime >= 441777600) or (441777600 = -1) ) and ( (met.tsendtime <= 1704085199) or (1704085199 = -1) )
group by met.featureid, to_timestamp(met.tsendtime),
met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) ,
extract(month from to_timestamp(met.tsendtime)) ,
extract(day from to_timestamp(met.tsendtime)) ,
extract(hour from to_timestamp(met.tsendtime))
order by met.tsendtime
;
Just check the overlap:
\set band '1'
\set ftype 'cbp6_landseg'
\set varkey 'nldas2_precip_hourly_tiled'
\set resample_varkey 'daymet_mod_daily'
\set hydrocode 'N51137'
\set fname '/tmp/N51137-nldas2-all.csv'
\timing ON
select met.featureid, to_timestamp(met.tsendtime) as obs_date,
met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
extract(hour from to_timestamp(met.tsendtime)) as hr,
count(met.*) as num_tiles
from dh_feature as f
left outer join field_data_dh_geofield as fgeo
on ( fgeo.entity_id = f.hydroid and fgeo.entity_type = 'dh_feature' )
left outer join dh_variabledefinition as v
on ( v.varkey = :'varkey' )
left outer join raster_templates as rt
on ( rt.varkey = :'resample_varkey' )
left outer join dh_feature as mcov
on ( mcov.hydrocode = 'cbp6_met_coverage' )
left outer join dh_timeseries_weather as met
on ( mcov.hydroid = met.featureid and met.varid = v.hydroid and fgeo.dh_geofield_geom && met.rast )
where f.hydrocode = :'hydrocode'
and f.ftype = :'ftype'
and ( (met.tstime >= 441777600) or (441777600 = -1) ) and ( (met.tsendtime <= 1704085199) or (1704085199 = -1) )
group by met.featureid, to_timestamp(met.tsendtime),
met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) ,
extract(month from to_timestamp(met.tsendtime)) ,
extract(day from to_timestamp(met.tsendtime)) ,
extract(hour from to_timestamp(met.tsendtime))
order by met.tsendtime
;
Re-order the tables
WITH f as (select * from dh_feature where hydrocode = :'hydrocode' and ftype = :'ftype' ),
v as (select * from dh_variabledefinition where varkey = :'varkey'),
rt as (select * from raster_templates where varkey = :'resample_varkey' ),
mcov as (select * from dh_feature where hydrocode = 'cbp6_met_coverage' )
select met.featureid, to_timestamp(met.tsendtime) as obs_date,
met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
extract(hour from to_timestamp(met.tsendtime)) as hr,
count(met.*) as num_tiles
from dh_timeseries_weather as met, field_data_dh_geofield as fgeo
where met.featureid in (select hydroid from mcov)
and met.varid in (select hydroid from v)
and ( (met.tstime >= 441777600) or (441777600 = -1) )
and ( (met.tsendtime <= 1704085199) or (1704085199 = -1) )
and fgeo.entity_type = 'dh_feature'
and fgeo.entity_id in (select hydroid from f)
and (fgeo.dh_geofield_geom && met.rast )
group by met.featureid, to_timestamp(met.tsendtime),
met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) ,
extract(month from to_timestamp(met.tsendtime)) ,
extract(day from to_timestamp(met.tsendtime)) ,
extract(hour from to_timestamp(met.tsendtime))
order by met.tsendtime
;
WITH f as (select * from dh_feature where hydrocode = :'hydrocode' and ftype = :'ftype' ),
v as (select * from dh_variabledefinition where varkey = :'varkey'),
rt as (select * from raster_templates where varkey = :'resample_varkey' ),
mcov as (select * from dh_feature where hydrocode = 'cbp6_met_coverage' )
select met.featureid, to_timestamp(met.tsendtime) as obs_date,
met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
extract(hour from to_timestamp(met.tsendtime)) as hr,
count(met.*) as num_tiles
from dh_timeseries_weather as met, field_data_dh_geofield as fgeo
where met.featureid in (select hydroid from mcov)
and met.varid in (select hydroid from v)
and met.tstime >= 441777600
and met.tsendtime <= 1704085199
and fgeo.entity_type = 'dh_feature'
and fgeo.entity_id in (select hydroid from f)
and (fgeo.dh_geofield_geom && met.rast )
group by met.featureid, to_timestamp(met.tsendtime),
met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) ,
extract(month from to_timestamp(met.tsendtime)) ,
extract(day from to_timestamp(met.tsendtime)) ,
extract(hour from to_timestamp(met.tsendtime))
order by met.tsendtime
;
- Inventory query: count only
WITH f as (select from dh_feature where hydrocode = :'hydrocode' and ftype = :'ftype' ),
v as (select hydroid from dh_variabledefinition where varkey = :'varkey'),
rt as (select from raster_templates where varkey = :'resample_varkey' ),
mcov as (select * from dh_feature where hydrocode = 'cbp6_met_coverage' )
select count(met.featureid)
from dh_timeseries_weather as met, field_data_dh_geofield as fgeo
where met.featureid in (select hydroid from mcov)
and met.varid in (select hydroid from v)
and met.tstime >= 441777600
and met.tsendtime <= 1704085199
and fgeo.entity_type = 'dh_feature'
and fgeo.entity_id in (select hydroid from f)
and (fgeo.dh_geofield_geom && met.rast )
;
- use varid and featureid hard coded
- These resulted in change from 30 seconds down to 10 seconds for the inventory query
\set band '1' \set ftype 'cbp6_landseg' \set varkey 'nldas2_precip_hourly_tiled' \set resample_varkey 'daymet_mod_daily' \set hydrocode 'N51137' \set fname '/tmp/N51137-nldas2-all.csv' \timing ON
-- sets all integer feature and varid with query select hydroid as met_varid from dh_variabledefinition where varkey = :'varkey' \gset select hydroid as fid from dh_feature where hydrocode = :'hydrocode' and ftype = :'ftype' \gset select hydroid as covid from dh_feature where hydrocode = 'cbp6_met_coverage' \gset
select count(met.featureid) from dh_timeseries_weather as met, field_data_dh_geofield as fgeo where met.featureid = :covid and met.varid = :met_varid and met.tstime >= 441777600 and met.tsendtime <= 1704085199 and fgeo.entity_type = 'dh_feature' and fgeo.entity_id = :fid and (fgeo.dh_geofield_geom && met.rast ) ;
350635 (1 row)
Time: 413137.081 ms (06:53.137)
- Add a polygon column with the envelope
alter table dh_timeseries_weather add column bbox geometry; update dh_timeseries_weather set bbox = st_envelope(rast) where rast is not null;
select count(met.featureid) from dh_timeseries_weather as met, field_data_dh_geofield as fgeo where met.featureid = :covid and met.varid = :met_varid and met.tstime >= 441777600 and met.tsendtime <= 1704085199 and fgeo.entity_type = 'dh_feature' and fgeo.entity_id = :fid and (fgeo.dh_geofield_geom && met.bbox ) ;
350635 (1 row)
Time: 24043.444 ms (00:24.043)
- Now retrieve some data:
select met.featureid, to_timestamp(met.tsendtime) as obs_date, met.tstime, met.tsendtime, extract(year from to_timestamp(met.tsendtime)) as yr, extract(month from to_timestamp(met.tsendtime)) as mo, extract(day from to_timestamp(met.tsendtime)) as da, extract(hour from to_timestamp(met.tsendtime)) as hr, (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_mm, 0.0393701 (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_in from dh_timeseries_weather as met, field_data_dh_geofield as fgeo where met.featureid = :covid and met.varid = :met_varid and met.tstime >= 441777600 -- and met.tsendtime <= 441777600 + 86400 21 and met.tsendtime <= 1704085199 and fgeo.entity_type = 'dh_feature' and fgeo.entity_id = :fid and (fgeo.dh_geofield_geom && met.bbox ) order by met.tsendtime ;
This is a mis-statement of what I did -- this is just the time for the NON-resampled. Still an improvement over previous, which took like 2 hours I think? Using bbox
polygon field, tiled nldas2
rasters resampling to daymet
resolution (in conjunction with the spatial summary) took less than 10 minutes
@COBrogan @mwdunlap2004 - reconfiguring the query saved a couple minutes
select
with the \gset
command to populate vars with a querybbox
index on the tiles saved like 90 minutes I thinkH51113 N51047 N51113 H51079 L51079 N51079 N51003 N51137 N51177
RU2_5940_6200 RU2_6091_6090 RU2_6200_6170 RU2_6090_6220 RU2_6220_6170 RU3_6170_6040 RU4_6040_6030
\set band '1'
\set ftype 'cbp6_landseg'
\set varkey 'nldas2_precip_hourly_tiled'
\set resample_varkey 'daymet_mod_daily'
\set hydrocode 'N51177'
\set fname '/tmp/N51177-nldas2-all.csv'
-- sets all integer feature and varid with query
select hydroid as met_varid from dh_variabledefinition where varkey = :'varkey' \gset
select hydroid as fid from dh_feature where hydrocode = :'hydrocode' and ftype = :'ftype' \gset
select hydroid as covid from dh_feature where hydrocode = 'cbp6_met_coverage' \gset
\timing ON
select met.featureid, to_timestamp(met.tsendtime) as obs_date,
met.tid, met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
extract(hour from to_timestamp(met.tsendtime)) as hr,
(ST_summarystats(st_clip(st_resample(met.rast, rt.rast), fgeo.dh_geofield_geom), 1, TRUE)).count,
(ST_summarystats(st_clip(st_resample(met.rast, rt.rast), fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_mm,
0.0393701 * (ST_summarystats(st_clip(st_resample(met.rast, rt.rast), fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_in
from dh_timeseries_weather as met, field_data_dh_geofield as fgeo, raster_templates as rt
where met.featureid = :covid
and met.varid = :met_varid
and met.tstime >= 441777600
and rt.varkey = :'resample_varkey'
and met.tsendtime <= 441777600 + 86400
and met.tsendtime <= 1704085199
and fgeo.entity_type = 'dh_feature'
and fgeo.entity_id = :fid
and (fgeo.dh_geofield_geom && met.bbox )
order by met.tsendtime
;
resampled_raster_ts
script
i=N51137; met_scenario="nldas2_resamptile"
sbatch /opt/model/meta_model/run_model raster_met $met_scenario $i tmp/scratch/resam wdm download 02_db_resample
Timing on one that works
Time: 6359980.953 ms (01:45:59.981)
\set band '1'
\set ftype 'cbp6_landseg'
\set varkey 'nldas2_precip_hourly_tiled'
\set resample_varkey 'daymet_mod_daily'
\set hydrocode 'H51113'
\set fname '/tmp/H51113-nldas2-all.csv'
-- sets all integer feature and varid with query
select hydroid as met_varid from dh_variabledefinition where varkey = :'varkey' \gset
select hydroid as fid from dh_feature where hydrocode = :'hydrocode' and ftype = :'ftype' \gset
select hydroid as covid from dh_feature where hydrocode = 'cbp6_met_coverage' \gset
\timing ON
select met.featureid, to_timestamp(met.tsendtime) as obs_date,
met.tid, met.tstime, met.tsendtime,
extract(year from to_timestamp(met.tsendtime)) as yr,
extract(month from to_timestamp(met.tsendtime)) as mo,
extract(day from to_timestamp(met.tsendtime)) as da,
extract(hour from to_timestamp(met.tsendtime)) as hr,
(ST_summarystats(st_clip(st_resample(met.rast, rt.rast), fgeo.dh_geofield_geom), 1, TRUE)).count,
(ST_summarystats(st_clip(st_resample(met.rast, rt.rast), fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_mm,
0.0393701 * (ST_summarystats(st_clip(st_resample(met.rast, rt.rast), fgeo.dh_geofield_geom), 1, TRUE)).mean as precip_in
from dh_timeseries_weather as met, field_data_dh_geofield as fgeo, raster_templates as rt
where met.featureid = :covid
and met.varid = :met_varid
and met.tstime >= 441777600
and rt.varkey = :'resample_varkey'
and met.tsendtime <= 1704085199
and fgeo.entity_type = 'dh_feature'
and fgeo.entity_id = :fid
and (fgeo.dh_geofield_geom && met.bbox )
order by met.tsendtime
;
Try retiling according to https://postgis.net/docs/RT_Retile.html
Tasks:
st_envelope(rast)
??Table Updates
-- get a randomish raster to work with select tid, to_timestamp(tstime) from dh_timeseries_weather as a left outer join dh_variabledefinition as b on (a.varid = b.hydroid and b.varkey='nldas2_obs_hourly') where a.tid is not null limit 1; -- tid | to_timestamp -----------+------------------------ -- 110396782 | 2006-01-01 07:00:00-05
create temp table ts_rast_tile as select a.tid, a.tstime, a.tsendtime, b.hydroid as varid, a.featureid, a.entity_type, st_tile(a.rast, 1, 10, 10) as rast from dh_timeseries_weather as a left outer join dh_variabledefinition as b on (a.varid = b.hydroid and b.varkey='nldas2_precip_hourly') where a.tid = 110396782 ;
select to_timestamp(min(tstime)), max(to_timetamp(tstime)) from ts_rast_tile ;
select count(*) from ts_rast_tile;
3504000 / 3504001
select 3504000/( 365 * 24); ?column?
-- so, we have 400 tiles over the bay area
create temp table ts_rast_tile as select a.tid, a.tstime, a.tsendtime, b.hydroid as varid, a.featureid, a.entity_type, st_tile(a.rast, 1, 50, 50) as rast from dh_timeseries_weather as a left outer join dh_variabledefinition as b on (a.varid = b.hydroid) where extract(year from to_timestamp(a.tstime)) = 2001 and b.varkey='nldas2_obs_hourly';
-- create an index CREATE INDEX trast_ix ON ts_rast_tile USING GIST(ST_Envelope(rast)); CREATE INDEX trast_fix ON ts_rast_tile (featureid); CREATE INDEX trast_vix ON ts_rast_tile (varid); CREATE INDEX trast_tix ON ts_rast_tile (tstime);
explain select count(*) from dh_timeseries_weather as met left outer join field_data_dh_geofield as cov on (met.rast && cov.dh_geofield_geom) where cov.entity_id = 290010 and met.rast is not null and met.varid = 1451;
Aggregate (cost=16.37..16.38 rows=1 width=8) -> Nested Loop (cost=0.99..16.37 rows=1 width=0) Join Filter: ((met.rast)::geometry && cov.dh_geofield_geom) -> Index Scan using dh_timeseries_weather_dh_tsw_vix_idx on dh_timeseries_weather met (cost=0.57..7.16 rows=1 width=18) Index Cond: (varid = 1451) Filter: (rast IS NOT NULL) -> Index Scan using field_data_dh_geofield_entity_id_idx on field_data_dh_geofield cov (cost=0.42..8.44 rows=1 width=2248) Index Cond: (entity_id = 290010) (8 rows)
explain select count(*) from ts_rast_tile as met left outer join field_data_dh_geofield as cov on (met.rast && cov.dh_geofield_geom) where cov.entity_id = 290010 and met.varid = 1451;
Aggregate (cost=2883822.45..2883822.46 rows=1 width=8) -> Nested Loop (cost=0.42..2883813.69 rows=3505 width=0) Join Filter: ((met.rast)::geometry && cov.dh_geofield_geom) -> Index Scan using field_data_dh_geofield_entity_id_idx on field_data_dh_geofield cov (cost=0.42..8.44 rows=1 width=2248) Index Cond: (entity_id = 290010) -> Seq Scan on ts_rast_tile met (cost=0.00..211616.38 rows=3504510 width=293) Filter: (varid = 1451) JIT: Functions: 10 Options: Inlining true, Optimization true, Expressions true, Deforming true (10 rows)
\set band '1' \set varkey 'nldas2_obs_hourly' \set hydrocode 'usgs_ws_02031000'
select met.featureid, to_timestamp(met.tsendtime) as obs_date, extract(year from to_timestamp(met.tsendtime)) as yr, extract(month from to_timestamp(met.tsendtime)) as mo, extract(day from to_timestamp(met.tsendtime)) as da, (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm, 0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in from dh_feature as f left outer join field_data_dh_geofield as fgeo on ( fgeo.entity_id = f.hydroid and fgeo.entity_type = 'dh_feature' ) left outer join dh_variabledefinition as v on ( v.varkey = :'varkey' ) left outer join dh_feature as mcov on ( mcov.hydrocode = 'cbp6_met_coverage' ) left outer join dh_timeseries_weather as met on ( mcov.hydroid = met.featureid and met.varid = v.hydroid AND extract(year from to_timestamp(met.tstime)) = 2001 AND fgeo.dh_geofield_geom && met.rast ) where f.hydrocode = :'hydrocode' and met.tid is not null order by met.tsendtime;
select met.featureid, to_timestamp(met.tsendtime) as obs_date, extract(year from to_timestamp(met.tsendtime)) as yr, extract(month from to_timestamp(met.tsendtime)) as mo, extract(day from to_timestamp(met.tsendtime)) as da, (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm, 0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in from dh_feature as f left outer join field_data_dh_geofield as fgeo on ( fgeo.entity_id = f.hydroid and fgeo.entity_type = 'dh_feature' ) left outer join dh_variabledefinition as v on ( v.varkey = :'varkey' ) left outer join dh_feature as mcov on ( mcov.hydrocode = 'cbp6_met_coverage' ) left outer join dh_timeseries_weather as met on ( mcov.hydroid = met.featureid and met.varid = v.hydroid ) where f.hydrocode = :'hydrocode' order by met.tsendtime;
insert into dh_timeseries_weather(tstime,tsendtime, varid, featureid, entity_type, rast) select a.tstime, a.tsendtime, c.hydroid as varid, a.featureid, a.entity_type, st_tile(a.rast, 1, 50, 50) as rast from dh_timeseries_weather as a left outer join dh_variabledefinition as b on (a.varid = b.hydroid) left outer join dh_variabledefinition as c on (c.varkey = 'nldas2_precip_hourly_tiled') where b.varkey='nldas2_obs_hourly' -- comment this to get them all and extract(year from to_timestamp(a.tstime)) = 2001 and c.varkey is not null;
\set band '1' \set varkey 'nldas2_precip_hourly_tiled' \set hydrocode 'usgs_ws_02031000'
select met.featureid, to_timestamp(met.tsendtime) as obs_date, extract(year from to_timestamp(met.tsendtime)) as yr, extract(month from to_timestamp(met.tsendtime)) as mo, extract(day from to_timestamp(met.tsendtime)) as da, (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm, 0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in from dh_feature as f left outer join field_data_dh_geofield as fgeo on ( fgeo.entity_id = f.hydroid and fgeo.entity_type = 'dh_feature' ) left outer join dh_variabledefinition as v on ( v.varkey = :'varkey' ) left outer join dh_feature as mcov on ( mcov.hydrocode = 'cbp6_met_coverage' ) left outer join dh_timeseries_weather as met on ( mcov.hydroid = met.featureid and met.varid = v.hydroid AND extract(year from to_timestamp(met.tstime)) = 2001 AND fgeo.dh_geofield_geom && met.rast ) where f.hydrocode = :'hydrocode' and met.tid is not null order by met.tsendtime;
\set band '1' \set varkey 'nldas2_precip_hourly_tiled' \set hydrocode 'usgs_ws_02031000' \set covid 290010 \set metid 617850 \set metvarid 1455 -- not resampled into small files -- \set metvarid 1455
select met.featureid, to_timestamp(met.tsendtime) as obs_date, extract(year from to_timestamp(met.tsendtime)) as yr, extract(month from to_timestamp(met.tsendtime)) as mo, extract(day from to_timestamp(met.tsendtime)) as da, (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_mm, 0.0393701 * (ST_summarystats(st_clip(met.rast, fgeo.dh_geofield_geom), :band, TRUE)).mean as precip_in from field_data_dh_geofield as fgeo left outer join dh_timeseries_weather as met on ( met.featureid = :metid and met.varid = :metvarid AND extract(year from to_timestamp(met.tstime)) = 2001 AND fgeo.dh_geofield_geom && met.rast ) where fgeo.entity_id = :covid and met.tid is not null and fgeo.entity_type = 'dh_feature' order by met.tsendtime;
insert into dh_timeseries_weather(tstime,tsendtime, varid, featureid, entity_type, rast) select a.tstime, a.tsendtime, c.hydroid as varid, a.featureid, a.entity_type, st_tile(a.rast, 1, 50, 50) as rast from dh_timeseries_weather as a left outer join dh_variabledefinition as b on (a.varid = b.hydroid) left outer join dh_variabledefinition as c on (c.varkey = 'nldas2_precip_hourly_tiled') where extract(year from to_timestamp(a.tstime)) <> 2001 and b.varkey='nldas2_obs_hourly' and c.varkey is not null;