This is a simple processor for NOAA GOES GLM lightning data. It consists of a Makefile that downloads the required datasets from Amazon S3 storage and then processes them into two CSV files.
There is a simple NCL processing script, ~flashes_datasets_DDD.ncl~, that converts one GLM nc file and appends the result to the day's CSV files. Note that the standard nc_dump output misrepresents some of the data; see this [[][Note (Section 3.4.5)]] for information on unsigned data.
You run the Makefile, specifying the year and Julian day you are interested in.
make yyyy=2021 j=189 files flashes clean-cache
This isn't a very time-efficient method. It takes about 2 minutes to download the 500 MB of files for a day. There are about 4K files per day, and it then takes about 20 minutes (!) to run the NCL script over those 4K files.
You can request access to the [[][converted data]]
** CSV format
*** datasets.csv
This file is the global metadata for the datasets. The data includes the NOAA-provided id, the dataset_name, and three timestamps: the creation time, and the start and end times of the period covered by that file. The files cover about 20 seconds each.
*** flashes.csv
The flashes.csv file contains the flash information.
These are integer values that, when converted, describe the times at which the flash started and ended. Each value is a scaled offset, so converting it to an absolute time requires the reference start time given in the variable's units attribute.
short flash_time_offset_of_first_event(number_of_flashes) ;
flash_time_offset_of_first_event:long_name = "GLM L2+ Lightning Detection: time of occurrence of first constituent event in flash" ;
flash_time_offset_of_first_event:standard_name = "time" ;
flash_time_offset_of_first_event:_Unsigned = "true" ;
flash_time_offset_of_first_event:scale_factor = 0.0003814756f ;
flash_time_offset_of_first_event:add_offset = -5.f ;
flash_time_offset_of_first_event:units = "seconds since 2021-06-24 12:20:20.000" ;
flash_time_offset_of_first_event:axis = "T" ;
short flash_time_offset_of_last_event(number_of_flashes) ;
flash_time_offset_of_last_event:long_name = "GLM L2+ Lightning Detection: time of occurrence of last constituent event in flash" ;
flash_time_offset_of_last_event:standard_name = "time" ;
flash_time_offset_of_last_event:_Unsigned = "true" ;
flash_time_offset_of_last_event:scale_factor = 0.0003814756f ;
flash_time_offset_of_last_event:add_offset = -5.f ;
flash_time_offset_of_last_event:units = "seconds since 2021-06-24 12:20:20.000" ;
float flash_lat(number_of_flashes) ;
flash_lat:long_name = "GLM L2+ Lightning Detection: flash centroid (mean constituent event latitude weighted by their energies) latitude coordinate" ;
flash_lat:standard_name = "latitude" ;
flash_lat:units = "degrees_north" ;
flash_lat:axis = "Y" ;
float flash_lon(number_of_flashes) ;
flash_lon:long_name = "GLM L2+ Lightning Detection: flash centroid (mean constituent event latitude weighted by their energies) longitude coordinate" ;
flash_lon:standard_name = "longitude" ;
flash_lon:units = "degrees_east" ;
flash_lon:axis = "X" ;
short flash_area(number_of_flashes) ;
flash_area:_FillValue = -1s ;
flash_area:long_name = "GLM L2+ Lightning Detection: flash area coverage (pixels containing at least one constituent event only)" ;
flash_area:_Unsigned = "true" ;
flash_area:valid_range = 0s, -6s ;
flash_area:scale_factor = 152601.9f ;
flash_area:add_offset = 0.f ;
flash_area:units = "m2" ;
flash_area:coordinates = "group_parent_flash_id flash_id lightning_wavelength flash_time_threshold flash_time_offset_of_first_event flash_time_offset_of_last_event flash_lat flash_lon" ;
flash_area:grid_mapping = "goes_lat_lon_projection" ;
flash_area:cell_methods = "lightning_wavelength: sum flash_time_offset_of_first_event: flash_time_offset_of_last_event: sum area: sum (interval: 8 km comment: resolution of sensor data at nadir, area of constituent groups\' areas defined by variable group_parent_flash_id) where cloud" ;
short flash_energy(number_of_flashes) ;
flash_energy:_FillValue = -1s ;
flash_energy:long_name = "GLM L2+ Lightning Detection: flash radiant energy" ;
flash_energy:standard_name = "lightning_radiant_energy" ;
flash_energy:_Unsigned = "true" ;
flash_energy:valid_range = 0s, -6s ;
flash_energy:scale_factor = 9.99996e-16f ;
flash_energy:add_offset = 2.8515e-16f ;
flash_energy:units = "J" ;
flash_energy:coordinates = "group_parent_flash_id flash_id lightning_wavelength flash_time_threshold flash_time_offset_of_first_event flash_time_offset_of_last_event flash_lat flash_lon" ;
flash_energy:grid_mapping = "goes_lat_lon_projection" ;
flash_energy:cell_measures = "area: flash_area" ;
flash_energy:cell_methods = "lightning_wavelength: sum flash_time_offset_of_first_event: flash_time_offset_of_last_event: sum area: mean (centroid location of constituent events defined by variables group_parent_flash_id and event_parent_group_id weighted by their radiant energies) where cloud" ;
flash_energy:ancillary_variables = "flash_quality_flag" ;
short flash_quality_flag(number_of_flashes) ;
flash_quality_flag:_FillValue = -1s ;
flash_quality_flag:long_name = "GLM L2+ Lightning Detection: flash data quality flags" ;
flash_quality_flag:standard_name = "status_flag" ;
flash_quality_flag:_Unsigned = "true" ;
flash_quality_flag:valid_range = 0s, 5s ;
flash_quality_flag:units = "1" ;
flash_quality_flag:coordinates = "flash_id lightning_wavelength flash_time_threshold flash_time_offset_of_first_event flash_time_offset_of_last_event flash_lat flash_lon" ;
flash_quality_flag:grid_mapping = "goes_lat_lon_projection" ;
flash_quality_flag:cell_methods = "lightning_wavelength: sum flash_time_offset_of_first_event: flash_time_offset_of_last_event: sum area: mean (centroid location of constituent events defined by variables group_parent_flash_id and event_parent_group_id weighted by their radiant energies) where cloud" ;
flash_quality_flag:flag_values = 0s, 1s, 3s, 5s ;
flash_quality_flag:flag_meanings = "good_quality_qf degraded_due_to_flash_constituent_events_out_of_time_order_qf degraded_due_to_flash_constituent_event_count_exceeds_threshold_qf degraded_due_to_flash_duration_exceeds_threshold_qf" ;
flash_quality_flag:number_of_qf_values = 4b ;
flash_quality_flag:percent_good_quality_qf = 1.f ;
flash_quality_flag:percent_degraded_due_to_flash_constituent_events_out_of_time_order_qf = 0.f ;
flash_quality_flag:percent_degraded_due_to_flash_constituent_event_count_exceeds_threshold_qf = 0.f ;
flash_quality_flag:percent_degraded_due_to_flash_duration_exceeds_threshold_qf = 0.f ;
** Additional Information
We are only interested in lightning flashes. The GLM [[][ATBD]] describes the processing from lightning events to groups to flashes.
Information regarding the GOES downloads can be found at the [[][NOAA GOES Opendata Registry]] or the direct [[][AWS Link]]
There is also some literature comparing the GOES GLM to models [[]]
** California Specific Data
The file has most of the information for creating the required functions for the glm data.
Using our 500m pixels from the [[][dwr_grid]] data, we can get a general box of where the pixels are that we need.
-- Staging table for the raw CIMIS pixel list; the third CSV column is loaded
-- but not read by the insert below.
create temp table cimis_pixels (
    east integer,
    north integer,
    foo integer
);
\copy cimis_pixels from cimis_pixels.csv with CSV

-- One row per pixel; boundary holds the pixel footprint in SRID 3310.
create table pixel (
    pid serial primary key,
    east integer,
    north integer,
    boundary geometry(polygon, 3310)
);

-- Each footprint is a box extending 250 units either side of (east, north).
insert into pixel (east, north, boundary)
select
    c.east,
    c.north,
    st_setsrid(
        st_makebox2d(
            st_makepoint(c.east - 250, c.north - 250),
            st_makepoint(c.east + 250, c.north + 250)
        ),
        3310
    ) as boundary
from cimis_pixels as c;

-- Spatial index on the pixel footprints.
create index glm_pixel_idx on glm.pixel using gist (boundary);
COPY 1642286 |
INSERT 0 1642286 |
-- Whole-degree bounding box (SRID 4326) that encloses every pixel footprint.
with footprint as (
    select st_transform(boundary, 4326) as ll
    from pixel
)
select
    floor(min(st_xmin(ll))) as xn,
    ceil(max(st_xmax(ll))) as xx,
    floor(min(st_ymin(ll))) as yn,
    ceil(max(st_ymax(ll))) as yx
from footprint;
| xn | xx | yn | yx | |------+------+----+----| | -125 | -114 | 32 | 43 |
And now we can create a smaller set of pixels from this regions. It's actually easier to do this on the input csv files, rather than the psql database, since there is no real
We can also make some quick statistics of exactly how many lightning strikes we got for every day, as opposed to the CA ones.
# Count the lines of every flashes.csv under csv/ and record them as "file,count".
find csv -name flashes.csv | xargs wc -l | while read n f; do echo "$f,$n"; done | sort | tee flash_count.csv
# Same count for the California-only files.
# NOTE(review): the -name pattern is missing from this command; supply it before running.
find csv -name | xargs wc -l | while read n f; do echo "$f,$n"; done | sort | tee flash.ca_count.csv
select count(*) from dataset;
count |
4352224 |
-- Remove datasets that have no rows in flash.
-- NOT EXISTS is used rather than NOT IN: with NOT IN, a single NULL
-- flash.dataset_id makes the predicate unknown for every row and nothing
-- is deleted.
delete from dataset as d
where not exists (
    select 1
    from flash as f
    where f.dataset_id = d.dataset_id
);
DELETE 4119746 |
And now we have many fewer datasets
select count(*) from dataset;
count |
232478 |
drop table if exists pixel_flash;

-- Links each flash to the pixel whose footprint contains the flash centroid.
create table pixel_flash (
    pid integer references pixel(pid),
    dataset_id uuid references dataset(dataset_id),
    flash_id integer not null,
    centroid geometry(point, 3310)
);

-- The centroid is reprojected to SRID 3310 so it can be tested against
-- pixel.boundary, and is stored in that projection.
insert into pixel_flash (pid, dataset_id, flash_id, centroid)
select
    p.pid,
    f.dataset_id,
    f.flash_id,
    st_transform(f.centroid, 3310) as centroid
from flash as f
inner join pixel as p
    on st_contains(p.boundary, st_transform(f.centroid, 3310));
INSERT 0 425923 |
And now we can finally get some stats on our pixels!
-- Flash counts per America/Los_Angeles calendar date and hour of day.
create materialized view flashes_date_hr as
with local_flash as (
    select
        (f.start_time at time zone 'utc' at time zone 'america/los_angeles')::date as date,
        extract(hour from f.start_time at time zone 'utc' at time zone 'america/los_angeles') as hour
    from flash f
)
select
    date,
    hour,
    count(*) as count
from local_flash
group by date, hour
order by date, hour;
SELECT 4753 |
-- The five dates with the largest total flash count.
select
    date,
    sum(count) as count
from flashes_date_hr
group by date
order by count desc
limit 5;
| date | count | |------------+-------| | 2019-09-05 | 21005 | | 2020-08-16 | 19232 | | 2019-06-13 | 18195 | | 2020-05-30 | 16975 | | 2020-06-24 | 16094 |
-- Flash energy and flash count per pixel for each America/Los_Angeles
-- calendar date and hour of day.
create materialized view glm.flashes_pid_date_hr as
with d as (
    select
        pf.pid,
        -- NOTE(review): the original select list had an empty item here
        -- ("pid,,"). The outer sum(energy) needs an energy column and
        -- pixel_flash has none, so it is assumed to be flash.energy -- confirm.
        f.energy,
        (f.start_time at time zone 'utc' at time zone 'america/los_angeles')::date as date,
        extract(hour from f.start_time at time zone 'utc' at time zone 'america/los_angeles') as hour
    from flash f
    inner join pixel_flash pf
        on f.flash_id = pf.flash_id
        and f.dataset_id = pf.dataset_id
)
select
    pid,
    date,
    hour,
    sum(energy) as energy,
    count(*) as count
from d
group by pid, date, hour
order by date, hour, pid;
SELECT 334786 |
-- The five pixel-hours with the most flashes, restricted to July-October.
select *
from glm.flashes_pid_date_hr
where extract(month from date) in (7, 8, 9, 10)
order by count desc
limit 5
| pid | date | hour | energy | count | |---------+------------+------+--------+-------| | 1174222 | 2019-07-05 | 11 | 2394 | 86 | | 1175320 | 2019-07-06 | 11 | 3107 | 84 | | 1172033 | 2019-07-01 | 11 | 7143 | 84 | | 1175340 | 2019-07-08 | 11 | 1141 | 74 | | 265129 | 2020-07-26 | 18 | 3300 | 68 |
-- Per-pixel flash energy and flash count summed over July-October of each year.
-- NOTE(review): both select lists were incomplete in the original
-- ("select ,extract(...)" and "select from intensity_by_year"). The columns
-- below are the ones the aggregation requires -- confirm no others were intended.
create materialized view fire_year_threat as
with d as (
    select
        pid,
        energy,
        count,
        extract(year from date) as year
    from flashes_pid_date_hr
    where extract(month from date) in (7, 8, 9, 10)
),
intensity_by_year as (
    select
        pid,
        year,
        sum(energy) as energy,
        sum(count) as count
    from d
    group by year, pid
)
select
    pid,
    year,
    energy,
    count
from intensity_by_year;
-- The ten busiest pixel-hours across 2020-08-15 through 2020-08-18.
select *
from flashes_pid_date_hr
where date in (
    '2020-08-15'::date,
    '2020-08-16'::date,
    '2020-08-17'::date,
    '2020-08-18'::date
)
order by count desc
limit 10
| pid | date | hour | energy | count | |---------+------------+------+--------+-------| | 762286 | 2020-08-16 | 17 | 1139 | 40 | | 575223 | 2020-08-16 | 19 | 1308 | 37 | | 865208 | 2020-08-15 | 17 | 763 | 35 | | 967304 | 2020-08-15 | 17 | 730 | 34 | | 587373 | 2020-08-16 | 18 | 1027 | 33 | | 836352 | 2020-08-16 | 16 | 709 | 29 | | 1005100 | 2020-08-16 | 14 | 751 | 28 | | 763165 | 2020-08-16 | 17 | 373 | 21 | | 586669 | 2020-08-16 | 18 | 666 | 21 | | 1002078 | 2020-08-15 | 17 | 401 | 21 |
drop table if exists siege;

-- One row per flash, keyed by (dataset_id, flash_id), with the flash extent
-- stored as a polygon in SRID 3310.
create table siege (
    dataset_id uuid,
    flash_id integer,
    time timestamp with time zone,
    date date,
    energy integer,
    flash geometry(Polygon, 3310),
    primary key (dataset_id, flash_id)
);
-- Load the flashes that have a pixel_flash match and fall on the selected
-- America/Los_Angeles dates into siege.
with d as (
    select
        f.dataset_id,
        f.flash_id,
        (f.start_time at time zone 'utc' at time zone 'america/los_angeles')::timestamp as time,
        -- NOTE(review): the original select list had an empty item here
        -- ("as time,,"). The insert below reads energy, so flash.energy is
        -- assumed -- confirm.
        f.energy,
        f.area_m2,
        f.centroid,
        (f.start_time at time zone 'utc' at time zone 'america/los_angeles')::date as date
    from flash f
    inner join pixel_flash using (dataset_id, flash_id)
)
insert into siege (dataset_id, flash_id, time, date, energy, flash)
select
    dataset_id,
    flash_id,
    time,
    date,
    energy,
    -- NOTE(review): the geometry expression was missing after "energy," in the
    -- original. This reuses the buffer expression from the siege_pixel insert
    -- in this file -- confirm it is the intended flash extent.
    st_buffer(st_transform(centroid, 3310), sqrt(area_m2) / 1.5)
from d
where date in ('2020-08-15'::date, '2020-08-16'::date, '2020-08-17'::date);
INSERT 0 17088 |
We can also extend the flashes by their area for these regions. And summarize those.
drop table if exists siege_pixel;

-- One row per (pixel, flash) pair, tagged with a date.
create table siege_pixel (
    pid integer references pixel(pid),
    dataset_id uuid references dataset(dataset_id),
    flash_id integer not null,
    date date
);
-- Record every pixel whose footprint lies within a flash's buffered centroid.
-- The buffer radius is sqrt(area_m2) / 1.5 around the centroid in SRID 3310.
-- Only flashes on the four listed America/Los_Angeles dates are considered.
insert into siege_pixel (pid, dataset_id, flash_id, date)
select
    p.pid,
    f.dataset_id,
    f.flash_id,
    (f.start_time at time zone 'utc' at time zone 'america/los_angeles')::date as date
from flash f
inner join pixel p
    on st_within(p.boundary, st_buffer(st_transform(f.centroid, 3310), sqrt(f.area_m2) / 1.5))
where (f.start_time at time zone 'utc' at time zone 'america/los_angeles')::date
    in ('2020-08-15'::date, '2020-08-16'::date, '2020-08-17'::date, '2020-08-18'::date);
INSERT 0 4679363 |
-- Flash count per pixel and date, carried with the pixel footprint.
create table siege_pixel_flash (
    pid integer,
    date date,
    flashes integer,
    boundary geometry(Polygon, 3310)
);

with spsum as (
    select
        pid,
        date,
        count(*) as flashes
    from siege_pixel
    group by pid, date
)
insert into siege_pixel_flash (pid, date, flashes, boundary)
select
    spsum.pid,
    spsum.date,
    spsum.flashes,
    pixel.boundary
from spsum
inner join pixel using (pid);
INSERT 0 667053 |
*** Calfire Data
We want to show fire symbols on the page, so we need the centroids of the
fires. These fires came from the CalFIRE gis database, the fire20_1.gdb.
#+begin_src bash
ogr2ogr -nlt CONVERT_TO_LINEAR -f "PostgreSQL" PG:"service=glm" fire20_1.gdb firep20_1
#+end_src
#+begin_src sql
-- Move the imported fire perimeter table into the calfire schema.
alter table firep20_1
    set schema calfire;

set search_path = glm, calfire, public, topology;

alter table calfire.firep20_1
    rename to fires;

-- Add a 2-D point column in SRID 3310 and fill it with each shape's centroid.
select AddGeometryColumn('calfire', 'fires', 'centroid', 3310, 'point', 2);

update calfire.fires
set centroid = st_centroid(shape);