vergauwenthomas / MetObs_toolkit

A toolkit for using non-traditional meteorological observations
https://vergauwenthomas.github.io/MetObs_toolkit/
MIT License
12 stars 4 forks source link

Modeldata extraction on Station #391

Closed vergauwenthomas closed 10 months ago

vergauwenthomas commented 10 months ago

An error occurs when extracting modeldata on a Station.

To reproduce, use demo data:

# Minimal reproduction script, using the demo data referenced via metobs_toolkit.
import metobs_toolkit
your_dataset = metobs_toolkit.Dataset()
your_dataset.update_settings(
    input_data_file=metobs_toolkit.demo_datafile, # path to the data file
    input_metadata_file=metobs_toolkit.demo_metadatafile,
    template_file=metobs_toolkit.demo_template,
)

# Update the gapsize BEFORE reading in the data from file
your_dataset.update_qc_settings(gapsize_in_records = 20) 

# Read in the data from file (after the gapsize has been set above)
your_dataset.import_data_from_file()

# Coarsen the time resolution ('15T' is the pandas offset alias for 15 minutes)
your_dataset.coarsen_time_resolution(freq='15T')

# First apply quality control
your_dataset.apply_quality_control(obstype='temp') # we use the default settings in this example

# Interpret the outliers as missing observations and gaps.
your_dataset.update_gaps_and_missing_from_outliers(obstype='temp', 
                                                   n_gapsize=None) 

# Update the settings (definition of the period to calculate biases for)
your_dataset.update_gap_and_missing_fill_settings(
                                                  gap_debias_prefered_leading_period_hours=24,
                                                  gap_debias_prefered_trailing_period_hours=24,
                                                  gap_debias_minimum_leading_period_hours=6,
                                                  gap_debias_minimum_trailing_period_hours=6,
                                                  )
# (As a demonstration, we will fill the gaps of a single station. The following functions can also be
# directly applied on the dataset.)
your_station = your_dataset.get_station('vlinder05')

# Get ERA5 modeldata at the location of your stations and period.
# NOTE: this is the call that raises the KeyError: 'name' reported in this issue.
ERA5_modeldata = your_station.get_modeldata(modelname='ERA5_hourly',
                                                                    obstype='temp')

Raises this error:


KeyError Traceback (most recent call last) File ~/anaconda3/envs/metobs_dev/lib/python3.9/site-packages/pandas/core/indexes/base.py:3790, in Index.get_loc(self, key) 3789 try: -> 3790 return self._engine.get_loc(casted_key) 3791 except KeyError as err:

File index.pyx:152, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:181, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:7080, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'name'

The above exception was the direct cause of the following exception:

KeyError Traceback (most recent call last) Cell In[46], line 14 10 your_station = your_dataset.get_station('vlinder05') 13 #Get ERA5 modeldata at the location of your stations and period. ---> 14 ERA5_modeldata = your_station.get_modeldata(modelname='ERA5_hourly', 15 obstype='temp') 17 #Use the debias method to fill the gaps 18 gapfill_df = your_station.get_stafill_gaps_era5(modeldata=ERA5_modeldata, 19 method='debias', 20 obstype='temp')

File ~/Documents/VLINDER_github/MetObs_toolkit/metobs_toolkit/dataset.py:804, in Dataset.get_modeldata(self, modelname, modeldata, obstype, stations, startdt, enddt) 802 # fill modell with data 803 if modelname == "ERA5_hourly": --> 804 Modl.get_ERA5_data(metadf=metadf, 805 startdt_utc=startdt_utc, 806 enddt_utc=enddt_utc, 807 obstype=obstype) 809 else: 810 Modl.get_gee_dataset_data(mapname=modelname, 811 metadf=metadf, 812 startdt_utc=startdt_utc, 813 enddt_utc=enddt_utc, 814 obstype=obstype)

File ~/Documents/VLINDER_github/MetObs_toolkit/metobs_toolkit/modeldata.py:464, in Modeldata.get_ERA5_data(self, metadf, startdt_utc, enddt_utc, obstype) 429 def get_ERA5_data(self, metadf, startdt_utc, enddt_utc, obstype='temp'): 430 """Extract timeseries of the ERA5_hourly dataset. 431 432 The units are converted to the toolkit standard units. (...) 462 463 """ --> 464 self.get_gee_dataset_data(mapname='ERA5_hourly', 465 metadf=metadf, 466 startdt_utc=startdt_utc, 467 enddt_utc=enddt_utc, 468 obstype=obstype)

File ~/Documents/VLINDER_github/MetObs_toolkit/metobs_toolkit/modeldata.py:400, in Modeldata.get_gee_dataset_data(self, mapname, metadf, startdt_utc, enddt_utc, obstype, target_unit_name, conv_expr) 397 connect_to_gee() 399 # Get data using GEE --> 400 df = gee_extract_timeseries(metadf=metadf, 401 mapinfo=geeinfo, 402 startdt=startdt_utc, 403 enddt=enddt_utc, 404 obstype=obstype, 405 latcolname="lat", 406 loncolname="lon", 407 ) 409 if not df.empty: 410 self._df_units[obstype] = geeinfo['band_of_use'][obstype]['units']

File ~/Documents/VLINDER_github/MetObs_toolkit/metobs_toolkit/landcover_functions.py:472, in gee_extract_timeseries(metadf, mapinfo, startdt, enddt, obstype, latcolname, loncolname) 467 use_drive = True 468 # ============================================================================= 469 # df to featurecollection 470 # ============================================================================= --> 472 ee_fc = _df_to_features_point_collection(metadf) 474 # ============================================================================= 475 # extract raster values 476 # ============================================================================= 478 def rasterExtraction(image):

File ~/Documents/VLINDER_github/MetObs_toolkit/metobs_toolkit/landcover_functions.py:194, in _df_to_features_point_collection(df) 192 poi_geometry = ee.Geometry.Point([row["lon"], row["lat"]]) 193 # construct the attributes (properties) for each point --> 194 poi_properties = poi_properties = {"name": row["name"]} 195 # construct feature combining geometry and properties 196 poi_feature = ee.Feature(poi_geometry, poi_properties)

File ~/anaconda3/envs/metobs_dev/lib/python3.9/site-packages/pandas/core/series.py:1040, in Series.__getitem__(self, key) 1037 return self._values[key] 1039 elif key_is_scalar: -> 1040 return self._get_value(key) 1042 # Convert generator to list before going through hashable part 1043 # (We will iterate through the generator there to check for slices) 1044 if is_iterator(key):

File ~/anaconda3/envs/metobs_dev/lib/python3.9/site-packages/pandas/core/series.py:1156, in Series._get_value(self, label, takeable) 1153 return self._values[label] 1155 # Similar to Index.get_value, but we do not fall back to positional -> 1156 loc = self.index.get_loc(label) 1158 if is_integer(loc): 1159 return self._values[loc]

File ~/anaconda3/envs/metobs_dev/lib/python3.9/site-packages/pandas/core/indexes/base.py:3797, in Index.get_loc(self, key) 3792 if isinstance(casted_key, slice) or ( 3793 isinstance(casted_key, abc.Iterable) 3794 and any(isinstance(x, slice) for x in casted_key) 3795 ): 3796 raise InvalidIndexError(key) -> 3797 raise KeyError(key) from err 3798 except TypeError: 3799 # If we have a listlike key, _check_indexing_error will raise 3800 # InvalidIndexError. Otherwise we fall through and re-raise 3801 # the TypeError. 3802 self._check_indexing_error(key)

KeyError: 'name'

vergauwenthomas commented 10 months ago

Priority, because I want to use this feature in the example (#384) to avoid writing to Google Drive.