metno / pyaerocom

Python tools for climate and air quality model evaluation
https://pyaerocom.readthedocs.io/
GNU General Public License v3.0
26 stars 15 forks source link

csv_timeseries reader does not work with ExperimentProcessor #1372

Closed Ovewh closed 1 month ago

Ovewh commented 1 month ago

csv_timeseries reader does not work with ExperimentProcessor. (Template: Please provide a clear and concise description of what the bug is.)

data_id = "csv_timeseries"

columns = { "station" : 3, "longitude" : 1, "latitude" : 2, "altitude" : 6, "value": 5, "variable": 10, "units" : 11, "flag" : "0", "start_time" : 8, "end_time" : 9, "country" : "NaN", "standard_deviation" : 7,
} filters = {"variables": {"include": ["rsdscs"]}} csvreader_kwargs = {"delimiter": ","}

name_map = {"rsdscs":"rsdscs"} config = PyaroConfig( name="GEBA", data_id=data_id, filename_or_obj_or_url="/lustre/storeB/users/oveh/DURF/GEBA/no_header_cleanedfullGEBADataWithRFfit_2017-08-02_v4.csv", filters=filters, columns=columns, name_map=name_map, csvreader_kwargs=csvreader_kwargs

)

folder_DURF = ( "/lustre/storeB/project/aerocom/aerocom-users-database/DURF" )

MODELS = {

"CAM-ATRAS (glb)" : dict(
    model_id="CAM-ATRAS_DURF-histSST-dust-glb",
    model_data_dir="/lustre/storeB/project/aerocom/aerocom-users-database/DURF/histSST-dust-glb/CAM-ATRAS_DURF-histSST-dust-glb",
    model_vert_type="Column",
    model_ts_type_read = 'monthly',
)

} OBS_GROUNDBASED = { ################

Pyaro

################
"GEBA": dict(
    # obs_id="csv_timeseries",
    config=config,
    web_interface_name="GEBA",
    obs_vars=["rsdscs"],
    obs_vert_type="Surface",
    colocate_time=True,
    min_num_obs={
        "yearly": {"monthly": 9},
    },
    ts_type="monthly"
),

} output_dir = "/lustre/storeB/users/oveh/DURF/aeroval/data" coldata_dir = "/lustre/storeB/users/oveh/DURF/aeroval/coldala"

exp_pi = "Ove Haugvaldstad" experiment_id="DURF GEBA" proj_id = "AeroCom"

CFG = dict(

Output directories

model_cfg=MODELS,
obs_cfg=OBS_GROUNDBASED,

json_basedir=output_dir,
coldata_basedir=coldata_dir,
# Run options
reanalyse_existing=True,  # if True, existing colocated data files will be deleted
raise_exceptions=True,  # if True, the analysis will stop whenever an error occurs
clear_existing_json=True,  # if True, deletes previous output before running
# Map Options
# add_model_maps=True,  # Adds a plot of the whole map. Very slow!!!
# only_model_maps=True,  # Adds only plot above, without any other evaluation
map_zoom="World",  # Zoom level. For EMEP, Europe is typically used
# Time and Frequency Options
ts_type="monthly",  # Colocation frequency (no statistics in higher resolution can be computed)
freqs=["monthly", "yearly"],  # Frequencies that are evaluated
main_freq="monthly",  # Frequency that is displayed when opening webpage
periods=[
    "1980-2000",
    "1960-1980",
    "1940-1960",
    "1950-2000"
],  # List of years or periods of years that are evaluated. E.g. "2005" or "2001-2020"
# Statistical Options

Map Options

add_model_maps=False,           # Adds a plot of the whole map. Very slow!!!
only_model_maps=False,          # Adds only plot above, without any other evaluation

obs_remove_outliers=False,
model_remove_outliers=False,
colocate_time=True,
zeros_to_nan=False,
weighted_stats=True,
annual_stats_constrained=True,
harmonise_units=False,

start=1950,
# Experiment Metadata
exp_pi=exp_pi,
proj_id=proj_id,
exp_id=experiment_id,
exp_name="DURF",
exp_descr=("Comparison of DURF-dust-spt and DURF-dust-glb"),
public=True,

)

if name == "main":
from pyaerocom.aeroval import EvalSetup, ExperimentProcessor from pyaerocom import const print( const.CACHEDIR ) # Prints where to find the caching folder. Not needed but this folder should be emptied now and then, so I like to see where it is stp = EvalSetup(**CFG) # Makes a setup object from the dict, that PyAeroval can use ana = ExperimentProcessor(stp) # Makes an experiment object

res = ana.run(var_list=['rsdscs'])  # Runs the experiment

 - Error message (if applicable):

File /modules/rhel8/user-apps/fou-kl/aerotools/pya-v2024.09/lib/python3.11/site-packages/pyaerocom/colocation/colocator.py:334, in Colocator.prepare_run(self, var_list) 331 if isinstance(self.colocation_setup.obs_vars, str): 332 self.colocation_setup.obs_vars = (self.colocation_setup.obs_vars,) --> 334 self._check_obs_vars_available() 335 self._check_obs_filters() 336 self._check_model_add_vars()

File /modules/rhel8/user-apps/fou-kl/aerotools/pya-v2024.09/lib/python3.11/site-packages/pyaerocom/colocation/colocator.py:683, in Colocator._check_obs_vars_available(self) 681 invalid = [var for var in self.colocation_setup.obs_vars if var not in avail] 682 invalid = "; ".join(invalid) --> 683 raise DataCoverageError( 684 f"Invalid obs var(s) for {self.colocation_setup.obs_id}: {invalid}" 685 ) 687 self.obs_vars = avail

DataCoverageError: Invalid obs var(s) for : rsdscs


Error message if i set obs_id:

File /modules/rhel8/user-apps/fou-kl/aerotools/pya-v2024.09/lib/python3.11/site-packages/pyaerocom/colocation/colocator.py:378, in Colocator.run(self, var_list) 376 # ToDo: see if the following could be solved via custom context manager 377 try: --> 378 vars_to_process = self.prepare_run(var_list) 379 except Exception as ex: 380 logger.exception(ex)

File /modules/rhel8/user-apps/fou-kl/aerotools/pya-v2024.09/lib/python3.11/site-packages/pyaerocom/colocation/colocator.py:334, in Colocator.prepare_run(self, var_list) 331 if isinstance(self.colocation_setup.obs_vars, str): 332 self.colocation_setup.obs_vars = (self.colocation_setup.obs_vars,) --> 334 self._check_obs_vars_available() 335 self._check_obs_filters() 336 self._check_model_add_vars()

File /modules/rhel8/user-apps/fou-kl/aerotools/pya-v2024.09/lib/python3.11/site-packages/pyaerocom/colocation/colocator.py:662, in Colocator._check_obs_vars_available(self) 660 oreader = self.obs_reader 661 if self.obs_is_ungridded: --> 662 avail = oreader.get_vars_supported( 663 self.colocation_setup.obs_id, self.colocation_setup.obs_vars 664 ) 665 else: 666 avail = []

File /modules/rhel8/user-apps/fou-kl/aerotools/pya-v2024.09/lib/python3.11/site-packages/pyaerocom/io/readungridded.py:880, in ReadUngridded.get_vars_supported(self, obs_id, vars_desired) 876 obs_vars.append(var) 878 else: 879 # check if variable can be read from a dataset on disk --> 880 _oreader = self.get_lowlevel_reader(obs_id) 881 for var in varlist_aerocom(vars_desired): 882 if _oreader.var_supported(var):

File /modules/rhel8/user-apps/fou-kl/aerotools/pya-v2024.09/lib/python3.11/site-packages/pyaerocom/io/readungridded.py:295, in ReadUngridded.get_lowlevel_reader(self, data_id) 293 if data_id not in self._readers: 294 _cls = self._find_read_class(data_id) --> 295 reader = self._init_lowlevel_reader(_cls, data_id) 296 self._readers[data_id] = reader 297 return self._readers[data_id]

File /modules/rhel8/user-apps/fou-kl/aerotools/pya-v2024.09/lib/python3.11/site-packages/pyaerocom/io/readungridded.py:431, in ReadUngridded._init_lowlevel_reader(self, reader, data_id) 428 else: 429 ddir = None --> 431 return reader(data_id=data_id, data_dir=ddir)

TypeError: ReadPyaro.init() got an unexpected keyword argument 'data_id'



**To Reproduce**
Steps to reproduce the behavior:
1. Run config_file.py
2.

**Expected behavior**
The data should be colocated and the analysis should run.

**Additional context**
Pyaerocom can read the data, e.g. by `rp = pya.io.ReadUngridded(configs=config)`. However, since the csv_timeseries reader does not support `obs_id="csv_timeseries"` being provided, it raises an error. I believe the colocator cannot find the obs_reader.
Ovewh commented 1 month ago

Apparently my config was misconfigured

    "GEBA": dict(
        # obs_id="csv_timeseries",
        config=config,
        web_interface_name="GEBA",

should be

    "GEBA": dict(
        # obs_id="csv_timeseries",
        obs_config=config,
        web_interface_name="GEBA",
lewisblake commented 1 month ago

@dulte In light of this error, is it possible for the PyaroConfig to be set up in such a way that it crashes if it gets an argument it doesn't expect? This is the default pydantic behavior, so what would need to be changed on our end to ensure this?