noaa-oar-arl / monetio

The Model and ObservatioN Evaluation Tool I/O package
https://monetio.readthedocs.io
MIT License
16 stars 19 forks source link

RRFS-SD Reader #152

Open bbakernoaa opened 6 months ago

bbakernoaa commented 6 months ago

It came to my attention that the RRFS-SD verification has been using the _rrfs_cmaq_mm.py reader. While this works it is probably better to create a separate one that is a simpler use case than the complex chemistry of cmaq itself.

Specially there are three variables for air composition within the files.

smoke (pm25) dust (pm25) coarse_pm (PM10 coarse mode only)

We need to aggregate these into PM25 and PM10

PM25 = smoke + dust

PM10 = smoke + dust + PM10

@jordanschnell can you confirm the variable names here

zmoon commented 6 months ago

Comparing current develop to what @jordanschnell was using:

```patch --- monetio/models/_rrfs_cmaq_mm.py 2023-12-13 19:10:27.241244093 +0000 +++ /scratch1/BMC/wrf-chem/Jordan/miniconda3/envs/melodies-monet/lib/python3.9/site-packages/monetio/models/_rrfs_ cmaq_mm.py 2023-08-05 01:17:24.000000000 +0000 @@ -19,7 +19,8 @@ var_list=None, fname_pm25=None, surf_only=False, - **kwargs, + convert_pm25=True, + **kwargs ): # Like WRF-chem add var list that just determines whether to calculate sums or not to speed this up. """Method to open RFFS-CMAQ dyn* netcdf files. @@ -107,7 +108,7 @@ var_list.append("tmp") var_list.append("pressfc") var_list.append("dpres") - var_list.append("hgtsfc") +# var_list.append("hgtsfc") var_list.append("delz") # Remove duplicates just in case: @@ -119,7 +120,7 @@ # If variables in pm25 files are included remove these as these are not in the main file # And will be added later. for pm25_var in [ - "PM25_TOT", +# "PM25_TOT", "PM25_TOT_NSOM", "PM25_EC", "PM25_NH4", @@ -153,8 +154,6 @@ ] if fname_pm25 is not None: - from ..util import _try_merge_exact - # Add the processed pm2.5 species. dset_pm25 = xr.open_mfdataset(fname_pm25, concat_dim="time", combine="nested", **kwargs) dset_pm25 = dset_pm25.drop( @@ -164,7 +163,7 @@ # same pressure levels from the model dynf* files. # Attributes are formatted differently in pm25 file so remove attributes and use those from dynf* files. dset_pm25.attrs = {} - dset = _try_merge_exact(dset, dset_pm25, right_name="PM2.5") + dset = dset.merge(dset_pm25) # Standardize some variable names dset = dset.rename( @@ -178,26 +177,23 @@ "tmp": "temperature_k", # standard temperature (kelvin) "pressfc": "surfpres_pa", "dpres": "dp_pa", # Change names so standard surfpres_pa and dp_pa - "hgtsfc": "surfalt_m", +# "hgtsfc": "surfalt_m", "delz": "dz_m", } ) # Optional, but when available include altitude info # Calculate pressure. This has to go before sorting because ak and bk # are not sorted as they are in attributes - dset["pres_pa_mid"] = _calc_pressure(dset) + dset["pres_pa_mid"] = _calc_pressure(dset,surf_only) # Adjust pressure levels for all models such that the surface is first. - if np.all(np.diff(dset.z.values) > 0): # increasing pressure - dset = dset.isel(z=slice(None, None, -1)) # -> decreasing - if np.all(np.diff(dset.z_i.values) > 0): # increasing pressure - dset = dset.isel(z_i=slice(None, None, -1)) # -> decreasing - dset["dz_m"] = dset["dz_m"] * -1.0 # Change to positive values. + dset = dset.sortby("z", ascending=False) + dset = dset.sortby("z_i", ascending=False) # Note this altitude calcs needs to always go after resorting. # Altitude calculations are all optional, but for each model add values that are easy to calculate. - if not surf_only: - dset["alt_msl_m_full"] = _calc_hgt(dset) +# dset["alt_msl_m_full"] = _calc_hgt(dset) +# dset["dz_m"] = dset["dz_m"] * -1.0 # Change to positive values. # Set coordinates dset = dset.reset_index( @@ -226,9 +222,10 @@ for i in dset.variables: if "units" in dset[i].attrs: if "ug/kg" in dset[i].attrs["units"]: - # ug/kg -> ug/m3 using dry air density - dset[i] = dset[i] * dset["pres_pa_mid"] / dset["temperature_k"] / 287.05535 - dset[i].attrs["units"] = r"$\mu g m^{-3}$" + if convert_pm25: + # ug/kg -> ug/m3 using dry air density + dset[i] = dset[i] * dset["pres_pa_mid"] / dset["temperature_k"] / 287.05535 + dset[i].attrs["units"] = r"$\mu g m^{-3}$" # add lazy diagnostic variables # Note that because there are so many species to sum. Summing the aerosols is slowing down the code. @@ -259,7 +256,7 @@ if "pm25_om" in list_calc_sum: dset = add_lazy_om_pm25(dset, dict_sum) # Change the times to pandas format - dset["time"] = dset.indexes["time"].to_datetimeindex(unsafe=True) +# JLS dset["time"] = dset.indexes["time"].to_datetimeindex(unsafe=True) # Turn off warning for now. This is just because the model is in julian time # Drop extra variables that were part of sum, but are not in original var_list @@ -1074,7 +1071,7 @@ return z -def _calc_pressure(dset): +def _calc_pressure(dset,surf_only): """Calculate the mid-layer pressure in Pa from surface pressure and ak and bk constants. @@ -1094,14 +1091,19 @@ xarray.DataArray Mid-layer pressure with attributes. """ - p = dset.dp_pa.copy().load() # Have to load into memory here so can assign levels. - psfc = dset.surfpres_pa.copy().load() + pres = dset.dp_pa.copy().load() # Have to load into memory here so can assign levels. + srfpres = dset.surfpres_pa.copy().load() for k in range(len(dset.z)): - pres_2 = dset.ak[k + 1] + psfc * dset.bk[k + 1] - pres_1 = dset.ak[k] + psfc * dset.bk[k] - p[:, k, :, :] = (pres_2 - pres_1) / np.log(pres_2 / pres_1) - - p.name = "pres_pa_mid" - p.attrs["units"] = "pa" - p.attrs["long_name"] = "Pressure Mid Layer in Pa" - return p + if surf_only: + pres_2 = 0.0 + srfpres * 0.9978736 + pres_1 = 0.0 + srfpres * 1.0 + else: + pres_2 = dset.ak[k + 1] + srfpres * dset.bk[k + 1] + pres_1 = dset.ak[k] + srfpres * dset.bk[k] + + pres[:, k, :, :] = (pres_2 - pres_1) / np.log(pres_2 / pres_1) + + pres.name = "pres_pa_mid" + pres.attrs["units"] = "pa" + pres.attrs["long_name"] = "Pressure Mid Layer in Pa" + return pres ```