Closed larsbuntemeyer closed 4 months ago
Here is my code to compare the `cell_methods` of the CORDEX data request with those of the CMIP6 MIP tables:
from typing import Optional

import pandas as pd

# Location of the Excel workbook holding all CMIP6 MIP tables (one per sheet).
CMIP6_MIP_TABLES_URL = "https://c6dreq.dkrz.de/docs/CMIP6_MIP_tables.xlsx"

# Path of the CORDEX data request table read by the script entry point.
CORDEX_DATA_REQUEST = "CORDEX-CMIP6/data-request.csv"

# Tables used to break ties when a variable/frequency pair occurs in
# several CMIP6 MIP tables.
PREFERRED_CMIP6_TABLES = ["Amon", "day"]

# Columns kept from the concatenated MIP tables, in output order.
CMIP6_COLUMNS = [
    "frequency",
    "modeling_realm",
    "standard_name",
    "units",
    "cell_methods",
    "cell_measures",
    "long_name",
    "comment",
    "dimensions",
    "out_name",
    "type",
    "positive",
    "valid_min",
    "valid_max",
    "ok_min_mean_abs",
    "ok_max_mean_abs",
    "cmip6_table",
]


def retrieve_cmip6_mip_tables(url: str = CMIP6_MIP_TABLES_URL) -> pd.DataFrame:
    """Retrieve and concatenate all CMIP6 MIP tables.

    Every sheet of the workbook at *url* except ``"Notes"`` is read, tagged
    with its sheet name in a ``cmip6_table`` column, and concatenated.

    Args:
        url: Location of the workbook; defaults to
            https://c6dreq.dkrz.de/docs/CMIP6_MIP_tables.xlsx

    Returns:
        A DataFrame restricted to ``CMIP6_COLUMNS`` with exact duplicate
        rows removed and a fresh integer index.
    """
    sheets = pd.read_excel(url, sheet_name=None)
    # The "Notes" sheet is discarded; tolerate a workbook without it.
    sheets.pop("Notes", None)

    # ``assign`` returns a new frame, so the sheets are not mutated.
    tagged = [sheet.assign(cmip6_table=name) for name, sheet in sheets.items()]
    merged = pd.concat(tagged).rename(
        columns={
            "CF Standard Name": "standard_name",
            "Long name": "long_name",
            "Variable Name": "out_name",
        }
    )
    return merged[CMIP6_COLUMNS].drop_duplicates(ignore_index=True)


def get_cmip6_entry(out_name, frequency, cmip6_df: Optional[pd.DataFrame] = None):
    """Return the CMIP6 row matching *out_name* and *frequency*.

    If several rows match, only those from ``PREFERRED_CMIP6_TABLES``
    ("Amon", "day") are kept.

    Args:
        out_name: Variable name to look up in the ``out_name`` column.
        frequency: Frequency to look up in the ``frequency`` column.
        cmip6_df: Table to search; defaults to the module-level ``cmip6``
            frame, which only exists when the file is run as a script.

    Returns:
        The matching row as a ``pandas.Series``, or ``None`` when no row
        is left after filtering.

    Raises:
        ValueError: If more than one row remains after restricting to the
            preferred tables.
    """
    if cmip6_df is None:
        cmip6_df = cmip6

    matches = cmip6_df[
        (cmip6_df["out_name"] == out_name) & (cmip6_df["frequency"] == frequency)
    ]
    if len(matches) > 1:
        matches = matches[matches["cmip6_table"].isin(PREFERRED_CMIP6_TABLES)]
    if len(matches) > 1:
        raise ValueError(f"could find no unique entry for {out_name}, {frequency}")
    if matches.empty:
        return None
    return matches.iloc[0]


def _cell_methods_differ(left, right) -> bool:
    """Return True if two ``cell_methods`` values differ.

    Two missing values count as equal; a plain ``!=`` would report
    ``NaN != NaN`` as a difference.
    """
    left_missing = pd.isna(left)
    right_missing = pd.isna(right)
    if left_missing or right_missing:
        return not (left_missing and right_missing)
    return left != right


def add_point_suffix(cordex_df: pd.DataFrame) -> pd.DataFrame:
    """Rename frequencies of "time: point" variables to compare with CMIP6.

    Rows whose ``cell_methods`` contain the literal text ``"time: point"``
    get ``"Pt"`` appended to their ``frequency``.

    Args:
        cordex_df: Table with ``cell_methods`` and ``frequency`` columns.

    Returns:
        A modified copy; *cordex_df* itself is left untouched.
    """
    result = cordex_df.copy()
    # na=False keeps rows with missing cell_methods out of the mask; a mask
    # containing NaN cannot be used for boolean indexing.
    is_point = result["cell_methods"].str.contains(
        "time: point", regex=False, na=False
    )
    result.loc[is_point, "frequency"] = result.loc[is_point, "frequency"] + "Pt"
    return result


def compare(
    cordex_df: Optional[pd.DataFrame] = None,
    cmip6_df: Optional[pd.DataFrame] = None,
):
    """Collect variables whose ``cell_methods`` differ between CORDEX and CMIP6.

    CORDEX rows without a CMIP6 counterpart (see ``get_cmip6_entry``) are
    skipped.

    Args:
        cordex_df: CORDEX table with ``out_name``, ``frequency`` and
            ``cell_methods`` columns; defaults to the module-level ``cordex``.
        cmip6_df: CMIP6 table passed on to ``get_cmip6_entry``; defaults to
            the module-level ``cmip6``.

    Returns:
        A list of dicts with keys ``out_name``, ``frequency``, ``cordex``,
        ``cmip6`` and ``cmip6_realm``, one per differing variable.
    """
    if cordex_df is None:
        cordex_df = cordex

    rows = []
    for _, row in cordex_df.iterrows():
        cmip6_row = get_cmip6_entry(row["out_name"], row["frequency"], cmip6_df)
        if cmip6_row is None:
            continue
        if _cell_methods_differ(row["cell_methods"], cmip6_row["cell_methods"]):
            rows.append(
                {
                    "out_name": row["out_name"],
                    "frequency": row["frequency"],
                    "cordex": row["cell_methods"],
                    "cmip6": cmip6_row["cell_methods"],
                    "cmip6_realm": cmip6_row["modeling_realm"],
                }
            )
    return rows


if __name__ == "__main__":
    cmip6 = retrieve_cmip6_mip_tables()
    cordex = add_point_suffix(pd.read_csv(CORDEX_DATA_REQUEST))

    diff_cell_methods = pd.DataFrame(compare(cordex, cmip6))
    print(diff_cell_methods.to_markdown(index=False))
Here is my code to compare between cordex and cmip6