WCRP-CORDEX / data-request-table

Machine readable data request tables
MIT License
0 stars 0 forks source link

check cell_methods #19

Closed larsbuntemeyer closed 4 months ago

larsbuntemeyer commented 5 months ago

Here is my code to compare the `cell_methods` entries between the CORDEX and CMIP6 data request tables:

import pandas as pd

def retrieve_cmip6_mip_tables():
    """Download the CMIP6 MIP tables workbook and stack its sheets.

    The workbook is read from
    https://c6dreq.dkrz.de/docs/CMIP6_MIP_tables.xlsx
    The "Notes" sheet is discarded; every remaining sheet is tagged with
    its sheet name in a ``cmip6_table`` column and all sheets are
    concatenated into a single frame.

    Returns:
        A DataFrame restricted to the request columns listed below, with
        duplicate rows removed and a fresh integer index.

    Raises:
        KeyError: if the workbook has no "Notes" sheet or one of the
            expected columns is missing after renaming.
    """
    source_url = "https://c6dreq.dkrz.de/docs/CMIP6_MIP_tables.xlsx"
    # sheet_name=None yields a mapping of sheet name -> DataFrame.
    sheets = pd.read_excel(source_url, sheet_name=None)
    sheets.pop("Notes")

    # Remember which sheet every row came from before stacking them.
    tagged = []
    for sheet_name, frame in sheets.items():
        frame["cmip6_table"] = sheet_name
        tagged.append(frame)

    # Map the spreadsheet headers onto the attribute names used below.
    header_map = {
        "CF Standard Name": "standard_name",
        "Long name": "long_name",
        "Variable Name": "out_name",
    }
    merged = pd.concat(tagged).rename(columns=header_map)

    wanted = [
        "frequency",
        "modeling_realm",
        "standard_name",
        "units",
        "cell_methods",
        "cell_measures",
        "long_name",
        "comment",
        "dimensions",
        "out_name",
        "type",
        "positive",
        "valid_min",
        "valid_max",
        "ok_min_mean_abs",
        "ok_max_mean_abs",
        "cmip6_table",
    ]
    return merged[wanted].drop_duplicates(ignore_index=True)

def get_cmip6_entry(
    out_name, frequency, mip_tables=None, preferred_tables=("Amon", "day")
):
    """Return the CMIP6 table row matching a variable name and frequency.

    Args:
        out_name: Variable name compared against the ``out_name`` column.
        frequency: Frequency compared against the ``frequency`` column.
        mip_tables: DataFrame to search; it needs ``out_name``,
            ``frequency`` and ``cmip6_table`` columns. Defaults to the
            module-level ``cmip6`` frame.
        preferred_tables: Table names used to break ties when more than
            one row matches.

    Returns:
        The matching row as a Series, or ``None`` when nothing matches.
        ``None`` is also returned when several rows match but none of
        them belongs to one of ``preferred_tables``.

    Raises:
        ValueError: if more than one row is still left after restricting
            the matches to ``preferred_tables``.
    """
    source = cmip6 if mip_tables is None else mip_tables
    select = source[(source.out_name == out_name) & (source.frequency == frequency)]
    if len(select) > 1:
        # Several tables define this variable: keep only the preferred ones.
        select = select[select.cmip6_table.isin(preferred_tables)]
    if len(select) > 1:
        raise ValueError(f"could find no unique entry for {out_name}, {frequency}")
    if select.empty:
        return None
    return select.iloc[0]

def compare():
    """Collect CORDEX entries whose ``cell_methods`` differ from CMIP6.

    Walks the module-level ``cordex`` table and looks up the CMIP6
    counterpart of every row with ``get_cmip6_entry(out_name, frequency)``.
    Rows without a CMIP6 counterpart are skipped.

    Returns:
        A list of dicts, one per mismatch, with the keys ``out_name``,
        ``frequency``, ``cordex`` (CORDEX cell_methods), ``cmip6``
        (CMIP6 cell_methods) and ``cmip6_realm``.
    """
    rows = []
    for _, row in cordex.iterrows():
        cmip6_row = get_cmip6_entry(row.out_name, row.frequency)
        if cmip6_row is None:
            continue
        cordex_methods = row.cell_methods
        cmip6_methods = cmip6_row.cell_methods
        # Missing values are NaN and NaN != NaN is True, so two missing
        # cell_methods would otherwise be reported as a difference.
        if pd.isna(cordex_methods) and pd.isna(cmip6_methods):
            continue
        if cordex_methods != cmip6_methods:
            rows.append(
                {
                    "out_name": row.out_name,
                    "frequency": row.frequency,
                    "cordex": cordex_methods,
                    "cmip6": cmip6_methods,
                    "cmip6_realm": cmip6_row.modeling_realm,
                }
            )
    return rows

# Module-level tables read by get_cmip6_entry() and compare().
cmip6 = retrieve_cmip6_mip_tables()

cordex = pd.read_csv("CORDEX-CMIP6/data-request.csv")
# Rename frequencies to compare with CMIP6: entries whose cell_methods
# contain "time: point" get a "Pt" suffix on their frequency.
# na=False keeps rows with missing cell_methods out of the mask; without it
# the mask would contain NaN and the .loc selection below would raise.
# regex=False because the pattern is a plain literal.
is_point = cordex.cell_methods.str.contains("time: point", regex=False, na=False)
cordex.loc[is_point, "frequency"] = cordex.loc[is_point, "frequency"] + "Pt"

diff_cell_methods = pd.DataFrame(compare())
print(diff_cell_methods.to_markdown(index=False))