IAMconsortium / pyam

Analysis & visualization of energy & climate scenarios
https://pyam-iamc.readthedocs.io/
Apache License 2.0
226 stars 118 forks source link

Aggregation tests #317

Open znicholls opened 4 years ago

znicholls commented 4 years ago

In https://github.com/IAMconsortium/pyam/pull/315, the aggregation tests were removed. They should be added back in the future in a way that makes the intent of the tests clearer. The testing code in master before #315 is reproduced below.

# test_feature_aggregate.py

import pytest
import logging

import numpy as np
import pandas as pd
from pyam import check_aggregate, IamDataFrame, IAMC_IDX

from conftest import TEST_DTS

# index columns of the long-format expected data: pyam's `IAMC_IDX` plus 'year'
LONG_IDX = IAMC_IDX + ['year']

# expected result (long format, one row per region and year) that the tests
# below compare against `aggregate('Primary Energy', method='max')`
PE_MAX_DF = pd.DataFrame([
    ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2005, 9.0],
    ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2010, 10.0],
    ['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2005, 6.0],
    ['model_a', 'scen_a', 'reg_a', 'Primary Energy', 'EJ/y', 2010, 6.0],
    ['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2005, 3.0],
    ['model_a', 'scen_a', 'reg_b', 'Primary Energy', 'EJ/y', 2010, 4.0],

],
    columns=LONG_IDX + ['value']
)

# expected result of the same 'max' aggregation for 'Emissions|CO2'; the
# 'World' rows are also reused (without the region column) as the expected
# result of `aggregate_region(..., method='max')`
# NOTE(review): the unit is 'EJ/y' although the variable is an emission -
# the tests compare against this unit, so it is kept as is
CO2_MAX_DF = pd.DataFrame([
    ['model_a', 'scen_a', 'World', 'Emissions|CO2', 'EJ/y', 2005, 6.0],
    ['model_a', 'scen_a', 'World', 'Emissions|CO2', 'EJ/y', 2010, 8.0],
    ['model_a', 'scen_a', 'reg_a', 'Emissions|CO2', 'EJ/y', 2005, 4.0],
    ['model_a', 'scen_a', 'reg_a', 'Emissions|CO2', 'EJ/y', 2010, 5.0],
    ['model_a', 'scen_a', 'reg_b', 'Emissions|CO2', 'EJ/y', 2005, 2.0],
    ['model_a', 'scen_a', 'reg_b', 'Emissions|CO2', 'EJ/y', 2010, 3.0],
],
    columns=LONG_IDX + ['value']
)

# index columns of the expected data for regional aggregation (no 'region')
REG_IDX = ['model', 'scenario', 'variable', 'unit', 'year']

# expected result that the tests below compare against
# `aggregate_region('Price|Carbon', method='max')`
PRICE_MAX_DF = pd.DataFrame([
    ['model_a', 'scen_a', 'Price|Carbon', 'USD/tCO2', 2005, 10.0],
    ['model_a', 'scen_a', 'Price|Carbon', 'USD/tCO2', 2010, 30.0],
],
    columns=REG_IDX + ['value']
)

def test_aggregate(aggregate_df):
    """Check `aggregate()` and `check_aggregate()` for a single variable."""
    # primary energy is a direct sum of its sub-categories (in each region)
    assert aggregate_df.check_aggregate('Primary Energy') is None

    # after renaming a sub-category, the check only passes when the
    # components are passed explicitly as a list
    renamed = aggregate_df.rename(variable={'Primary Energy|Wind': 'foo'})
    assert renamed.check_aggregate('Primary Energy') is not None
    assert renamed.check_aggregate(
        'Primary Energy', components=['Primary Energy|Coal', 'foo']
    ) is None

    # another method (max) can be given as string or as function
    expected = PE_MAX_DF.set_index(LONG_IDX).value
    for method in ('max', np.max):
        observed = aggregate_df.aggregate('Primary Energy', method=method)
        pd.testing.assert_series_equal(observed, expected)

    # an illegal method raises an error
    with pytest.raises(ValueError):
        aggregate_df.aggregate('Primary Energy', method='foo')

def test_aggregate_by_list(aggregate_df):
    """Check `aggregate()` and `check_aggregate()` for a list of variables."""
    variables = ['Primary Energy', 'Emissions|CO2']

    # both variables are a direct sum of their sub-categories (in each region)
    assert aggregate_df.check_aggregate(variables) is None

    # expected maximum over the sub-categories of both variables
    stacked = pd.concat([PE_MAX_DF, CO2_MAX_DF])
    expected = stacked.set_index(LONG_IDX).value.sort_index()

    # another method (max) can be given as string or as function
    for method in ('max', np.max):
        observed = aggregate_df.aggregate(variables, method=method)
        pd.testing.assert_series_equal(observed, expected)

    # a list of variables together with components raises an error
    with pytest.raises(ValueError):
        aggregate_df.aggregate(
            variables,
            components=['Primary Energy|Coal', 'Primary Energy|Wind'],
        )

def test_aggregate_region(aggregate_df):
    """Check regional aggregation of a single variable."""
    co2 = 'Emissions|CO2'

    # primary energy is a direct sum (across regions)
    assert aggregate_df.check_aggregate_region('Primary Energy') is None

    # CO2 emissions include "bunkers", which the fixture data only defines
    # for `World`, so the check only passes when components are included
    assert aggregate_df.check_aggregate_region(co2) is not None
    assert aggregate_df.check_aggregate_region(co2, components=True) is None

    # rename the bunker emissions to test passing components as a list
    renamed = aggregate_df.rename(variable={'Emissions|CO2|Bunkers': 'foo'})
    assert renamed.check_aggregate_region(co2, components=['foo']) is None

    # the carbon price is not a sum but an emissions-weighted value
    assert aggregate_df.check_aggregate_region('Price|Carbon') is not None
    assert aggregate_df.check_aggregate_region(
        'Price|Carbon', weight=co2
    ) is None

    # an inconsistent index of variable and weight raises an error
    incomplete = aggregate_df.filter(
        variable='Emissions|CO2', region='reg_b', keep=False
    )
    with pytest.raises(ValueError):
        incomplete.aggregate_region('Price|Carbon', weight='Emissions|CO2')

    # passing both weight and components raises an error
    with pytest.raises(ValueError):
        aggregate_df.aggregate_region(co2, components=True, weight='bar')

    # another method (max) can be given as string or as function
    expected = PRICE_MAX_DF.set_index(REG_IDX).value
    for method in ('max', np.max):
        observed = aggregate_df.aggregate_region('Price|Carbon', method=method)
        pd.testing.assert_series_equal(observed, expected)

    # an illegal method raises an error
    with pytest.raises(ValueError):
        aggregate_df.aggregate_region(co2, method='foo')

    # a weight together with a method other than 'sum' raises an error
    with pytest.raises(ValueError):
        aggregate_df.aggregate_region(co2, method='max', weight='bar')

def test_aggregate_region_by_list(aggregate_df):
    """Check regional aggregation when passing a list of variables."""
    # primary energy and sub-categories are a direct sum (across regions)
    energy_vars = [
        'Primary Energy', 'Primary Energy|Coal', 'Primary Energy|Wind'
    ]
    assert aggregate_df.check_aggregate_region(energy_vars) is None

    # emissions and carbon price are _not_ a direct sum (across regions)
    mixed_vars = ['Price|Carbon', 'Emissions|CO2']
    assert aggregate_df.check_aggregate_region(mixed_vars) is not None

    # a list of variables together with components raises an error
    with pytest.raises(ValueError):
        aggregate_df.aggregate_region(mixed_vars, components=True)

    # a list of variables together with a weight raises an error
    # (inconsistent weight)
    with pytest.raises(ValueError):
        aggregate_df.aggregate_region(mixed_vars, weight=True)

    # expected maximum across regions: `World` rows of the CO2 data (without
    # the region column) followed by the carbon price data
    is_world = CO2_MAX_DF.region == 'World'
    world_co2 = CO2_MAX_DF[is_world].drop(columns='region')
    expected = pd.concat([world_co2, PRICE_MAX_DF]).set_index(REG_IDX).value

    # another method (max) can be given as string or as function
    for method in ('max', np.max):
        observed = aggregate_df.aggregate_region(mixed_vars, method=method)
        pd.testing.assert_series_equal(observed, expected)

def test_missing_region(check_aggregate_df):
    """Aggregate to a region ('foo') that is not part of the fixture data."""
    # for now, this test makes sure that this operation works as expected
    aggregated = check_aggregate_df.aggregate_region(
        'Primary Energy', region='foo'
    )
    n_rows = len(aggregated)
    assert n_rows == 8
    # # this test should be updated to the below after the return type of
    # # aggregate_region() is updated
    # exp = check_aggregate_df.aggregate_region(
    #     'Primary Energy', region='foo', append=False
    # ).data
    # check_aggregate_df.aggregate_region(
    #     'Primary Energy', region='foo', append=True
    # )
    # obs = check_aggregate_df.filter(region='foo').data
    # assert len(exp) > 0
    # pd.testing.assert_frame_equal(obs.reset_index(drop=True),
    #                               exp.reset_index(drop=True))

def test_aggregate_region_extra_subregion():
    """Aggregate over subregions of which one ('baz') is not in the data."""
    header = ['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
    rows = [
        ['model_a', 'scen_a', 'foo', 'Primary Energy', 'EJ/y', 1, 6],
        ['model_a', 'scen_a', 'bar', 'Primary Energy', 'EJ/y', 0.75, 5],
    ]
    iam_df = IamDataFrame(data=pd.DataFrame(rows, columns=header))

    result = iam_df.aggregate_region(
        variable='Primary Energy',
        region='R5ASIA',
        subregions=['foo', 'bar', 'baz'],
        components=[],
        append=False,
    )
    # two rows are expected (the data has two years)
    assert len(result) == 2

def test_aggregate_region_missing_all_subregions():
    """Aggregate over subregions of which none is part of the data."""
    header = ['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010]
    rows = [
        ['model_a', 'scen_a', 'foo', 'Primary Energy', 'EJ/y', 1, 6],
        ['model_a', 'scen_a', 'bar', 'Primary Energy', 'EJ/y', 0.75, 5],
    ]
    iam_df = IamDataFrame(data=pd.DataFrame(rows, columns=header))

    result = iam_df.aggregate_region(
        variable='Primary Energy',
        region='R5ASIA',
        subregions=['China', 'Vietnam', 'Japan'],
    )
    # nothing can be aggregated, so the result is expected to be empty
    assert len(result) == 0

def test_do_aggregate_append(test_df):
    """Aggregate sub-categories and append the result to the data."""
    # turn the existing total into a sub-category, then re-compute the total
    test_df.rename(
        {'variable': {'Primary Energy': 'Primary Energy|Gas'}}, inplace=True
    )
    test_df.aggregate('Primary Energy', append=True)
    observed = test_df.filter(variable='Primary Energy')

    # the fixture data either has a 'year' column or uses datetimes
    has_year = "year" in test_df.data
    time_cols = [2005, 2010] if has_year else TEST_DTS
    rows = [
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 1.5, 9.],
        ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 2, 7],
    ]
    rows[1][1] = 'scen_b'
    expected = pd.DataFrame(
        rows,
        columns=['model', 'scenario', 'region', 'variable', 'unit'] + time_cols
    ).set_index(IAMC_IDX)

    if has_year:
        expected.columns = [int(col) for col in expected.columns]
    else:
        # compare on the date only (drop the time of day)
        observed.data.time = observed.data.time.dt.normalize()
        expected.columns = pd.to_datetime(expected.columns)

    pd.testing.assert_frame_equal(observed.timeseries(), expected)

def test_aggregate_unknown_method(reg_df):
    """An unknown aggregation method raises a ValueError."""
    with pytest.raises(ValueError):
        reg_df.aggregate_region('Primary Energy', method='foo')

def test_check_aggregate_pass(check_aggregate_df):
    """`check_aggregate()` returns None for a consistent scenario."""
    single_scenario = check_aggregate_df.filter(scenario='a_scen')
    assert single_scenario.check_aggregate('Primary Energy') is None

def test_check_internal_consistency_no_world_for_variable(
    check_aggregate_df, caplog
):
    """A variable without `World` data is reported as an INFO log message."""
    # the complete fixture data is internally consistent
    assert check_aggregate_df.check_internal_consistency() is None

    # remove the `World` rows of one variable and run the check again
    without_world = check_aggregate_df.filter(
        variable='Emissions|CH4', region='World', keep=False
    )
    caplog.set_level(logging.INFO, logger="pyam.core")
    without_world.check_internal_consistency()

    # the message must have been logged, and at level INFO
    message = ("variable `Emissions|CH4` does not exist "
               "in region `World`")
    record = caplog.records[caplog.messages.index(message)]
    assert record.levelname == "INFO"

def test_check_aggregate_fail(test_df):
    """`check_aggregate()` returns the rows whose components do not add up.

    The result is expected to have two columns, and its first index entry
    identifies the inconsistent timeseries.
    """
    obs = test_df.check_aggregate('Primary Energy', exclude_on_fail=True)
    assert len(obs.columns) == 2
    # `Index.get_values()` was removed in pandas 1.0; indexing the MultiIndex
    # directly yields the same tuple of index values
    assert obs.index[0] == (
        'model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y'
    )

def test_check_aggregate_top_level(test_df):
    """The module-level `check_aggregate()` accepts filter arguments.

    Restricting the check to one year is expected to give a result with a
    single column.
    """
    obs = check_aggregate(test_df, variable='Primary Energy', year=2005)
    assert len(obs.columns) == 1
    # `Index.get_values()` was removed in pandas 1.0; indexing the MultiIndex
    # directly yields the same tuple of index values
    assert obs.index[0] == (
        'model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y'
    )

def test_df_check_aggregate_pass(check_aggregate_df):
    """`check_aggregate()` returns None for every variable of the fixture."""
    assert check_aggregate_df.check_aggregate('Primary Energy') is None

    for name in check_aggregate_df.variables():
        result = check_aggregate_df.check_aggregate(name)
        assert result is None

def test_df_check_aggregate_region_pass(check_aggregate_df):
    """`check_aggregate_region()` with components returns None throughout."""
    assert check_aggregate_df.check_aggregate_region(
        'Primary Energy', components=True
    ) is None

    for name in check_aggregate_df.variables():
        result = check_aggregate_df.check_aggregate_region(
            name, components=True
        )
        assert result is None

def run_check_agg_fail(pyam_df, tweak_dict, test_type):
    """Scale one timeseries by 0.99 and check where the failure is reported.

    Parameters
    ----------
    pyam_df : object with a `data` frame, `variables()` and the
        `check_aggregate()` / `check_aggregate_region()` methods
        The data to modify (in place) and to check.
    tweak_dict : dict
        Values of 'model', 'scenario', 'region', 'variable' and 'unit' (in
        this order) identifying the timeseries to modify. Not mutated.
    test_type : str
        'aggregate' to run `check_aggregate()`; any string containing
        'region' to run `check_aggregate_region(..., components=True)`.
        For 'region-world-only-contrib', the expected region is left as is.

    Raises
    ------
    ValueError
        If `test_type` selects neither of the two checks.
    """
    agg_test = test_type == 'aggregate'
    region_test = 'region' in test_type
    if not (agg_test or region_test):
        # previously this surfaced as an unbound-variable error in the loop
        raise ValueError('unknown test type: {}'.format(test_type))

    data = pyam_df.data
    row_to_tweak = (
        (data['model'] == tweak_dict['model'])
        & (data['scenario'] == tweak_dict['scenario'])
        & (data['region'] == tweak_dict['region'])
        & (data['variable'] == tweak_dict['variable'])
        & (data['unit'] == tweak_dict['unit'])
    )
    assert row_to_tweak.any()

    # a single `.loc` assignment avoids chained indexing on a column view
    data.loc[row_to_tweak, 'value'] *= 0.99

    # work on a copy so that the caller's dictionary is left untouched
    expected_index = dict(tweak_dict)
    if agg_test or test_type == 'region-world-only-contrib':
        # the error variable is always the top level one
        expected_index['variable'] = '|'.join(
            expected_index['variable'].split('|')[:2]
        )
    else:
        expected_index['region'] = 'World'
    expected_values = set(expected_index.values())

    # NOTE(review): if no variable fails the check, nothing is asserted and
    # the calling test passes - confirm whether a failure should be required
    for variable in pyam_df.variables():
        if agg_test:
            obs = pyam_df.check_aggregate(variable)
        else:
            obs = pyam_df.check_aggregate_region(variable, components=True)

        if obs is not None:
            assert len(obs.columns) == 2
            # `Index.get_values()` was removed in pandas 1.0
            assert set(obs.index[0]) == expected_values

def test_df_check_aggregate_fail(check_aggregate_df):
    """Tweak a regional total that has sub-categories, then run the check."""
    run_check_agg_fail(
        check_aggregate_df,
        dict(
            model='IMG',
            scenario='a_scen_2',
            region='R5REF',
            variable='Emissions|CO2',
            unit='Mt CO2/yr',
        ),
        'aggregate',
    )

def test_df_check_aggregate_fail_no_regions(check_aggregate_df):
    """Tweak a sub-category that the fixture data only defines for `World`."""
    run_check_agg_fail(
        check_aggregate_df,
        dict(
            model='MSG-GLB',
            scenario='a_scen_2',
            region='World',
            variable='Emissions|C2F6|Solvents',
            unit='kt C2F6/yr',
        ),
        'aggregate',
    )

def test_df_check_aggregate_region_fail(check_aggregate_df):
    """Tweak a `World` total, then run the regional-aggregation check."""
    run_check_agg_fail(
        check_aggregate_df,
        dict(
            model='IMG',
            scenario='a_scen_2',
            region='World',
            variable='Emissions|CO2',
            unit='Mt CO2/yr',
        ),
        'region',
    )

def test_df_check_aggregate_region_fail_no_subsector(check_aggregate_df):
    """Tweak a regional value of a variable without sub-categories."""
    run_check_agg_fail(
        check_aggregate_df,
        dict(
            model='MSG-GLB',
            scenario='a_scen_2',
            region='R5REF',
            variable='Emissions|CH4',
            unit='Mt CH4/yr',
        ),
        'region',
    )

def test_df_check_aggregate_region_fail_world_only_var(check_aggregate_df):
    """Tweak a sub-category that the fixture data only defines for `World`."""
    run_check_agg_fail(
        check_aggregate_df,
        dict(
            model='MSG-GLB',
            scenario='a_scen_2',
            region='World',
            variable='Emissions|CO2|Agg Agg',
            unit='Mt CO2/yr',
        ),
        'region-world-only-contrib',
    )

def test_df_check_aggregate_region_errors(check_aggregate_regional_df):
    """Regional checks without explicit subregions report a mismatch.

    These checks should fail because our dataframe has continents and
    regions, so checking without providing components leads to double
    counting and hence failure.
    """
    obs = check_aggregate_regional_df.check_aggregate_region(
        'Emissions|N2O', 'World'
    )

    assert len(obs.columns) == 2
    # `Index.get_values()` was removed in pandas 1.0; indexing the MultiIndex
    # directly yields the same tuple of index values
    assert obs.index[0] == (
        'AIM', 'cscen', 'World', 'Emissions|N2O', 'Mt N/yr'
    )

    obs = check_aggregate_regional_df.check_aggregate_region(
        'Emissions|N2O', 'REUROPE'
    )

    assert len(obs.columns) == 2
    assert obs.index[0] == (
        'AIM', 'cscen', 'REUROPE', 'Emissions|N2O', 'Mt N/yr'
    )

def test_df_check_aggregate_region_components(check_aggregate_regional_df):
    """Regional checks pass when subregions and components are specified."""
    # (variable, region to check, subregions to aggregate over)
    cases = [
        ('Emissions|N2O', 'World', ['REUROPE', 'RASIA']),
        ('Emissions|N2O|Ind|Solvents', 'World', ['REUROPE', 'RASIA']),
        ('Emissions|N2O', 'REUROPE', ['Germany', 'UK']),
        ('Emissions|N2O', 'RASIA', ['China', 'Japan']),
        ('Emissions|N2O|Ind|Transport', 'REUROPE', ['Germany', 'UK']),
    ]
    for variable, region, subregions in cases:
        result = check_aggregate_regional_df.check_aggregate_region(
            variable, region, subregions=subregions, components=True
        )
        assert result is None

@pytest.mark.parametrize("components,exp_vals", (
    # should find sub-components including nested bunkers
    (True, [1.9, 15.7]),
    # should only add AFOLU onto regional sum, not Shipping emissions
    (["Emissions|N2O|AFOLU"], [0.9, 9.7]),
    # specifying Ind leads to double counting (and not skipping AFOLU) but as
    # it's user specified no warning etc. is raised
    (["Emissions|N2O|Ind"], [2.6, 25.2]),
))
def test_aggregate_region_components_handling(check_aggregate_regional_df,
                                              components, exp_vals):
    """Check how `components` is handled by `aggregate_region()`."""
    n2o_df = check_aggregate_regional_df.filter(variable="*N2O*")

    # only get Europe and Asia to avoid double counting
    observed = n2o_df.aggregate_region(
        "Emissions|N2O",
        components=components,
        subregions=["REUROPE", "RASIA"],
    )

    index_levels = [
        ["AIM"], ["cscen"], ['Emissions|N2O'], ["Mt N/yr"], [2005, 2010]
    ]
    index_names = ["model", "scenario", "variable", "unit", "year"]
    expected = pd.Series(
        exp_vals,
        index=pd.MultiIndex.from_product(index_levels, names=index_names),
        name="value",
    )

    pd.testing.assert_series_equal(observed, expected)

def test_check_aggregate_region_no_world(check_aggregate_regional_df, caplog):
    """Checking a region without data is reported as an INFO log message."""
    without_world = check_aggregate_regional_df.filter(
        region='World', keep=False
    )
    caplog.set_level(logging.INFO, logger="pyam.core")
    without_world.check_aggregate_region('Emissions|N2O', region='World')

    # the message must have been logged, and at level INFO
    message = ("variable `Emissions|N2O` does not exist "
               "in region `World`")
    record = caplog.records[caplog.messages.index(message)]
    assert record.levelname == "INFO"
# conftest.py
# has to go first for environment setup reasons
import matplotlib
matplotlib.use('agg')

import os
import pytest
import pandas as pd

from datetime import datetime
from pyam import IamDataFrame, IAMC_IDX

# directory containing this file (with symbolic links resolved)
here = os.path.dirname(os.path.realpath(__file__))
# sub-directories for the baseline images and the test data files
IMAGE_BASELINE_DIR, TEST_DATA_DIR = (
    os.path.join(here, subdir) for subdir in ('expected_figs', 'data')
)

# minimal test data in wide format (one column per year): two scenarios of
# one model at `World` level, with a `Coal` sub-category for `scen_a` only;
# used by the `test_df`, `test_df_year` and `test_pd_df` fixtures
TEST_DF = pd.DataFrame([
    ['model_a', 'scen_a', 'World', 'Primary Energy', 'EJ/y', 1, 6.],
    ['model_a', 'scen_a', 'World', 'Primary Energy|Coal', 'EJ/y', 0.5, 3],
    ['model_a', 'scen_b', 'World', 'Primary Energy', 'EJ/y', 2, 7],
],
    columns=IAMC_IDX + [2005, 2010],
)

# data with a variable-and-region structure for the `aggregate_df` fixture;
# there are no model/scenario columns here - the fixture passes them as
# keyword arguments when creating the IamDataFrame
FULL_FEATURE_DF = pd.DataFrame([
    # primary energy and its sub-categories: `World` is the sum of the regions
    ['World', 'Primary Energy', 'EJ/y', 12, 15],
    ['reg_a', 'Primary Energy', 'EJ/y', 8, 9],
    ['reg_b', 'Primary Energy', 'EJ/y', 4, 6],
    ['World', 'Primary Energy|Coal', 'EJ/y', 9, 10],
    ['reg_a', 'Primary Energy|Coal', 'EJ/y', 6, 6],
    ['reg_b', 'Primary Energy|Coal', 'EJ/y', 3, 4],
    ['World', 'Primary Energy|Wind', 'EJ/y', 3, 5],
    ['reg_a', 'Primary Energy|Wind', 'EJ/y', 2, 3],
    ['reg_b', 'Primary Energy|Wind', 'EJ/y', 1, 2],
    # emissions: `Bunkers` is only defined for `World`
    # NOTE(review): the unit 'EJ/y' looks copied from the energy rows, but
    # the expected values in the tests use the same unit
    ['World', 'Emissions|CO2', 'EJ/y', 10, 14],
    ['World', 'Emissions|CO2|Energy', 'EJ/y', 6, 8],
    ['World', 'Emissions|CO2|AFOLU', 'EJ/y', 3, 4],
    ['World', 'Emissions|CO2|Bunkers', 'EJ/y', 1, 2],
    ['reg_a', 'Emissions|CO2', 'EJ/y', 6, 8],
    ['reg_a', 'Emissions|CO2|Energy', 'EJ/y', 4, 5],
    ['reg_a', 'Emissions|CO2|AFOLU', 'EJ/y', 2, 3],
    ['reg_b', 'Emissions|CO2', 'EJ/y', 3, 4],
    ['reg_b', 'Emissions|CO2|Energy', 'EJ/y', 2, 3],
    ['reg_b', 'Emissions|CO2|AFOLU', 'EJ/y', 1, 1],
    # carbon price: the `World` value is the regional price weighted by the
    # regional CO2 emissions, not the sum
    ['World', 'Price|Carbon', 'USD/tCO2', 4, 27],
    ['reg_a', 'Price|Carbon', 'USD/tCO2', 1, 30],
    ['reg_b', 'Price|Carbon', 'USD/tCO2', 10, 21],
    ['World', 'Population', 'm', 3, 5],
    ['reg_a', 'Population', 'm', 2, 3],
    ['reg_b', 'Population', 'm', 1, 2],
],
    columns=['region', 'variable', 'unit', 2005, 2010],
)

# data for the `reg_df` fixture: two models that report `Primary Energy` for
# `World` and for two model-specific regions each
REG_DF = pd.DataFrame([
    ['IMAGE', 'a_scenario', 'NAF', 'Primary Energy', 'EJ/y', 1, 6],
    ['IMAGE', 'a_scenario', 'ME', 'Primary Energy', 'EJ/y', 2, 7],
    ['IMAGE', 'a_scenario', 'World', 'Primary Energy', 'EJ/y', 3, 13],
    ['MESSAGE-GLOBIOM', 'a_scenario', 'MEA', 'Primary Energy', 'EJ/y', 1, 6],
    ['MESSAGE-GLOBIOM', 'a_scenario', 'AFR', 'Primary Energy', 'EJ/y', 2, 7],
    ['MESSAGE-GLOBIOM', 'a_scenario', 'World', 'Primary Energy', 'EJ/y', 3, 13],
],
    columns=IAMC_IDX + [2005, 2010],
)

# model/scenario prefixes, used to keep the rows below short
mg_ascen = ['MSG-GLB', 'a_scen']
mg_ascen_2 = ['MSG-GLB', 'a_scen_2']
# data for the `check_aggregate_df` fixture: two models with two scenarios
# each, reporting energy and emissions for R5ASIA, R5REF and World
CHECK_AGG_DF = pd.DataFrame([
    ['IMG', 'a_scen', 'R5ASIA', 'Primary Energy', 'EJ/y', 1, 6],
    ['IMG', 'a_scen', 'R5ASIA', 'Primary Energy|Coal', 'EJ/y', 0.75, 5],
    ['IMG', 'a_scen', 'R5ASIA', 'Primary Energy|Gas', 'EJ/y', 0.25, 1],
    ['IMG', 'a_scen', 'R5ASIA', 'Emissions|CO2', 'Mt CO2/yr', 3, 8],
    ['IMG', 'a_scen', 'R5ASIA', 'Emissions|CO2|Cars', 'Mt CO2/yr', 1, 3],
    ['IMG', 'a_scen', 'R5ASIA', 'Emissions|CO2|Tar', 'Mt CO2/yr', 2, 5],
    ['IMG', 'a_scen', 'R5REF', 'Primary Energy', 'EJ/y', 0.3, 0.6],
    ['IMG', 'a_scen', 'R5REF', 'Primary Energy|Coal', 'EJ/y', 0.15, 0.4],
    ['IMG', 'a_scen', 'R5REF', 'Primary Energy|Gas', 'EJ/y', 0.15, 0.2],
    ['IMG', 'a_scen', 'R5REF', 'Emissions|CO2', 'Mt CO2/yr', 1, 1.4],
    ['IMG', 'a_scen', 'R5REF', 'Emissions|CO2|Cars', 'Mt CO2/yr', 0.6, 0.8],
    ['IMG', 'a_scen', 'R5REF', 'Emissions|CO2|Tar', 'Mt CO2/yr', 0.4, 0.6],
    ['IMG', 'a_scen', 'World', 'Primary Energy', 'EJ/y', 1.3, 6.6],
    ['IMG', 'a_scen', 'World', 'Primary Energy|Coal', 'EJ/y', 0.9, 5.4],
    ['IMG', 'a_scen', 'World', 'Primary Energy|Gas', 'EJ/y', 0.4, 1.2],
    ['IMG', 'a_scen', 'World', 'Emissions|CO2', 'Mt CO2/yr', 4, 9.4],
    ['IMG', 'a_scen', 'World', 'Emissions|CO2|Cars', 'Mt CO2/yr', 1.6, 3.8],
    ['IMG', 'a_scen', 'World', 'Emissions|CO2|Tar', 'Mt CO2/yr', 2.4, 5.6],
    ['IMG', 'a_scen_2', 'R5ASIA', 'Primary Energy', 'EJ/y', 1.4, 6.4],
    ['IMG', 'a_scen_2', 'R5ASIA', 'Primary Energy|Coal', 'EJ/y', 0.95, 5.2],
    ['IMG', 'a_scen_2', 'R5ASIA', 'Primary Energy|Gas', 'EJ/y', 0.45, 1.2],
    ['IMG', 'a_scen_2', 'R5ASIA', 'Emissions|CO2', 'Mt CO2/yr', 3.4, 8.4],
    ['IMG', 'a_scen_2', 'R5ASIA', 'Emissions|CO2|Cars', 'Mt CO2/yr', 1.2, 3.2],
    ['IMG', 'a_scen_2', 'R5ASIA', 'Emissions|CO2|Tar', 'Mt CO2/yr', 2.2, 5.2],
    ['IMG', 'a_scen_2', 'R5REF', 'Primary Energy', 'EJ/y', 0.7, 1.0],
    ['IMG', 'a_scen_2', 'R5REF', 'Primary Energy|Coal', 'EJ/y', 0.35, 0.6],
    ['IMG', 'a_scen_2', 'R5REF', 'Primary Energy|Gas', 'EJ/y', 0.35, 0.4],
    ['IMG', 'a_scen_2', 'R5REF', 'Emissions|CO2', 'Mt CO2/yr', 1.4, 1.8],
    ['IMG', 'a_scen_2', 'R5REF', 'Emissions|CO2|Cars', 'Mt CO2/yr', 0.8, 1.0],
    ['IMG', 'a_scen_2', 'R5REF', 'Emissions|CO2|Tar', 'Mt CO2/yr', 0.6, 0.8],
    ['IMG', 'a_scen_2', 'World', 'Primary Energy', 'EJ/y', 2.1, 7.4],
    ['IMG', 'a_scen_2', 'World', 'Primary Energy|Coal', 'EJ/y', 1.3, 5.8],
    ['IMG', 'a_scen_2', 'World', 'Primary Energy|Gas', 'EJ/y', 0.8, 1.6],
    ['IMG', 'a_scen_2', 'World', 'Emissions|CO2', 'Mt CO2/yr', 4.8, 10.2],
    ['IMG', 'a_scen_2', 'World', 'Emissions|CO2|Cars', 'Mt CO2/yr', 2.0, 4.2],
    ['IMG', 'a_scen_2', 'World', 'Emissions|CO2|Tar', 'Mt CO2/yr', 2.8, 6.0],
    mg_ascen + ['R5ASIA', 'Primary Energy', 'EJ/y', 0.8, 5.8],
    mg_ascen + ['R5ASIA', 'Primary Energy|Coal', 'EJ/y', 0.65, 4.9],
    mg_ascen + ['R5ASIA', 'Primary Energy|Gas', 'EJ/y', 0.15, 0.9],
    mg_ascen + ['R5ASIA', 'Emissions|CO2', 'Mt CO2/yr', 2.8, 7.8],
    mg_ascen + ['R5ASIA', 'Emissions|CO2|Cars', 'Mt CO2/yr', 0.9, 2.9],
    mg_ascen + ['R5ASIA', 'Emissions|CO2|Tar', 'Mt CO2/yr', 1.9, 4.9],
    mg_ascen + ['R5REF', 'Primary Energy', 'EJ/y', 0.1, 0.4],
    mg_ascen + ['R5REF', 'Primary Energy|Coal', 'EJ/y', 0.05, 0.3],
    mg_ascen + ['R5REF', 'Primary Energy|Gas', 'EJ/y', 0.05, 0.1],
    mg_ascen + ['R5REF', 'Emissions|CO2', 'Mt CO2/yr', 0.8, 1.2],
    mg_ascen + ['R5REF', 'Emissions|CO2|Cars', 'Mt CO2/yr', 0.5, 0.7],
    mg_ascen + ['R5REF', 'Emissions|CO2|Tar', 'Mt CO2/yr', 0.3, 0.5],
    mg_ascen + ['World', 'Primary Energy', 'EJ/y', 0.9, 6.2],
    mg_ascen + ['World', 'Primary Energy|Coal', 'EJ/y', 0.7, 5.2],
    mg_ascen + ['World', 'Primary Energy|Gas', 'EJ/y', 0.2, 1.0],
    mg_ascen + ['World', 'Emissions|CO2', 'Mt CO2/yr', 3.6, 9.0],
    mg_ascen + ['World', 'Emissions|CO2|Cars', 'Mt CO2/yr', 1.4, 3.6],
    mg_ascen + ['World', 'Emissions|CO2|Tar', 'Mt CO2/yr', 2.2, 5.4],
    # this scenario uses negative values and has variables that are only
    # defined for `World` (`Agg Agg`, CF4, C2F6 and its sub-categories)
    mg_ascen_2 + ['R5ASIA', 'Primary Energy', 'EJ/y', -1.4, -6.4],
    mg_ascen_2 + ['R5ASIA', 'Primary Energy|Coal', 'EJ/y', -0.95, -5.2],
    mg_ascen_2 + ['R5ASIA', 'Primary Energy|Gas', 'EJ/y', -0.45, -1.2],
    mg_ascen_2 + ['R5ASIA', 'Emissions|CO2', 'Mt CO2/yr', -3.4, -8.4],
    mg_ascen_2 + ['R5ASIA', 'Emissions|CO2|Cars', 'Mt CO2/yr', -1.2, -3.2],
    mg_ascen_2 + ['R5ASIA', 'Emissions|CO2|Tar', 'Mt CO2/yr', -2.2, -5.2],
    mg_ascen_2 + ['R5REF', 'Primary Energy', 'EJ/y', -0.7, -1.0],
    mg_ascen_2 + ['R5REF', 'Primary Energy|Coal', 'EJ/y', -0.35, -0.6],
    mg_ascen_2 + ['R5REF', 'Primary Energy|Gas', 'EJ/y', -0.35, -0.4],
    mg_ascen_2 + ['R5REF', 'Emissions|CO2', 'Mt CO2/yr', -1.4, -1.8],
    mg_ascen_2 + ['R5REF', 'Emissions|CO2|Cars', 'Mt CO2/yr', -0.8, -1.0],
    mg_ascen_2 + ['R5REF', 'Emissions|CO2|Tar', 'Mt CO2/yr', -0.6, -0.8],
    mg_ascen_2 + ['World', 'Primary Energy', 'EJ/y', -2.1, -7.4],
    mg_ascen_2 + ['World', 'Primary Energy|Coal', 'EJ/y', -1.3, -5.8],
    mg_ascen_2 + ['World', 'Primary Energy|Gas', 'EJ/y', -0.8, -1.6],
    mg_ascen_2 + ['World', 'Emissions|CO2', 'Mt CO2/yr', -5.0, -10.6],
    mg_ascen_2 + ['World', 'Emissions|CO2|Cars', 'Mt CO2/yr', -2.0, -4.2],
    mg_ascen_2 + ['World', 'Emissions|CO2|Tar', 'Mt CO2/yr', -2.8, -6.0],
    mg_ascen_2 + ['World', 'Emissions|CO2|Agg Agg', 'Mt CO2/yr', -0.2, -0.4],
    mg_ascen_2 + ['World', 'Emissions|CF4', 'kt CF4/yr', 54, 56],
    mg_ascen_2 + ['World', 'Emissions|C2F6', 'kt C2F6/yr', 32, 27],
    mg_ascen_2 + ['World', 'Emissions|C2F6|Solvents', 'kt C2F6/yr', 30, 33],
    mg_ascen_2 + ['World', 'Emissions|C2F6|Industry', 'kt C2F6/yr', 2, -6],
    mg_ascen_2 + ['World', 'Emissions|CH4', 'Mt CH4/yr', 322, 217],
    mg_ascen_2 + ['R5REF', 'Emissions|CH4', 'Mt CH4/yr', 30, 201],
    mg_ascen_2 + ['R5ASIA', 'Emissions|CH4', 'Mt CH4/yr', 292, 16],
],
    columns=['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010],
)

# model/scenario prefix, used to keep the rows below short
ms = ['AIM', 'cscen']
# data for the `check_aggregate_regional_df` fixture: N2O emissions for
# `World`, for the regions RASIA and REUROPE, and for China, Japan, Germany
# and UK (which the tests pass as `subregions` of RASIA and REUROPE)
CHECK_AGG_REGIONAL_DF = pd.DataFrame([
    # `AFOLU` and `Ind|Shipping` are only defined for `World`
    ms + ['World', 'Emissions|N2O', 'Mt N/yr', 1.9, 15.7],
    ms + ['World', 'Emissions|N2O|AFOLU', 'Mt N/yr', 0.1, 0.1],
    ms + ['World', 'Emissions|N2O|Ind', 'Mt N/yr', 1.8, 15.6],
    ms + ['World', 'Emissions|N2O|Ind|Shipping', 'Mt N/yr', 1, 6],
    ms + ['World', 'Emissions|N2O|Ind|Solvents', 'Mt N/yr', 1.6, 3.8],
    ms + ['World', 'Emissions|N2O|Ind|Transport', 'Mt N/yr', -0.8, 5.8],
    ms + ['RASIA', 'Emissions|N2O', 'Mt N/yr', 0, 5.9],
    ms + ['RASIA', 'Emissions|N2O|Ind', 'Mt N/yr', 0, 5.9],
    ms + ['RASIA', 'Emissions|N2O|Ind|Solvents', 'Mt N/yr', 0.8, 2.6],
    ms + ['RASIA', 'Emissions|N2O|Ind|Transport', 'Mt N/yr', -0.8, 3.3],
    ms + ['REUROPE', 'Emissions|N2O', 'Mt N/yr', 0.8, 3.7],
    ms + ['REUROPE', 'Emissions|N2O|Ind', 'Mt N/yr', 0.8, 3.7],
    ms + ['REUROPE', 'Emissions|N2O|Ind|Solvents', 'Mt N/yr', 0.8, 1.2],
    ms + ['REUROPE', 'Emissions|N2O|Ind|Transport', 'Mt N/yr', 0, 2.5],
    ms + ['China', 'Emissions|N2O', 'Mt N/yr', 0.2, 1.3],
    ms + ['China', 'Emissions|N2O|Ind', 'Mt N/yr', 0.2, 1.3],
    ms + ['China', 'Emissions|N2O|Ind|Transport', 'Mt N/yr', 0.2, 1.3],
    ms + ['Japan', 'Emissions|N2O', 'Mt N/yr', -1, 2],
    ms + ['Japan', 'Emissions|N2O|Ind', 'Mt N/yr', -1, 2],
    ms + ['Japan', 'Emissions|N2O|Ind|Transport', 'Mt N/yr', -1, 2],
    ms + ['Germany', 'Emissions|N2O', 'Mt N/yr', 2, 3],
    ms + ['Germany', 'Emissions|N2O|Ind', 'Mt N/yr', 2, 3],
    ms + ['Germany', 'Emissions|N2O|Ind|Transport', 'Mt N/yr', 2, 3],
    ms + ['UK', 'Emissions|N2O', 'Mt N/yr', -2, -0.5],
    ms + ['UK', 'Emissions|N2O|Ind', 'Mt N/yr', -2, -0.5],
    ms + ['UK', 'Emissions|N2O|Ind|Transport', 'Mt N/yr', -2, -0.5],

],
    columns=['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010],
)

# data for the `plot_stack_plot_df` fixture: CO2 emission components at
# `World` level for four years; model and scenario are the same for all rows,
# so they are added as constant columns instead of being repeated in each row
TEST_STACKPLOT_DF = pd.DataFrame(
    data=[
        ['World', 'Emissions|CO2|Energy|Oil', 'Mt CO2/yr',
         2, 3.2, 2.0, 1.8],
        ['World', 'Emissions|CO2|Energy|Gas', 'Mt CO2/yr',
         1.3, 1.6, 1.0, 0.7],
        ['World', 'Emissions|CO2|Energy|BECCS', 'Mt CO2/yr',
         0.0, 0.4, -0.4, 0.3],
        ['World', 'Emissions|CO2|Cars', 'Mt CO2/yr',
         1.6, 3.8, 3.0, 2.5],
        ['World', 'Emissions|CO2|Tar', 'Mt CO2/yr',
         0.3, 0.35, 0.35, 0.33],
        ['World', 'Emissions|CO2|Agg', 'Mt CO2/yr',
         0.5, -0.1, -0.5, -0.7],
        ['World', 'Emissions|CO2|LUC', 'Mt CO2/yr',
         -0.3, -0.6, -1.2, -1.0],
    ],
    columns=['region', 'variable', 'unit', 2005, 2010, 2015, 2020],
).assign(model='IMG', scenario='a_scen')

# the two time steps of the minimal test data in four formats: years,
# datetime objects, date strings and date-time strings
TEST_YEARS = [
    2005,
    2010,
]
TEST_DTS = [
    datetime(2005, 6, 17),
    datetime(2010, 7, 21),
]
TEST_TIME_STR = [
    '2005-06-17',
    '2010-07-21',
]
TEST_TIME_STR_HR = [
    '2005-06-17 00:00:00',
    '2010-07-21 12:00:00',
]

@pytest.fixture(
    scope="function",
    params=[TEST_YEARS, TEST_DTS, TEST_TIME_STR, TEST_TIME_STR_HR],
)
def test_df(request):
    """Yield a minimal IamDataFrame, once for each of four time formats."""
    # replace the year columns by the time steps of the current format
    first, second = request.param[0], request.param[1]
    renamed = TEST_DF.rename(columns={2005: first, 2010: second})
    yield IamDataFrame(data=renamed)

@pytest.fixture(scope="function")
def test_df_year():
    """Yield a minimal IamDataFrame for testing 'year'-column features."""
    yield IamDataFrame(data=TEST_DF)

@pytest.fixture(scope="function")
def test_pd_df():
    """Yield a copy of the minimal test data as pandas.DataFrame.

    Only the 'year' time format is provided.
    """
    frame_copy = TEST_DF.copy()
    yield frame_copy

@pytest.fixture(scope="function")
def aggregate_df():
    """Yield an IamDataFrame with a variable-and-region structure.

    The data is used for testing the aggregation tools; model and scenario
    are not part of the data and are passed as keyword arguments.
    """
    yield IamDataFrame(
        data=FULL_FEATURE_DF, model='model_a', scenario='scen_a'
    )

@pytest.fixture(scope="function")
def check_aggregate_df():
    """Yield an IamDataFrame created from `CHECK_AGG_DF`."""
    yield IamDataFrame(data=CHECK_AGG_DF)

@pytest.fixture(scope="function")
def check_aggregate_regional_df():
    """Yield an IamDataFrame created from `CHECK_AGG_REGIONAL_DF`."""
    yield IamDataFrame(data=CHECK_AGG_REGIONAL_DF)

@pytest.fixture(scope="function")
def reg_df():
    """Yield an IamDataFrame created from `REG_DF`."""
    yield IamDataFrame(data=REG_DF)

@pytest.fixture(scope="session")
def plot_df():
    """Yield an IamDataFrame read from `plot_data.csv` (once per session)."""
    csv_path = os.path.join(TEST_DATA_DIR, 'plot_data.csv')
    yield IamDataFrame(data=csv_path)

@pytest.fixture(scope="session")
def plot_stack_plot_df():
    """Yield an IamDataFrame of `TEST_STACKPLOT_DF` (once per session)."""
    yield IamDataFrame(TEST_STACKPLOT_DF)
danielhuppmann commented 4 years ago

Thanks @znicholls - though I didn't just remove all aggregation tests, most of them were just refactored (and ordered so that it's all tests for [check_]aggregate(), then [check_]aggregate_region(), then check_internal_consistency(), with the same order of features tested).

A useful first step is to make a list of all tests that were removed, with a line or two indicating the feature or specific corner case that was covered by the test (which was sometimes not obvious to me). I can then go through and indicate whether this is covered in one of the new tests or if it was dropped. Then we can re-insert those tests that were indeed dropped during the refactoring.

gidden commented 4 years ago

I agree on the first step - identifying which tests were not moved over during the refactor. I can then try to help put the missing ones back in during my now very productive train rides between Berlin and Vienna =). When we get a list here, I'll check back in.