Deltares-research / kenmerkendewaarden

Derive indicators from waterlevel measurements
https://deltares-research.github.io/kenmerkendewaarden/
GNU General Public License v3.0
1 stars 0 forks source link

combined exceedance line shows odd pattern when including more data #37

Closed veenstrajelmer closed 3 months ago

veenstrajelmer commented 3 months ago

The blend_distributions function does not behave as expected when including more years of data. This is since all values except the last 100 are included, while this should be fixed to a certain frequency. The result is that with long timeseries the weibull line is too short and therefor the combined line shows odd patterns.

import os
import pandas as pd
import matplotlib.pyplot as plt
plt.close('all')
import hatyan
import kenmerkendewaarden as kw # pip install git+https://github.com/Deltares-research/kenmerkendewaarden
from kenmerkendewaarden.overschrijding import blend_distributions # TODO: make public function if required

dir_base = r'p:\11210325-005-kenmerkende-waarden\work'
dir_meas = os.path.join(dir_base,'measurements_wl_18700101_20240101') # this data is retrieved from the DDL with ddlpy
dir_vali_overschr = os.path.join(dir_base,'data_overschrijding') # TODO: this data is not reproducible yet

stat_list = ['HOEKVHLD']

for current_station in stat_list:

    print(f'loading data for {current_station}')
    data_pd_HWLW_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True)

    # this works as expected
    data_pd_HWLW_sel = data_pd_HWLW_all.loc["1970":"2012"]
    # this works as expected
    data_pd_HWLW_sel = data_pd_HWLW_all.loc["1970":"2021-01-01"]
    # this gives strange pattern, probably due to df_trend.iloc[:-100] in blend_distribution
    data_pd_HWLW_sel = data_pd_HWLW_all.loc["1900":"2021-01-01"]

    data_pd_measext = hatyan.calc_HWLW12345to12(data_pd_HWLW_sel) #convert 12345 to 12 by taking minimum of 345 as 2 (laagste laagwater)

    ###OVERSCHRIJDINGSFREQUENTIES
    Tfreqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200, #overschrijdingsfreqs
                         1/500, 1/1000, 1/2000, 1/4000, 1/5000, 1/10000]

    print(f'overschrijdingsfrequenties for {current_station}')
    data_pd_HW = data_pd_measext.loc[data_pd_measext['HWLWcode']==1]
    data_pd_LW = data_pd_measext.loc[data_pd_measext['HWLWcode']!=1]

    #get Hydra-NL and KWK-RMM validation data (only for HOEKVHLD)
    dist_vali_exc = {}
    dist_vali_dec = {}
    if current_station =='HOEKVHLD':
        stat_name = 'Hoek_van_Holland'
        print('Load Hydra-NL distribution data and other validation data')
        dist_vali_exc['Hydra-NL'] = pd.read_csv(os.path.join(dir_vali_overschr,'Processed_HydraNL','Without_model_uncertainty',f'{stat_name}.csv'), sep=';', header=[0])
        dist_vali_exc['Hydra-NL']['values'] /= 100 # cm to m
        dist_vali_exc['Hydra-NL met modelonzekerheid'] = pd.read_csv(os.path.join(dir_vali_overschr,'Processed_HydraNL','With_model_uncertainty',f'{stat_name}_with_model_uncertainty.csv'), sep=';', header=[0])
        dist_vali_exc['Hydra-NL met modelonzekerheid']['values'] /= 100 # cm to m

    #set station rules
    station_rule_type = 'break'
    station_break_value = data_pd_measext.index.min()

    # 1. Exceedance
    print('Exceedance') #TODO: hatyan.get_weibull.der_pfunc() throws "RuntimeWarning: invalid value encountered in double_scalars"
    dist_exc = kw.compute_overschrijding(data_pd_HW, rule_type=station_rule_type, rule_value=station_break_value)
    dist_exc.update(dist_vali_exc)

    # blend again
    dist_exc['Gecombineerd'] = blend_distributions(dist_exc['Trendanalyse'].copy(),
                                                   dist_exc['Weibull'].copy(),
                                                   dist_exc['Hydra-NL'].copy(), 
                                                   )

    df_interp = kw.interpolate_interested_Tfreqs(dist_exc['Gecombineerd'], Tfreqs=Tfreqs_interested)

    fig, ax = kw.plot_distributions(dist_exc, name=current_station, color_map='default')
    ax.set_ylim(0,5.5)

    fig, ax = kw.plot_distributions(dist_exc, name=current_station, color_map='default')
    ax.set_ylim(2.1,2.6)
    ax.set_xlim(1.5,0.5)

Gives: image image

Todo: