Deltares / ddlpy

API to Dutch Rijkswaterstaat archive (DDL, waterinfo.rws.nl) of monitoring water data
https://deltares.github.io/ddlpy/
GNU General Public License v3.0
19 stars 6 forks source link

Add timezone to `ddlpy.dataframe_to_xarray()` #105

Closed veenstrajelmer closed 3 months ago

veenstrajelmer commented 3 months ago

Description

When converting a measurements dataframe to xarray, the timestamps are converted to UTC and the timezone offset is dropped; this is not desirable. Example code:

import ddlpy

# Retrieve the locations table and keep only rows matching all four filters.
catalog = ddlpy.locations()
wanted_stations = ['HOEKVHLD', 'IJMDBTHVN','SCHEVNGN']
row_filter = (
    catalog['Grootheid.Code'].isin(['WATHTE'])
    & catalog['Hoedanigheid.Code'].isin(['NAP'])
    & catalog['Groepering.Code'].isin(['NVT'])
    & catalog.index.isin(wanted_stations)
)
selected = catalog.loc[row_filter]

# Request measurements for the first matching row over the given period.
measurements = ddlpy.measurements(selected.iloc[0], "2019-01-01", "2020-02-01")

# Show the first timestamp before and after the conversion to xarray,
# so the two values can be compared side by side.
ds = ddlpy.dataframe_to_xarray(measurements)
first_in_dataframe = measurements.index[0]
first_in_dataset = ds.time.to_pandas().iloc[0]
print(first_in_dataframe)
print(first_in_dataset)

Gives:

2019-01-01 01:00:00+01:00
2019-01-01 00:00:00

Todo:

Code for testing timezones:

import ddlpy
import pandas as pd
import xarray as xr

# Select the rows of the locations table that match all four filters.
locations = ddlpy.locations()
bool_hoedanigheid = locations['Hoedanigheid.Code'].isin(['NAP'])
bool_stations = locations.index.isin(['HOEKVHLD', 'IJMDBTHVN','SCHEVNGN'])
bool_grootheid = locations['Grootheid.Code'].isin(['WATHTE'])
bool_groepering = locations['Groepering.Code'].isin(['NVT'])
selected = locations.loc[bool_grootheid & bool_hoedanigheid & bool_groepering & bool_stations]

# Request one month of measurements for the first matching row.
start_date = "2019-01-01"
end_date = "2019-02-01"
measurements = ddlpy.measurements(selected.iloc[0], start_date, end_date)
print(measurements["Meetwaarde.Waarde_Numeriek"].iloc[0:1])

# Uncomment to run the checks below against a timezone-naive index instead.
# measurements.index = measurements.index.tz_convert(None)

ds_clean = ddlpy.dataframe_to_xarray(measurements)
print()
print(ds_clean.time.encoding["units"])

# Check that times and timezone are correct: the dataset's first time value
# must equal the first dataframe timestamp with its timezone removed, and the
# encoding units must carry the original (possibly offset-aware) timestamp.
refdate = str(measurements.index[0])
time0 = measurements.index[0]
if measurements.index.tz is not None:
    # Convert only the single timestamp rather than the whole dataframe.
    time0 = time0.tz_convert(None)
assert time0 == ds_clean.time.to_pandas().iloc[0]
assert ds_clean.time.encoding['units'] == f"minutes since {refdate}"

# Write the dataset to a uniquely named netCDF file in the working directory
# and read the raw (undecoded) time values back.
date_str = str(pd.Timestamp.now()).replace(" ","_").replace(".","_").replace(":","")
file_nc = f"meas_with_timezone_{date_str}.nc"
ds_clean.to_netcdf(file_nc)
# Open in a context manager so the file handle is released after printing.
with xr.open_dataset(file_nc, decode_times=False) as ds_fromfile:
    print(ds_fromfile.time.to_numpy())
    print(ds_fromfile["Meetwaarde.Waarde_Numeriek"].to_pandas().iloc[0:1])