ArtesiaWater / hydropandas

Module for loading observation data into custom DataFrames
https://hydropandas.readthedocs.io
MIT License

Support Quality Controlled Rain Gauge Data from KNMI Data Platform #203

Open martinvonk opened 3 months ago

martinvonk commented 3 months ago

KNMI Data Platform provides Quality Controlled Rain Gauge Data.

We can read the KNMI Data Platform with NLMOD, and the Quality Controlled Rain Gauge Data with the FEWS reader in HydroPandas. I am not suggesting that we include this per se, but it could be a nice addition to the existing KNMI data. I could not find any metadata on the station locations, so that is a problem if we don't know the locations beforehand.
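For reference, reading the downloaded PI-XML files with the existing FEWS reader could look something like the minimal sketch below (the exact read_fews signature may differ, see the hydropandas docs):

import hydropandas as hpd

# read all downloaded FEWS PI-XML files into an ObsCollection
# (sketch: assumes read_fews accepts a directory path as its first argument)
oc = hpd.read_fews("waterboard_raingauge_quality_controlled_all_combined")
print(oc)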

Example script using NLMOD:


# %%
from pathlib import Path
from xml.etree import ElementTree as ET

import matplotlib.pyplot as plt
import nlmod
import nlmod.read.knmi_data_platform as kdp
import pandas as pd
import pastastore as pst

nlmod.util.get_color_logger("INFO")

# %%
# dataset names per water company / water board on the KNMI Data Platform
dataset_names = [
    "watercompany_raingauge_quality_controlled_waternet",
    "watercompany_raingauge_quality_controlled_pwn",
    "waterboard_raingauge_quality_controlled_rijnland",
    "waterboard_raingauge_quality_controlled_noorderzijlvest",
    "waterboard_raingauge_quality_controlled_limburg",
    "waterboard_raingauge_quality_controlled_hhnk",
    "waterboard_raingauge_quality_controlled_delfland",
    "waterboard_raingauge_quality_controlled_dommel",
    "waterboard_raingauge_quality_controlled_hunzeenaas",
    "waterboard_raingauge_quality_controlled_aaenmaas",
    "waterboard_raingauge_quality_controlled_hdsr"
]

# name of the combined dataset that contains all locations
dataset_name_all = "waterboard_raingauge_quality_controlled_all_combined"

# %%
# list the files available for this dataset and version
dataset_name = dataset_name_all
dataset_version = "1.0"
files = kdp.get_list_of_files(
    dataset_name=dataset_name,
    dataset_version=dataset_version,
)

# %%
# download the most recent file to a local directory;
# own_api_key should hold your personal KNMI Data Platform API key
kdp.download_files(
    dataset_name=dataset_name,
    dataset_version=dataset_version,
    fnames=files[-1:],
    dirname=dataset_name,
    api_key=own_api_key,
)

# %%
# parse the downloaded FEWS PI-XML file into a dict with the header
# metadata and a pandas DataFrame per rain gauge location
dfiles = list(Path(dataset_name).glob("*.xml"))
tree = ET.parse(dfiles[0])
root = tree.getroot()

obsd = {}
for item in root:
    if item.tag.endswith("series"):
        header = {}
        date = []
        time = []
        events = []
        for subitem in item:
            if subitem.tag.endswith("header"):
                # collect the header metadata (location id, coordinates, ...)
                for subsubitem in subitem:
                    prop = subsubitem.tag.split("}")[-1]
                    val = subsubitem.text
                    if prop in ("x", "y", "lat", "lon"):
                        val = float(val)
                    header[prop] = val
            elif subitem.tag.endswith("event"):
                # each event holds a date, a time and the measured value(s)
                date.append(subitem.attrib.pop("date"))
                time.append(subitem.attrib.pop("time"))
                events.append({**subitem.attrib})
        index = pd.to_datetime(
            [d + " " + t for d, t in zip(date, time)], errors="coerce"
        )
        ts = pd.DataFrame(events, index=index)
        obsd[header["locationId"]] = {"header": header, "series": ts}
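The event attributes end up as string columns, so the values have to be cast to float before use. A quick sketch for inspecting one station (assuming the events carry a "value" attribute, as is standard for FEWS PI time series, and that the header may contain a stationName):

# %%
# quick check: plot the rain gauge series of the first location
# (assumption: the event "value" attribute holds the precipitation depth)
location_id, data = next(iter(obsd.items()))
series = data["series"]["value"].astype(float)
ax = series.plot(figsize=(10, 4))
ax.set_title(f"{location_id} ({data['header'].get('stationName', '')})")
ax.set_ylabel("precipitation")
plt.show()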