ec-jrc / pyPoseidon

Framework for Hydrodynamic simulations
https://pyposeidon.readthedocs.io/
European Union Public License 1.2
20 stars 8 forks source link

`pyposeidon.model.read()` is broken. #143

Closed pmav99 closed 1 year ago

pmav99 commented 1 year ago
python -c 'import pyposeidon.model as pmodel; pmodel.read("iceland/181001.00/schism_model.json")'

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/panos/Prog/poseidon/pyPoseidon/pyposeidon/model.py", line 103, in read
    info = pd.read_json(f, lines=True).T
  File "/home/panos/Prog/poseidon/pyPoseidon/.venv/lib/python3.10/site-packages/pandas/util/_decorators.py", line 211, in wrapper
    return func(*args, **kwargs)
  File "/home/panos/Prog/poseidon/pyPoseidon/.venv/lib/python3.10/site-packages/pandas/util/_decorators.py", line 331, in wrapper
    return func(*args, **kwargs)
  File "/home/panos/Prog/poseidon/pyPoseidon/.venv/lib/python3.10/site-packages/pandas/io/json/_json.py", line 757, in read_json
    return json_reader.read()
  File "/home/panos/Prog/poseidon/pyPoseidon/.venv/lib/python3.10/site-packages/pandas/io/json/_json.py", line 913, in read
    obj = self._get_object_parser(self._combine_lines(data_lines))
  File "/home/panos/Prog/poseidon/pyPoseidon/.venv/lib/python3.10/site-packages/pandas/io/json/_json.py", line 937, in _get_object_parser
    obj = FrameParser(json, **kwargs).parse()
  File "/home/panos/Prog/poseidon/pyPoseidon/.venv/lib/python3.10/site-packages/pandas/io/json/_json.py", line 1064, in parse
    self._parse_no_numpy()
  File "/home/panos/Prog/poseidon/pyPoseidon/.venv/lib/python3.10/site-packages/pandas/io/json/_json.py", line 1321, in _parse_no_numpy
    loads(json, precise_float=self.precise_float), dtype=None
ValueError: Expected object or value
pmav99 commented 1 year ago

Since this is not currently tested, we should create tests/test_model.py with the following contents:

import pyposeidon.model as pmodel
from pyposeidon.schism import Schism

# fmt: off
SCHISM_MODEL = """
{"geometry": null, "start_date": "2018-10-01T00:00:00", "end_date": "2018-10-04T00:00:00", "rdate": "2018-10-01T00:00:00", "tag": "schism", "tide": false, "atm": true, "monitor": false, "epath": null, "solver_name": "schism", "time_frame": "72H", "mesh_file": "base_model/hgrid.gr3", "meteo_source": ["/home/panos/Prog/poseidon/pyPoseidon/iceland/uvp_2018100100.grib"], "dem_source": "./dem.nc", "update": ["model", "meteo"], "rpath": "/home/panos/Prog/poseidon/pyPoseidon/iceland/181001.00/", "parameters": {"dt": 150, "rnday": 3.0, "nhot": 1, "ihot": 0, "nspool": 24, "ihfskip": 96, "nhot_write": 288}, "misc": {}, "lon_min": -25.0, "lon_max": -13.0, "lat_min": 61.0, "lat_max": 69.0, "params": {"core": {"ipre": 0, "ibc": 1, "ibtp": 1, "rnday": 3.0, "dt": 150, "msc2": 24, "mdc2": 30, "ntracer_gen": 2, "ntracer_age": 4, "sed_class": 5, "eco_class": 27, "nspool": 24, "ihfskip": 96}, "opt": {"ipre2": 0, "start_year": 2018, "start_month": 10, "start_day": 1, "start_hour": 0, "utc_start": 0, "ics": 2, "ihot": 0, "ieos_type": 0, "ieos_pres": 0, "eos_a": -0.1, "eos_b": 1001.0, "nramp": 1, "dramp": 1.0, "nrampbc": 0, "drampbc": 1.0, "iupwind_mom": 0, "indvel": 1, "ihorcon": 0, "hvis_coef0": 0.025, "ishapiro": 1, "niter_shap": 1, "shapiro0": 0.5, "thetai": 1.0, "icou_elfe_wwm": 0, "nstep_wwm": 1, "iwbl": 0, "hmin_radstress": 1.0, "nrampwafo": 0, "drampwafo": 1.0, "turbinj": 0.15, "imm": 0, "ibdef": 10, "slam0": -124, "sfea0": 45, "iunder_deep": 0, "h1_bcc": 50.0, "h2_bcc": 100.0, "hw_depth": 1000000.0, "hw_ratio": 0.5, "ihydraulics": 0, "if_source": 0, "nramp_ss": 1, "dramp_ss": 2, "ihdif": 0, "nchi": -1, "dzb_min": 0.5, "hmin_man": 1.0, "ncor": 0, "rlatitude": 46, "coricoef": 0, "ic_elev": 0, "nramp_elev": 0, "inv_atm_bnd": 0, "prmsl_ref": 101325.0, "flag_ic": [0, 0, 1, null, 1, 1, 1, 1, 1, 1, 1, 0], "gen_wsett": 0, "ibcc_mean": 0, "rmaxvel": 10.0, "velmin_btrack": 0.0001, "btrack_nudge": 0.009013, "ihhat": 1, "inunfl": 0, "h0": 0.01, "shorewafo": 0, "moitn0": 50, "mxitn0": 1500, "rtol0": 1e-12, "nadv": 1, "dtb_max": 30.0, "dtb_min": 10.0, "inter_mom": 0, "kr_co": 1, "itr_met": 1, "h_tvd": 5.0, "eps1_tvd_imp": 0.0001, "eps2_tvd_imp": 1e-14, "ielm_transport": 0, "max_subcyc": 10, "ip_weno": 2, "courant_weno": 0.5, "nquad": 2, "ntd_weno": 1, "epsilon1": 0.001, "epsilon2": 1e-10, "i_prtnftl_weno": 0, "epsilon3": 1e-25, "ielad_weno": 0, "small_elad": 0.0001, "nws": 2, "wtiminc": 400.0, "nrampwind": 1, "drampwind": 1.0, "iwind_form": -1, "impose_net_flux": 0, "ihconsv": 0, "isconsv": 0, "itur": 0, "dfv0": 0.01, "dfh0": 0.0001, "mid": "KL", "stab": "KC", "xlsc0": 0.1, "inu_elev": 0, "inu_uv": 0, "inu_tr": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "vnh1": 400, "vnf1": 0.0, "vnh2": 500, "vnf2": 0.0, "step_nu_tr": 86400.0, "h_bcc1": 100.0, "s1_mxnbt": 0.5, "s2_mxnbt": 3.5, "iharind": 0, "iflux": 0, "izonal5": 0, "ibtrack_test": 0, "irouse_test": 0, "flag_fib": 1, "slr_rate": 120.0, "isav": 0, "nstep_ice": 1, "level_age": [9, -999], "rearth_pole": 6378206.4, "rearth_eq": 6378206.4, "shw": 4184.0, "rho0": 1000.0, "vclose_surf_frac": 1.0, "iadjust_mass_consv0": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}, "schout": {"nhot": 1, "nhot_write": 288, "iout_sta": 0, "nspool_sta": 1, "iof_hydro": [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]}}, "mesh": "tri2d", "dem_attrs": {"standard_name": "height_above_reference_ellipsoid", "long_name": "Elevation relative to sea level", "units": "m", "sdn_parameter_urn": "SDN:P01::BATHHGHT", "sdn_parameter_name": "Sea floor height (above mean sea level) {bathymetric height}", "sdn_uom_urn": "SDN:P06:ULAA", "sdn_uom_name": "Metres"}, "meteo": [{"GRIB_edition": 1, "GRIB_centre": "ecmf", "GRIB_centreDescription": "European Centre for Medium-Range Weather Forecasts", "GRIB_subCentre": 0, "Conventions": "CF-1.7", "institution": "European Centre for Medium-Range Weather Forecasts", "history": "2023-06-02T12:02 GRIB to CDM+CF via cfgrib-0.9.10.4/ecCodes-2.30.0 with {\\"source\\": \\"uvp_2018100100.grib\\", \\"filter_by_keys\\": {}, \\"encode_cf\\": [\\"parameter\\", \\"time\\", \\"geography\\", \\"vertical\\"]}"}], "coastlines": null, "version": "0.6.0"}
""".strip()
# fmt: on

def test_read_schism_from_json(tmp_path):
    json_file = tmp_path / "schism_model.json"
    json_file.write_text(SCHISM_MODEL)
    model = pmodel.read(str(json_file))
    assert isinstance(model, Schism)

Proposed fix:

diff --git a/pyposeidon/model.py b/pyposeidon/model.py
index 904924f..b48e740 100644
--- a/pyposeidon/model.py
+++ b/pyposeidon/model.py
@@ -10,29 +10,15 @@ Currently supported : DELFT3D , SCHISM
 # Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the Licence for the specific language governing permissions and limitations under the Licence.

-import os
-import datetime
-import numpy as np
-import xml.dom.minidom as md
-from shutil import copy2
-import subprocess
 import sys
 import json
-from collections import OrderedDict
-import pandas as pd
-import glob
-from shutil import copyfile
-import xarray as xr

 # local modules
 from . import tools
-import pyposeidon
-import pyposeidon.mesh as pmesh
-import pyposeidon.meteo as pmeteo
-from pyposeidon.utils.get_value import get_value
 import logging
 import logging.config
-import colorlog
+
+logger = logging.getLogger(__name__)

 LOGGING = {
@@ -95,16 +81,16 @@ def set(solver_name, atm=True, tide=False, **kwargs):
     return instance

-def read(filename, **kwargs):
-    end = filename.split(".")[-1]
+def read(filename: str):
+    """
+    Return a model instance from a pyposeidon model definition file (JSON).

-    if end in ["txt", "json"]:
-        with open(filename, "rb") as f:
-            info = pd.read_json(f, lines=True).T
-            info[info.isnull().values] = None
-            info = info.to_dict()[0]
-    else:
-        logger.error("Model file should be .txt or .json")
-        sys.exit(0)
+    """
+    with open(filename, "rb") as fd:
+        try:
+            info = json.load(fd)
+        except Exception:
+            logger.exception("Model definition file could not be parsed: %s", filename)
+            sys.exit(1)

     return set(**info)
pmav99 commented 1 year ago

OK. I had a more careful look into this. The actual issue is that the read() function cannot handle multiline/pretty-printed JSON files. The cause of this is https://github.com/ec-jrc/pyPoseidon/blob/master/pyposeidon/model.py#L104 i.e. lines=True.

That being said, the rationale behind using pd.read_json() is not apparent to me. It feels like a work-around something. Probably something related to the info[info.isnull().values] = None line. Nevertheless, given that the JSON files that read() parses are created by pyposeidon itself, it should be possible to create them in a way that doesn't require such work-arounds in read().

In any case, we should add tests for both single line and pretty-printed json files.

pmav99 commented 1 year ago

Fixed in 41149b9137f9721e2f835a82283a05554982b07f