Closed oyvindeide closed 3 months ago
Can be reproduced like this:
import datetime
import numpy as np
from ert.config import EnkfObs
from ert.config.enkf_observation_implementation_type import (
EnkfObservationImplementationType,
)
from ert.config.observation_vector import GenObservation, ObsVector
from ert.storage import open_storage
def test_other_failure(tmpdir):
    """Reproduce the failure: one observation keyed by a whole-second
    timestamp and one keyed by a microsecond-precision timestamp in the
    same experiment."""
    storage = open_storage(tmpdir, "w")
    _ = storage.create_experiment(
        [],
        observations=EnkfObs(
            obs_vectors={
                "A": ObsVector(
                    observation_type=EnkfObservationImplementationType.GEN_OBS,
                    observation_key="A",
                    data_key="A",
                    observations={
                        # whole-second timestamp
                        datetime.datetime(2000, 1, 1, 0, 0): GenObservation(
                            values=np.array([0.0]),
                            stds=np.array([0.1]),
                            indices=np.array([0], dtype=np.int32),
                            std_scaling=np.array([1.0]),
                        ),
                        # timestamp with a 1-microsecond component
                        datetime.datetime(2001, 1, 1, 1, 0, 0, 1): GenObservation(
                            values=np.array([0.0]),
                            stds=np.array([1.0]),
                            indices=np.array([0], dtype=np.int32),
                            std_scaling=np.array([0.0]),
                        ),
                    },
                )
            },
            obs_time=[datetime.datetime(2001, 1, 1, 0, 0)],
        ).datasets,
    )
The problem seems to be related to both microsecond-precision and whole-second-precision dates being present at the same time.
The issue can be provoked with the following
# Minimal xarray reproduction: combine one dataset whose "step" coordinate
# is a whole-second timestamp with one whose timestamp carries microseconds,
# then write the result through the scipy netCDF engine.
dataset = xr.combine_by_coords(
    [
        xr.Dataset(
            {"value": (["step"], [0.0])},
            coords={
                "step": np.array(
                    [datetime.datetime(2000, 1, 1, 0, 0)],
                    dtype="datetime64[ns]",
                ),
            },
        ),
        xr.Dataset(
            {"value": (["step"], [0.0])},
            coords={
                "step": np.array(
                    [datetime.datetime(2000, 1, 1, 1, 0, 0, 1)],
                    dtype="datetime64[ns]",
                ),
            },
        ),
    ]
)
dataset.to_netcdf(tmp_path / "netcdf", engine="scipy")
There is no problem when using netcdf4:
# Writing the same dataset with the default engine (netcdf4) succeeds.
dataset.to_netcdf(tmp_path / "netcdf")
The same problem occurs with one-second precision when the dates are ~70 years apart:
def test_netcdf_second_precision(tmp_path):
    """Reproduce the same failure using only whole-second timestamps,
    with the two dates roughly 70 years apart."""
    dataset = xr.combine_by_coords(
        [
            xr.Dataset(
                {"value": (["step"], [0.0])},
                coords={
                    "step": np.array(
                        [datetime.datetime(1930, 1, 1, 0, 0, 0)],
                        dtype="datetime64[ns]",
                    ),
                },
            ),
            xr.Dataset(
                {"value": (["step"], [0.0])},
                coords={
                    "step": np.array(
                        [datetime.datetime(2000, 1, 1, 1, 0, 1)],
                        dtype="datetime64[ns]",
                    ),
                },
            ),
        ]
    )
    dataset.to_netcdf(tmp_path / "netcdf", engine="scipy")
The same can happen when an index is large (mind you, the issue text claims it is still within int32, though 2147483648 is 2**31 — i.e. ~2 GB worth of 32-bit floats):
# Large-index variant of the reproduction. The original paste was garbled:
# `), parameters=[])` closed the EnkfObs(...) call early, leaving the trailing
# `).datasets,` and final `)` unbalanced, and `array(...)` was used without
# the `np.` prefix used everywhere else in this issue. Reconstructed to match
# the structure of the first snippet: `.datasets` belongs on the EnkfObs
# result, and `parameters=[]` is a keyword argument of create_experiment.
storage = open_storage(tmpdir, "w")
_ = storage.create_experiment(
    [],
    observations=EnkfObs(
        obs_vectors={
            "A": ObsVector(
                observation_type=EnkfObservationImplementationType.GEN_OBS,
                observation_key="A",
                data_key="A",
                observations={
                    0: GenObservation(
                        values=np.array([0.0, 0.0]),
                        stds=np.array([1.0, 1.0]),
                        # index 2147483648 == 2**31
                        indices=[0, 2147483648],
                        std_scaling=np.array([0.0, 0.0]),
                    )
                },
            )
        },
        obs_time=[],
    ).datasets,
    parameters=[],
)
Blocked by #7015
This has been fixed
After the refactor of summary data, we got reports of:
Reproducible with Drogon DST