Closed AndresTerrer closed 5 months ago
This is the full error message
ValueError Traceback (most recent call last) Cell In[6], line 2 1 # This will take a while the first time, while the interface creates all the indeces. ----> 2 ismn_data = ISMN_Interface(data_path, parallel=True)
File ...\ismn\interface.py:162, in ISMN_Interface.__init__(self, data_path, meta_path, network, parallel, keep_loaded_data, temp_root, custom_meta_reader, force_metadata_collection) 159 self.meta_path = meta_path 160 self.temp_root = temp_root --> 162 self.activate_network( 163 network=network, 164 meta_path=self.meta_path, 165 temp_root=self.temp_root)
File ...\ismn\interface.py:196, in ISMN_Interface.activate_network(self, network, meta_path, temp_root) 187 self.file_collection = IsmnFileCollection.build_from_scratch( 188 self.root, 189 parallel=self.parallel, (...) 192 custom_meta_readers=self.custom_meta_reader, 193 ) 194 self.__file_collection.to_metadata_csv(meta_csv_file) --> 196 self.file_collection = IsmnFileCollection.from_metadata_csv( 197 self.root, meta_csv_file, network=network) 199 networks = self._collect() 200 self.collection = NetworkCollection(networks)
File ...\ismn\filecollection.py:405, in IsmnFileCollection.from_metadata_csv(cls, data_root, meta_csv_file, network, temp_root) 401 network = np.atleast_1d(network) 403 print(f"Found existing ismn metadata in {meta_csv_file}.") --> 405 metadata_df = _load_metadata_df(meta_csv_file) 407 if network is not None: 408 metadata_df = metadata_df[np.isin(metadata_df["network"].values, 409 network)]
File ...\ismn\filecollection.py:136, in _load_metadata_df(meta_csv_file) 134 # parse date cols as datetime 135 for col in ["timerange_from", "timerange_to"]: --> 136 metadata_df[col, "val"] = pd.to_datetime(metadata_df[col, "val"]) 138 lvars = [] 139 for c in metadata_df.columns:
File ...\pandas\core\tools\datetimes.py:1063, in to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache) 1061 result = arg.tz_localize("utc") 1062 elif isinstance(arg, ABCSeries): -> 1063 cache_array = _maybe_cache(arg, format, cache, convert_listlike) 1064 if not cache_array.empty: 1065 result = arg.map(cache_array)
File ...\pandas\core\tools\datetimes.py:247, in _maybe_cache(arg, format, cache, convert_listlike) 245 unique_dates = unique(arg) 246 if len(unique_dates) < len(arg): --> 247 cache_dates = convert_listlike(unique_dates, format) 248 # GH#45319 249 try:
File ...\pandas\core\tools\datetimes.py:433, in _convert_listlike_datetimes(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)
431 # `format` could be inferred, or user didn't ask for mixed-format parsing.
432 if format is not None and format != "mixed":
--> 433 return _array_strptime_with_fallback(arg, name, utc, format, exact, errors)
435 result, tz_parsed = objects_to_datetime64(
436 arg,
437 dayfirst=dayfirst,
(...)
441 allow_object=True,
442 )
444 if tz_parsed is not None:
445 # We can take a shortcut since the datetime64 numpy array
446 # is in UTC
File ...\pandas\core\tools\datetimes.py:467, in _array_strptime_with_fallback(arg, name, utc, fmt, exact, errors) 456 def _array_strptime_with_fallback( 457 arg, 458 name, (...) 462 errors: str, 463 ) -> Index: 464 """ 465 Call array_strptime, with fallback behavior depending on 'errors'. 466 """ --> 467 result, tz_out = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc) 468 if tz_out is not None: 469 unit = np.datetime_data(result.dtype)[0]
File strptime.pyx:501, in pandas._libs.tslibs.strptime.array_strptime()
File strptime.pyx:451, in pandas._libs.tslibs.strptime.array_strptime()
File strptime.pyx:583, in pandas._libs.tslibs.strptime._parse_with_format()
ValueError: time data "2017-01-01" doesn't match format "%Y-%m-%d %H:%M:%S", at position 462. You might want to try:
- passing `format` if your strings have a consistent format;
- passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
- passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

Hi, thanks for reporting this. Unfortunately, I could not reproduce the error so far (I downloaded all data for 2017 from the ISMN platform yesterday; maybe there was a temporary hiccup in the database and you got a corrupt file?).
`conda list` or `pip list`.

After some testing, I think there might be a bug with parallel extraction from the zip archive. I'm looking into this. For now, I suggest using `parallel=False` (the default) when reading data directly from the zip file, or extracting the archive first and passing the directory that contains the extracted networks instead of the zip file.
I have just released a new version (`pip install ismn==1.4.1`), which should fix the described issue. I'm closing this issue, but we can re-open it if necessary.
I downloaded soil_moisture, soil_temperature, surface_temperature, snow_depth and air_temperature for all stations between 2017-01-01 and 2017-12-31.
When loading the zip file and generating the metadata for the first time, the code raised this error:
ValueError: time data "2017-01-01" doesn't match format "%Y-%m-%d %H:%M:%S", at position 462. You might want to try:
- passing `format` if your strings have a consistent format;
- passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
- passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.

I tried with ismn==1.2.0 and then got the "No objects to concatenate" error (see: https://github.com/TUW-GEO/ismn/issues/52).
Then I downgraded to ismn==1.3.0 and tried to load a simpler dataset containing only soil_moisture, soil_temperature and surface_temperature. It worked fine.