Closed egoriyaa closed 2 months ago
Can't load the following internal datasets:
- traffic_2008_10T
- traffic_2008_hourly
Datasets are loading correctly
from etna.datasets import load_dataset

load_dataset("traffic_2008_hourly", rebuild_dataset=True)
No response
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[85], line 3
      1 from etna.datasets import load_dataset
----> 3 load_dataset("traffic_2008_hourly", rebuild_dataset=True)

File ~/etna/etna/datasets/internal_datasets.py:159, in load_dataset(name, download_path, rebuild_dataset, parts)
    156 freq = dataset_params["freq"]
    158 if not _check_dataset_local(dataset_path) or rebuild_dataset:
--> 159     get_dataset_function(dataset_dir)
    160 ts_out = []
    161 for part in parts_:

File ~/etna/etna/datasets/internal_datasets.py:363, in get_traffic_2008_dataset(dataset_dir, dataset_freq)
    360 dates_df = dates_df.drop(["dt"], axis=1)
    362 df = pd.DataFrame(targets, columns=stations)
--> 363 df["timestamp"] = dates_df_cropped["timestamp"].values
    364 df = df.merge(dates_df, on=["timestamp"], how="right").fillna(0)
    365 df = df.melt("timestamp", var_name="segment", value_name="target")

File ~/Library/Caches/pypoetry/virtualenvs/etna-yJQui467-py3.8/lib/python3.8/site-packages/pandas/core/frame.py:3980, in DataFrame.__setitem__(self, key, value)
   3977     self._setitem_array([key], value)
   3978 else:
   3979     # set column
-> 3980     self._set_item(key, value)

File ~/Library/Caches/pypoetry/virtualenvs/etna-yJQui467-py3.8/lib/python3.8/site-packages/pandas/core/frame.py:4174, in DataFrame._set_item(self, key, value)
   4164 def _set_item(self, key, value) -> None:
   4165     """
   4166     Add series to DataFrame in specified column.
   4167     (...)
   4172     ensure homogeneity.
   4173     """
-> 4174     value = self._sanitize_column(value)
   4176     if (
   4177         key in self.columns
   4178         and value.ndim == 1
   4179         and not is_extension_array_dtype(value)
   4180     ):
   4181         # broadcast across multiple columns if necessary
   4182         if not self.columns.is_unique or isinstance(self.columns, MultiIndex):

File ~/Library/Caches/pypoetry/virtualenvs/etna-yJQui467-py3.8/lib/python3.8/site-packages/pandas/core/frame.py:4915, in DataFrame._sanitize_column(self, value)
   4912     return _reindex_for_setitem(Series(value), self.index)
   4914 if is_list_like(value):
-> 4915     com.require_length_match(value, self.index)
   4916 return sanitize_array(value, self.index, copy=True, allow_2d=True)

File ~/Library/Caches/pypoetry/virtualenvs/etna-yJQui467-py3.8/lib/python3.8/site-packages/pandas/core/common.py:571, in require_length_match(data, index)
    567 """
    568 Check the length of data matches the length of the index.
    569 """
    570 if len(data) != len(index):
--> 571     raise ValueError(
    572         "Length of values "
    573         f"({len(data)}) "
    574         "does not match length of index "
    575         f"({len(index)})"
    576     )

ValueError: Length of values (63648) does not match length of index (63360)
🐛 Bug Report
Can't load the following internal datasets:
Expected behavior
Datasets are loading correctly
How To Reproduce
Code
Environment
No response
Additional context
Checklist