liuxu77 / LargeST

LargeST: A Benchmark Dataset for Large-Scale Traffic Forecasting (NeurIPS 2023 DB Track)
MIT License
166 stars 14 forks source link

处理SD数据出错 #3

Closed ContinualGoing closed 1 year ago

ContinualGoing commented 1 year ago

您好，非常感谢您开源的大数据集。我们在尝试处理 SD 数据时，Jupyter 笔记本中的代码运行出错。代码和错误提示如下：

```python
year = '2019'  # please specify the year, our experiments use 2019

sd_meta.ID = sd_meta.ID.astype(str)
sd_meta_id = sd_meta.ID.values.tolist()

ca_his = pd.read_hdf('../ca/ca_his_' + year + '.h5')
sd_his = ca_his[sd_meta_id]
sd_his
```

ValueError Traceback (most recent call last) ~/anaconda3/envs/test7/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj) 700 type_pprinters=self.type_printers, 701 deferred_pprinters=self.deferred_printers) --> 702 printer.pretty(obj) 703 printer.flush() 704 return stream.getvalue()

~/anaconda3/envs/test7/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj) 392 if cls is not object \ 393 and callable(cls.__dict__.get('__repr__')): --> 394 return _repr_pprint(obj, self, cycle) 395 396 return _default_pprint(obj, self, cycle)

~/anaconda3/envs/test7/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle) 698 """A pprint that just redirects to the normal repr function.""" 699 # Find newlines and replace them with p.break_() --> 700 output = repr(obj) 701 lines = output.splitlines() 702 with p.group():

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/frame.py in __repr__(self) 808 line_width=width, 809 max_colwidth=max_colwidth, --> 810 show_dimensions=show_dimensions, 811 ) 812

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding) 935 max_cols=max_cols, 936 show_dimensions=show_dimensions, --> 937 decimal=decimal, 938 ) 939 return fmt.DataFrameRenderer(formatter).to_string(

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in __init__(self, frame, columns, col_space, header, index, na_rep, formatters, justify, float_format, sparsify, index_names, max_rows, min_rows, max_cols, show_dimensions, decimal, bold_rows, escape) 510 511 self.tr_frame = self.frame --> 512 self.truncate() 513 self.adj = get_adjustment() 514

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in truncate(self) 701 702 if self.is_truncated_vertically: --> 703 self._truncate_vertically() 704 705 def _truncate_horizontally(self) -> None:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in _truncate_vertically(self) 741 head = self.tr_frame.iloc[:row_num, :] 742 tail = self.tr_frame.iloc[-row_num:, :] --> 743 self.tr_frame = concat((head, tail)) 744 else: 745 row_num = cast(int, self.max_rows)

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy) 293 verify_integrity=verify_integrity, 294 copy=copy, --> 295 sort=sort, 296 ) 297

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort) 465 self.copy = copy 466 --> 467 self.new_axes = self._get_new_axes() 468 469 def get_result(self):

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_new_axes(self) 537 return [ 538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i) --> 539 for i in range(ndim) 540 ] 541

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in <listcomp>(.0) 537 return [ 538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i) --> 539 for i in range(ndim) 540 ] 541

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_concat_axis(self) 590 591 if self.keys is None: --> 592 concat_axis = _concat_indexes(indexes) 593 else: 594 concat_axis = _make_concat_multiindex(

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _concat_indexes(indexes) 608 609 def _concat_indexes(indexes) -> Index: --> 610 return indexes[0].append(indexes[1:]) 611 612

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/indexes/base.py in append(self, other) 4354 name = None if len(names) > 1 else self.name 4355 -> 4356 return self._concat(to_concat, name) 4357 4358 def _concat(self, to_concat: List["Index"], name: Label) -> "Index":

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/indexes/base.py in _concat(self, to_concat, name) 4362 to_concat_vals = [x._values for x in to_concat] 4363 -> 4364 result = concat_compat(to_concat_vals) 4365 return Index(result, name=name) 4366

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/dtypes/concat.py in concat_compat(to_concat, axis) 153 154 elif _contains_datetime or "timedelta" in typs: --> 155 return _concat_datetime(to_concat, axis=axis) 156 157 elif all_empty:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/dtypes/concat.py in _concat_datetime(to_concat, axis) 375 to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat] 376 --> 377 result = type(to_concat[0])._concat_same_type(to_concat, axis=axis) 378 379 if result.ndim == 2 and is_extension_array_dtype(result.dtype):

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in _concat_same_type(cls, to_concat, axis) 405 if obj.freq is not None and all(x.freq == obj.freq for x in to_concat): 406 pairs = zip(to_concat[:-1], to_concat[1:]) --> 407 if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs): 408 new_freq = obj.freq 409

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in <genexpr>(.0) 405 if obj.freq is not None and all(x.freq == obj.freq for x in to_concat): 406 pairs = zip(to_concat[:-1], to_concat[1:]) --> 407 if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs): 408 new_freq = obj.freq 409

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in __getitem__(self, key) 279 only handle list-likes, slices, and integer scalars 280 """ --> 281 result = super().__getitem__(key) 282 if lib.is_scalar(result): 283 return result

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/_mixins.py in __getitem__(self, key) 222 result = self._ndarray[key] 223 if self.ndim == 1: --> 224 return self._box_func(result) 225 return self._from_backing_data(result) 226

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py in _box_func(self, x) 483 484 def _box_func(self, x) -> Union[Timestamp, NaTType]: --> 485 return Timestamp(x, freq=self.freq, tz=self.tz) 486 487 @property

pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()

pandas/_libs/tslibs/offsets.pyx in pandas._libs.tslibs.offsets.to_offset()

ValueError: Invalid frequency: b'ccopy_reg\n_reconstructor\np0\n(cpandas.tseries.offsets\nMinute\np1\ncbuiltin\nobject\np2\nNtp3\nRp4\n(dp5\nVn\np6\nI15\nsVnormalize\np7\nI00\nsV_cache\np8\n(dp9\nsb.'


ValueError Traceback (most recent call last) ~/anaconda3/envs/test7/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj) 343 method = get_real_method(obj, self.print_method) 344 if method is not None: --> 345 return method() 346 return None 347 else:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/frame.py in _repr_html_(self) 851 max_cols=max_cols, 852 show_dimensions=show_dimensions, --> 853 decimal=".", 854 ) 855 return fmt.DataFrameRenderer(formatter).to_html(notebook=True)

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in __init__(self, frame, columns, col_space, header, index, na_rep, formatters, justify, float_format, sparsify, index_names, max_rows, min_rows, max_cols, show_dimensions, decimal, bold_rows, escape) 510 511 self.tr_frame = self.frame --> 512 self.truncate() 513 self.adj = get_adjustment() 514

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in truncate(self) 701 702 if self.is_truncated_vertically: --> 703 self._truncate_vertically() 704 705 def _truncate_horizontally(self) -> None:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in _truncate_vertically(self) 741 head = self.tr_frame.iloc[:row_num, :] 742 tail = self.tr_frame.iloc[-row_num:, :] --> 743 self.tr_frame = concat((head, tail)) 744 else: 745 row_num = cast(int, self.max_rows)

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy) 293 verify_integrity=verify_integrity, 294 copy=copy, --> 295 sort=sort, 296 ) 297

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort) 465 self.copy = copy 466 --> 467 self.new_axes = self._get_new_axes() 468 469 def get_result(self):

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_new_axes(self) 537 return [ 538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i) --> 539 for i in range(ndim) 540 ] 541

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in <listcomp>(.0) 537 return [ 538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i) --> 539 for i in range(ndim) 540 ] 541

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_concat_axis(self) 590 591 if self.keys is None: --> 592 concat_axis = _concat_indexes(indexes) 593 else: 594 concat_axis = _make_concat_multiindex(

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _concat_indexes(indexes) 608 609 def _concat_indexes(indexes) -> Index: --> 610 return indexes[0].append(indexes[1:]) 611 612

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/indexes/base.py in append(self, other) 4354 name = None if len(names) > 1 else self.name 4355 -> 4356 return self._concat(to_concat, name) 4357 4358 def _concat(self, to_concat: List["Index"], name: Label) -> "Index":

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/indexes/base.py in _concat(self, to_concat, name) 4362 to_concat_vals = [x._values for x in to_concat] 4363 -> 4364 result = concat_compat(to_concat_vals) 4365 return Index(result, name=name) 4366

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/dtypes/concat.py in concat_compat(to_concat, axis) 153 154 elif _contains_datetime or "timedelta" in typs: --> 155 return _concat_datetime(to_concat, axis=axis) 156 157 elif all_empty:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/dtypes/concat.py in _concat_datetime(to_concat, axis) 375 to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat] 376 --> 377 result = type(to_concat[0])._concat_same_type(to_concat, axis=axis) 378 379 if result.ndim == 2 and is_extension_array_dtype(result.dtype):

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in _concat_same_type(cls, to_concat, axis) 405 if obj.freq is not None and all(x.freq == obj.freq for x in to_concat): 406 pairs = zip(to_concat[:-1], to_concat[1:]) --> 407 if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs): 408 new_freq = obj.freq 409

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in <genexpr>(.0) 405 if obj.freq is not None and all(x.freq == obj.freq for x in to_concat): 406 pairs = zip(to_concat[:-1], to_concat[1:]) --> 407 if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs): 408 new_freq = obj.freq 409

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in __getitem__(self, key) 279 only handle list-likes, slices, and integer scalars 280 """ --> 281 result = super().__getitem__(key) 282 if lib.is_scalar(result): 283 return result

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/_mixins.py in __getitem__(self, key) 222 result = self._ndarray[key] 223 if self.ndim == 1: --> 224 return self._box_func(result) 225 return self._from_backing_data(result) 226

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py in _box_func(self, x) 483 484 def _box_func(self, x) -> Union[Timestamp, NaTType]: --> 485 return Timestamp(x, freq=self.freq, tz=self.tz) 486 487 @property

pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()

pandas/_libs/tslibs/offsets.pyx in pandas._libs.tslibs.offsets.to_offset()

ValueError: Invalid frequency: b'ccopy_reg\n_reconstructor\np0\n(cpandas.tseries.offsets\nMinute\np1\ncbuiltin\nobject\np2\nNtp3\nRp4\n(dp5\nVn\np6\nI15\nsVnormalize\np7\nI00\nsV_cache\np8\n(dp9\nsb.'

liuxu77 commented 1 year ago

Hi, thanks for your question. Does this issue also occur on other datasets, e.g., GBA?

Also, you may need to check the version of your pandas library; we use pandas==1.3.5.