NVIDIA-Merlin / Transformers4Rec

Transformers4Rec is a flexible and efficient library for sequential and session-based recommendation and works with PyTorch.
https://nvidia-merlin.github.io/Transformers4Rec/main
Apache License 2.0
1.07k stars 142 forks source link

[BUG] examples/tutorial/01-preprocess.ipynb: Convert timestamp from datetime - NotImplementedError: cuDF does not yet support timezone-aware datetimes #777

Open zwei2016 opened 3 months ago

zwei2016 commented 3 months ago

Bug description

raw_df['event_time_dt'] = raw_df['event_time'].astype('datetime64[s]')
raw_df['event_time_ts']= raw_df['event_time_dt'].astype('int')
raw_df.head()

NotImplementedError Traceback (most recent call last) Cell In[4], line 4 1 #import datetime 2 #raw_df['event_time'] = cudf.to_datetime(raw_df['event_time'], format='%Y-%m-%d %H:%M:%S') ----> 4 raw_df['event_time_dt'] = raw_df['event_time'].astype('datetime64[s]') 5 raw_df['event_time_ts']= raw_df['event_time_dt'].astype('int') 6 raw_df.head()

File ~/miniconda3/lib/python3.10/site-packages/nvtx/nvtx.py:116, in annotate.__call__.<locals>.inner(*args, **kwargs) 113 @wraps(func) 114 def inner(*args, **kwargs): 115 libnvtx_push_range(self.attributes, self.domain.handle) --> 116 result = func(*args, **kwargs) 117 libnvtx_pop_range(self.domain.handle) 118 return result

File ~/miniconda3/lib/python3.10/site-packages/cudf/core/series.py:2102, in Series.astype(self, dtype, copy, errors) 2100 else: 2101 dtype = {self.name: dtype} -> 2102 return super().astype(dtype, copy, errors)

File ~/miniconda3/lib/python3.10/site-packages/cudf/core/indexed_frame.py:5009, in IndexedFrame.astype(self, dtype, copy, errors) 5007 except Exception as e: 5008 if errors == "raise": -> 5009 raise e 5010 return self 5012 return self._from_data(data, index=self._index)

File ~/miniconda3/lib/python3.10/site-packages/cudf/core/indexed_frame.py:5006, in IndexedFrame.astype(self, dtype, copy, errors) 5003 raise ValueError("invalid error value specified") 5005 try: -> 5006 data = super().astype(dtype, copy) 5007 except Exception as e: 5008 if errors == "raise":

File ~/miniconda3/lib/python3.10/site-packages/nvtx/nvtx.py:116, in annotate.__call__.<locals>.inner(*args, **kwargs) 113 @wraps(func) 114 def inner(*args, **kwargs): 115 libnvtx_push_range(self.attributes, self.domain.handle) --> 116 result = func(*args, **kwargs) 117 libnvtx_pop_range(self.domain.handle) 118 return result

File ~/miniconda3/lib/python3.10/site-packages/cudf/core/frame.py:272, in Frame.astype(self, dtype, copy) 270 @_cudf_nvtx_annotate 271 def astype(self, dtype, copy: bool = False): --> 272 result_data = { 273 col_name: col.astype(dtype.get(col_name, col.dtype), copy=copy) 274 for col_name, col in self._data.items() 275 } 277 return ColumnAccessor( 278 data=result_data, 279 multiindex=self._data.multiindex, (...) 283 verify=False, 284 )

File ~/miniconda3/lib/python3.10/site-packages/cudf/core/frame.py:273, in <dictcomp>(.0) 270 @_cudf_nvtx_annotate 271 def astype(self, dtype, copy: bool = False): 272 result_data = { --> 273 col_name: col.astype(dtype.get(col_name, col.dtype), copy=copy) 274 for col_name, col in self._data.items() 275 } 277 return ColumnAccessor( 278 data=result_data, 279 multiindex=self._data.multiindex, (...) 283 verify=False, 284 )

File ~/miniconda3/lib/python3.10/site-packages/cudf/core/column/column.py:1002, in ColumnBase.astype(self, dtype, copy) 1000 return col.as_decimal_column(dtype) 1001 elif np.issubdtype(cast(Any, dtype), np.datetime64): -> 1002 return col.as_datetime_column(dtype) 1003 elif np.issubdtype(cast(Any, dtype), np.timedelta64): 1004 return col.as_timedelta_column(dtype)

File ~/miniconda3/lib/python3.10/site-packages/cudf/core/column/string.py:5749, in StringColumn.as_datetime_column(self, dtype, format) 5742 return cast( 5743 "cudf.core.column.DatetimeColumn", 5744 column.column_empty( 5745 len(self), dtype=out_dtype, masked=True 5746 ), 5747 ) 5748 else: -> 5749 format = datetime.infer_format( 5750 self.apply_boolean_mask(self.notnull()).element_indexing(0) 5751 ) 5753 if format.endswith("%z"): 5754 raise NotImplementedError( 5755 "cuDF does not yet support timezone-aware datetimes" 5756 )

File ~/miniconda3/lib/python3.10/site-packages/cudf/core/column/datetime.py:108, in infer_format(element, **kwargs) 106 if fmt is not None: 107 if "%z" in fmt or "%Z" in fmt: --> 108 raise NotImplementedError( 109 "cuDF does not yet support timezone-aware datetimes" 110 ) 111 if ".%f" not in fmt: 112 # For context read: 113 # https://github.com/pandas-dev/pandas/issues/52418 114 # We cannot rely on format containing only %f 115 # c++/libcudf expects .%3f, .%6f, .%9f 116 # Logic below handles those cases well. 117 return fmt

NotImplementedError: cuDF does not yet support timezone-aware datetimes

Steps/Code to reproduce bug

  1. In examples/tutorial/01-preprocess.ipynb, run the cell containing `raw_df['event_time'].astype('datetime64[s]')`.

Expected behavior

No error

Environment details

Additional context

The problem is solved by adding one line that parses `event_time` with an explicit format before the cast:

## added one line 
raw_df['event_time'] = cudf.to_datetime(raw_df['event_time'], format='%Y-%m-%d %H:%M:%S')
##
raw_df['event_time_dt'] = raw_df['event_time'].astype('datetime64[s]')
raw_df['event_time_ts']= raw_df['event_time_dt'].astype('int')
raw_df.head()