import numpy as np
import pandas as pd

from gluonts.dataset.pandas import PandasDataset
# Path to the raw demand CSV; column 0 (the product code) becomes the index
# and date-like values in it are parsed eagerly.
url = "/content/Historical Product Demand.csv"
df = pd.read_csv(url, index_col=0, parse_dates=True)
def change_to_int(val):
    """Convert an Order_Demand cell such as "(1234)" or "500" to an int.

    Parenthesized values have the parentheses stripped before parsing;
    anything unparseable (non-numeric text, NaN, None) falls back to 0.

    NOTE(review): in accounting notation "(1234)" usually means -1234;
    this keeps the original positive interpretation — confirm it is intended.
    """
    try:
        return int(val.strip('()'))
    except (AttributeError, ValueError, TypeError):
        # AttributeError: non-string (NaN/None has no .strip);
        # ValueError/TypeError: text that is not an integer.
        return 0
# Parse the date column and convert Order_Demand strings like "(1234)" to ints.
df['Date'] = pd.to_datetime(df['Date'])
df['Order_Demand'] = df['Order_Demand'].apply(change_to_int)

# Reshape to the long format GluonTS expects: one (item_id, target) pair per
# row, with the timestamp as the index. item_id is the original CSV index
# (the product code).
df['item_id'] = df.index
df['target'] = df['Order_Demand']
df.index = df.Date
df = df[['item_id', 'target']]

# Drop duplicate (date, item_id) pairs so each item's series has a unique
# date index. Deduplicating on the date alone (df.index.duplicated) would
# also discard valid rows of *other* items that happen to share a date.
dup = pd.MultiIndex.from_arrays([df.index, df['item_id']]).duplicated(keep='first')
df_missing_val = df[~dup]
# Drop rows whose date failed to parse (NaT index).
df_missing_val = df_missing_val.loc[pd.notnull(df_missing_val.index)]
# Frequency of the regular grid and the latest observation date across all items.
freq = "1D"
# Use .index.max() per group: .index[-1] is only the latest date when the
# index happens to be sorted, which the raw CSV does not guarantee.
max_end = max(df.groupby("item_id").apply(lambda _df: _df.index.max()))
print(f"{max_end=}")
dfs_dict = {}
for item_id, gdf in df_missing_val.groupby("item_id"):
    # Reindex each item onto a regular daily grid from its first observation
    # to the global end date, so every series is gap-free and ends at max_end.
    full_index = pd.date_range(start=gdf.index[0], end=max_end, freq=freq)
    filled = gdf.reindex(full_index).drop("item_id", axis=1)
    # Fill the gaps introduced by reindexing. Assign through the frame rather
    # than Series.fillna(inplace=True) to avoid pandas chained-assignment
    # warnings / silent no-ops under Copy-on-Write.
    # NOTE(review): 100 looks like a placeholder fill value — confirm that
    # 0 or forward-fill is not what is actually wanted here.
    filled["target"] = filled["target"].fillna(100)
    # lag-llama expects float32 values; cast every non-string column.
    for col in filled.columns:
        series = filled[col]
        if series.dtype != "object" and not pd.api.types.is_string_dtype(series):
            filled[col] = series.astype("float32")
    dfs_dict[item_id] = filled
# Wrap the per-item frames in a GluonTS PandasDataset keyed by item_id.
ds = PandasDataset(dfs_dict, target="target")
But I am getting the error below when predicting:
131 ) -> Iterator:
--> 132 for data_entry in data_it:
133 try:
134 yield self.map_transform(data_entry.copy(), is_train)
[/usr/local/lib/python3.10/dist-packages/gluonts/transform/_base.py](https://localhost:8080/#) in __call__(self, data_it, is_train)
130 self, data_it: Iterable[DataEntry], is_train: bool
131 ) -> Iterator:
--> 132 for data_entry in data_it:
133 try:
134 yield self.map_transform(data_entry.copy(), is_train)
[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in __iter__(self)
412
413 def __iter__(self):
--> 414 for input, _label in self.test_data:
415 yield input
416
[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in __iter__(self)
384
385 def __iter__(self) -> Generator[Tuple[DataEntry, DataEntry], None, None]:
--> 386 yield from self.splitter.generate_test_pairs(
387 dataset=self.dataset,
388 prediction_length=self.prediction_length,
[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in generate_test_pairs(self, dataset, prediction_length, windows, distance, max_history)
249 for window in range(windows):
250 offset = window * distance
--> 251 test = self.test_pair(
252 entry, prediction_length=prediction_length, offset=offset
253 )
[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in test_pair(self, entry, prediction_length, offset)
299 label_slice = slice(offset_, None)
300 return (
--> 301 slice_data_entry(
302 entry, input_slice, prediction_length=prediction_length
303 ),
[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in slice_data_entry(entry, slice_, prediction_length)
161 entry: DataEntry, slice_: slice, prediction_length: int = 0
162 ) -> DataEntry:
--> 163 slice_ = to_positive_slice(
164 to_integer_slice(slice_, entry[FieldName.START]),
165 entry[FieldName.TARGET].shape[-1],
[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in to_positive_slice(slice_, length)
118 if stop is not None and stop < 0:
119 stop += length
--> 120 assert stop >= 0
121 return slice(start, stop, slice_.step)
122
AssertionError:
Dataset I am using: see the Dataset Link above.
The code shown above is what I use to load the df, and the traceback above is the error I get when predicting.