time-series-foundation-models / lag-llama

Lag-Llama: Towards Foundation Models for Probabilistic Time Series Forecasting
Apache License 2.0
1.09k stars 121 forks source link

Getting Assertion Error #15

Open danishbansal808 opened 4 months ago

danishbansal808 commented 4 months ago

Dataset I am using

Dataset Link

Code I am using to load the df

import numpy as np
import pandas as pd

from gluonts.dataset.pandas import PandasDataset

# Path to the local copy of the "Historical Product Demand" dataset.
url = "/content/Historical Product Demand.csv"
# Use the first CSV column as the index and let pandas try to parse dates.
df = pd.read_csv(url, index_col=0, parse_dates=True)
def change_to_int(val):
    """Parse an Order_Demand cell into an int.

    Values in the raw CSV may be wrapped in parentheses, e.g. "(1234)".
    Anything that cannot be parsed (non-numeric text, NaN/float cells,
    None) maps to 0 instead of raising.
    """
    try:
        # Strip the surrounding parentheses before parsing, e.g. "(1234)".
        return int(val.strip('()'))
    except (AttributeError, TypeError, ValueError):
        # Non-string cells (NaN, None) raise AttributeError/TypeError;
        # non-numeric strings raise ValueError. Fall back to 0.
        return 0

# Convert date column to datetime
df['Date'] = pd.to_datetime(df['Date'])

# Apply the function to convert Order_Demand to int
df['Order_Demand'] = df['Order_Demand'].apply(change_to_int)

# Keep the original CSV index (product code) as the item identifier and
# the demand as the forecasting target, then index the frame by date.
df['item_id'] = df.index
df['target'] = df['Order_Demand']
df.index = df.Date
df = df[['item_id', 'target']]

# Drop duplicate entries for the same date, then drop rows whose date
# failed to parse (NaT index).
# NOTE(review): duplicates are dropped by date across *all* items — if
# several items share a date, only the first row survives. Consider
# deduplicating per (item_id, date) instead; verify this is intended.
df_missing_val = df[~df.index.duplicated(keep='first')]
df_missing_val = df_missing_val.loc[pd.notnull(df_missing_val.index)]
# Frequency of the regularized series.
freq = "1D"

# Latest observed timestamp across all items; every series is extended
# up to this date so they share a common end.
# NOTE(review): max_end is computed from `df`, while the loop below
# iterates `df_missing_val`. If cleaning dropped the rows that carried
# the latest dates, reindexing up to a date beyond the kept data can
# produce test windows that fall outside a series — which is exactly
# the `assert stop >= 0` AssertionError raised inside gluonts' split.
# Verify both frames agree (or compute max_end from df_missing_val).
max_end = max(df.groupby("item_id").apply(lambda _df: _df.index[-1]))
print(f"{max_end=}")

dfs_dict = {}
for item_id, gdf in df_missing_val.groupby("item_id"):
    # Full regular date range from this item's first observation to the
    # global end date.
    new_index = pd.date_range(gdf.index[0], end=max_end, freq=freq)
    # Reindex onto the regular grid; missing dates become NaN rows.
    item_df = gdf.reindex(new_index).drop("item_id", axis=1)
    # Fill gaps with a constant placeholder demand. Assign the result
    # instead of chained `inplace=True`, which pandas deprecates.
    item_df['target'] = item_df['target'].fillna(100)
    # Convert numeric columns to float32 for lag-llama.
    for col in item_df:
        # is_string_dtype already covers object-dtype string columns, so
        # the extra `dtype != 'object'` check is redundant.
        if not pd.api.types.is_string_dtype(item_df[col]):
            item_df[col] = item_df[col].astype('float32')
    dfs_dict[item_id] = item_df

# Create a PandasDataset
ds = PandasDataset(dfs_dict, target="target")

But I am getting the below error when predicting


131     ) -> Iterator:
--> 132         for data_entry in data_it:
    133             try:
    134                 yield self.map_transform(data_entry.copy(), is_train)

[/usr/local/lib/python3.10/dist-packages/gluonts/transform/_base.py](https://localhost:8080/#) in __call__(self, data_it, is_train)
    130         self, data_it: Iterable[DataEntry], is_train: bool
    131     ) -> Iterator:
--> 132         for data_entry in data_it:
    133             try:
    134                 yield self.map_transform(data_entry.copy(), is_train)

[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in __iter__(self)
    412 
    413     def __iter__(self):
--> 414         for input, _label in self.test_data:
    415             yield input
    416 

[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in __iter__(self)
    384 
    385     def __iter__(self) -> Generator[Tuple[DataEntry, DataEntry], None, None]:
--> 386         yield from self.splitter.generate_test_pairs(
    387             dataset=self.dataset,
    388             prediction_length=self.prediction_length,

[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in generate_test_pairs(self, dataset, prediction_length, windows, distance, max_history)
    249             for window in range(windows):
    250                 offset = window * distance
--> 251                 test = self.test_pair(
    252                     entry, prediction_length=prediction_length, offset=offset
    253                 )

[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in test_pair(self, entry, prediction_length, offset)
    299             label_slice = slice(offset_, None)
    300         return (
--> 301             slice_data_entry(
    302                 entry, input_slice, prediction_length=prediction_length
    303             ),

[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in slice_data_entry(entry, slice_, prediction_length)
    161     entry: DataEntry, slice_: slice, prediction_length: int = 0
    162 ) -> DataEntry:
--> 163     slice_ = to_positive_slice(
    164         to_integer_slice(slice_, entry[FieldName.START]),
    165         entry[FieldName.TARGET].shape[-1],

[/usr/local/lib/python3.10/dist-packages/gluonts/dataset/split.py](https://localhost:8080/#) in to_positive_slice(slice_, length)
    118     if stop is not None and stop < 0:
    119         stop += length
--> 120         assert stop >= 0
    121     return slice(start, stop, slice_.step)
    122 

AssertionError:
```
ashok-arjun commented 3 months ago

Hi, thanks for the issue. Let me check this and get back.

ashok-arjun commented 2 months ago

Sorry, I couldn't check this then. Is this resolved? @danishbansal808