Hi,
I am facing an issue with my own time-series datasets (from the UEA archive): I am trying to create a custom benchmark with 3 experiences in the train stream and a single test set. The code is below:
import numpy as np
import torch
import avalanche as avl
from torch.utils.data import TensorDataset
from avalanche.benchmarks.utils import AvalancheDataset

# dta is my own helper module for loading/converting the UEA data; datapath is set elsewhere
data = dta.loadDict("UEA_data", path=datapath)
name_list = ['ArticularyWordRecognition_TEST', 'ArticularyWordRecognition_TRAIN']

for key, value in data.items():
    if key in name_list:
        if "_TRAIN" in key:
            print("Dataset Name: ", key)
            print(type(value))
            value = dta.pamosConverter(df=value, normalizelength=1)
            split_df = np.array_split(value, 3)
            split_df = [part.reset_index(drop=True) for part in split_df]
            train_list_Y = []
            train_list_X = []
            for df in split_df:
                train_tensor_Y = torch.tensor(df["label"].values)
                train_list_Y.append(train_tensor_Y)
            for df in split_df:
                train_tensor_X = df.drop("label", axis=1)
                train_tensor_X = dta.PAMOS_to_array(df=train_tensor_X)
                print(type(train_tensor_X), train_tensor_X.shape)
                train_tensor_X = torch.from_numpy(train_tensor_X)
                train_list_X.append(train_tensor_X)
            print("Data Type: ", type(train_list_X[0]))
            print("Data Shape: ", train_list_X[0].shape)
            print("Data Type: ", type(train_list_Y[0]))
            print("Data Shape: ", train_list_Y[0].shape)
            train_data = [TensorDataset(train_list_X[i], train_list_Y[i]) for i in range(3)]
            avl_data_train = [AvalancheDataset(train_data[i]) for i in range(3)]
        else:
            print("Dataset Name: ", key)
            print(type(value))
            value = dta.pamosConverter(df=value, normalizelength=1)
            test_tensor_Y = torch.tensor(value["label"].values)
            value = value.drop("label", axis=1)
            value = dta.PAMOS_to_array(df=value)
            test_tensor_X = torch.from_numpy(value)
            print("Data Type: ", type(test_tensor_X))
            print("Data Shape: ", test_tensor_X.shape)
            print("Data Type: ", type(test_tensor_Y))
            print("Data Shape: ", test_tensor_Y.shape)
            test_data = TensorDataset(test_tensor_X, test_tensor_Y)
            avl_data_test = AvalancheDataset(test_data)

print(type(avl_data_test), type(avl_data_train[0]))
bm = avl.benchmarks.benchmark_from_datasets(dataset_streams={"train_stream": [avl_data_train], "test_stream": [avl_data_test]})
train_stream = bm.train_stream
test_stream = bm.test_stream
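As a quick sanity check (not part of the script above), something like the snippet below should confirm that every object I intend to put into the benchmark is an AvalancheDataset:

from avalanche.benchmarks.utils import AvalancheDataset

print(all(isinstance(d, AvalancheDataset) for d in avl_data_train))  # expected: True
print(isinstance(avl_data_test, AvalancheDataset))                   # expected: True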
The output I received from the full script is as follows:
<class 'numpy.ndarray'> (92, 144, 9)
working
<class 'numpy.ndarray'> (92, 144, 9)
working
<class 'numpy.ndarray'> (91, 144, 9)
working
Data Type: <class 'torch.Tensor'>
Data Shape: torch.Size([92, 144, 9])
Data Type: <class 'torch.Tensor'>
Data Shape: torch.Size([92])
<class 'avalanche.benchmarks.utils.data.AvalancheDataset'> <class 'avalanche.benchmarks.utils.data.AvalancheDataset'>
Traceback (most recent call last):
File "/home/aku7rng/git/generalized_timeseries_processing/internal_backbone/SOTA.py", line 110, in <module>
bm = avl.benchmarks.benchmark_from_datasets(dataset_streams={"train_stream":[avl_data_train], "test_stream":[avl_data_test]})
File "/home/aku7rng/.conda/envs/sud_env/lib/python3.9/site-packages/avalanche/benchmarks/scenarios/dataset_scenario.py", line 59, in benchmark_from_datasets
raise ValueError("datasets must be AvalancheDatasets")
ValueError: datasets must be AvalancheDatasets
I don't understand why this error is raised, since the printed types clearly show that the data are AvalancheDatasets.
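In case the data-loading part is not relevant to the problem, here is a minimal sketch that builds the same structures from random tensors and ends with exactly the same benchmark_from_datasets call as my real script. The tensor shapes follow the prints above, but the number of classes and the test-set size are placeholders:

import torch
import avalanche as avl
from torch.utils.data import TensorDataset
from avalanche.benchmarks.utils import AvalancheDataset

# Placeholder data: 3 train experiences and 1 test set
# (shapes roughly match my real data; 25 classes and test size 100 are made up)
train_X = [torch.randn(92, 144, 9), torch.randn(92, 144, 9), torch.randn(91, 144, 9)]
train_Y = [torch.randint(0, 25, (92,)), torch.randint(0, 25, (92,)), torch.randint(0, 25, (91,))]
test_X = torch.randn(100, 144, 9)
test_Y = torch.randint(0, 25, (100,))

avl_data_train = [AvalancheDataset(TensorDataset(x, y)) for x, y in zip(train_X, train_Y)]
avl_data_test = AvalancheDataset(TensorDataset(test_X, test_Y))

# Same call structure as in my full script
bm = avl.benchmarks.benchmark_from_datasets(
    dataset_streams={"train_stream": [avl_data_train], "test_stream": [avl_data_test]}
)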