maxjcohen / transformer

Implementation of the Transformer model (originally from "Attention is All You Need") applied to time series.
https://timeseriestransformer.readthedocs.io/en/latest/
GNU General Public License v3.0

**RuntimeError: The size of tensor a (896) must match the size of tensor b (14) at non-singleton dimension 0** #43

Closed: rotameraklisi closed this 3 years ago

rotameraklisi commented 3 years ago

Hello, I'm working on time series classification with a Transformer. My data has 14 features and 1 categorical label (encoded with LabelEncoder) covering 10 classes, so d_input = 14, d_output = 10, and window_size = 16; what should d_model be? I am getting the error below. What should I do, and why does y come out with only one dimension?

```
[Epoch 1/2]: 0%| | 0/31514 [00:00<?, ?it/s]
torch.Size([31514, 16, 14]) torch.Size([31514])
torch.Size([6092, 16, 14]) torch.Size([6092])
31514 6092
Running on the GPU
Using device cuda:0
torch.Size([64, 16, 14]) torch.Size([64])
[Epoch 1/2]: 0%| | 0/31514 [00:00<?, ?it/s]
torch.Size([896, 16, 14]) torch.Size([896, 16, 14])
```

Error:

```
RuntimeError                              Traceback (most recent call last)
<ipython-input> in <module>()
    149     print(y.shape)
    150     optimizer.zero_grad()
--> 151     netout = net(x)
    152     loss = loss_function(netout, y)
    153     loss.backward()

5 frames
/content/multiHeadAttention.py in forward(self, query, key, value, mask)
     92         print(queries)
     93         print(keys)
---> 94         self._scores = (queries@keys.T) / np.sqrt(K)
     95
     96         # Compute local map mask

RuntimeError: The size of tensor a (896) must match the size of tensor b (14) at non-singleton dimension 0
```

Here is the code:

```python
def create_datasetX(dataset, look_back):
    dataX = []
    row = 0
    while (row + look_back) < len(dataset):
        dataX.append(dataset[row:(row + look_back)])
        row = row + 3
    return np.array(dataX)


def create_datasetY(dataset, look_back):
    dataY = []
    col = 0
    while (col + look_back) < len(dataset):
        dataY.append(dataset[(col + look_back)])
        col = col + 3
    return np.array(dataY)


# Accuracy : 0.110
from numpy import vstack, argmax
from pandas import read_csv
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from torch import Tensor
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn import *
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from loss import OZELoss
from transformer import Transformer
import seaborn as sns
from tqdm import tqdm
import datetime
from utils_ import compute_loss
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import zscore
# from plot_functions import map_plot_function, plot_values_distribution, plot_error_distribution, plot_errors_threshold, plot_visual_sampl


class CSVDataset(Dataset):
    def __init__(self):
        KINEMATICS_USECOLS = [c - 1 for c in [39, 40, 41, 51, 52, 53, 57, 58, 59, 60, 70, 71, 72, 76, 77]]
        trainX = []
        trainY = []
        filenamesTrainX = ['C001.txt', 'C002.txt', 'C003.txt', 'C004.txt', 'C005.txt',
                           'D001.txt', 'D002.txt', 'D003.txt', 'D004.txt', 'D005.txt',
                           'E001.txt', 'E002.txt', 'E003.txt', 'E004.txt', 'E005.txt',
                           'F001.txt', 'F002.txt', 'F003.txt', 'F004.txt', 'F005.txt',
                           'G001.txt', 'G002.txt', 'G003.txt', 'G004.txt', 'G005.txt',
                           'I001.txt', 'I002.txt', 'I003.txt', 'I004.txt', 'I005.txt']
        for fname in filenamesTrainX:
            trainXdata = pd.read_csv(fname, sep=',', usecols=KINEMATICS_USECOLS)
            self.X, self.y = trainXdata.values[:, :-1], trainXdata.values[:, -1]
            self.X = self.X.astype(np.float)
            mean = np.mean(self.X, axis=(0, 1))
            std = np.std(self.X, axis=(0, 1))
            self.X = (self.X - mean) / (std + np.finfo(float).eps)
            self.X = self.X.astype(np.float32)
            # M = np.max(self.X, axis=(0, 1))
            # m = np.min(self.X, axis=(0, 1))
            self.X, self.y = self.X.astype('float32'), LabelEncoder().fit_transform(self.y)
            self.X = create_datasetX(self.X, look_back)
            self.y = create_datasetY(self.y, look_back)
            trainX.extend(self.X)
            trainY.extend(self.y)
        trainX = np.array(trainX)
        trainY = np.array(trainY)
        self.X = trainX
        self.y = trainY
        self.X = torch.Tensor(self.X)
        self.y = torch.Tensor(self.y)
        print(self.X.shape)
        print(self.y.shape)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # if torch.is_tensor(idx):
        #     idx = idx.tolist()
        return [self.X[idx], self.y[idx]]


class TestDataset(Dataset):
    def __init__(self):
        KINEMATICS_USECOLS = [c - 1 for c in [39, 40, 41, 51, 52, 53, 57, 58, 59, 60, 70, 71, 72, 76, 77]]
        trainX = []
        trainY = []
        filenamesTrainX = ['B001.txt', 'B002.txt', 'B003.txt', 'B004.txt', 'B005.txt']
        for fname in filenamesTrainX:
            trainXdata = pd.read_csv(fname, sep=',', usecols=KINEMATICS_USECOLS)
            self.X, self.y = trainXdata.values[:, :-1], trainXdata.values[:, -1]
            self.X = self.X.astype(np.float)
            mean = np.mean(self.X, axis=(0, 1))
            std = np.std(self.X, axis=(0, 1))
            self.X = (self.X - mean) / (std + np.finfo(float).eps)
            # M = np.max(self.X, axis=(0, 1))
            # m = np.min(self.X, axis=(0, 1))
            # self.X = (self.X - m) / (M - m + np.finfo(float).eps)
            look_back = 16
            self.X, self.y = self.X.astype('float32'), LabelEncoder().fit_transform(self.y)
            self.X = self.X.astype(np.float32)
            self.X = create_datasetX(self.X, look_back)
            self.y = create_datasetY(self.y, look_back)
            trainX.extend(self.X)
            trainY.extend(self.y)
        trainX = np.array(trainX)
        trainY = np.array(trainY)
        self.X = trainX
        self.y = trainY
        self.X = torch.Tensor(self.X)
        self.y = torch.Tensor(self.y)
        print(self.X.shape)
        print(self.y.shape)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        return [self.X[idx], self.y[idx]]


def prepare_data():
    dataset = CSVDataset()
    train_dl = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=False)
    return train_dl


BATCH_SIZE = 64
NUM_WORKERS = 0
LR = 0.01
EPOCHS = 2

d_model = 16
q = 14
v = 14
h = 14
N = 7
attention_size = None
dropout = 0.5
pe = None
chunk_mode = None
d_input = 14
d_output = 10
look_back = 16

train_dl = prepare_data()
dataset_test = TestDataset()
test_dl = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
print(len(train_dl.dataset), len(test_dl.dataset))

sns.set()

if torch.cuda.is_available():
    device = torch.device("cuda:0")  # you can continue going on here, like cuda:1 cuda:2....etc.
    print("Running on the GPU")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device {device}")

net = Transformer(d_input, d_model, d_output, q, v, h, N,
                  attention_size=attention_size, dropout=dropout,
                  chunk_mode=chunk_mode, pe=pe)
optimizer = optim.Adam(net.parameters(), lr=LR)
# optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
# optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9)
loss_function = nn.CrossEntropyLoss()

model_save_path = f'models/model_{datetime.datetime.now().strftime("%Y_%m_%d__%H%M%S")}.pth'

for idx_epoch in range(EPOCHS):
    running_loss = 0
    with tqdm(total=len(train_dl.dataset), desc=f"[Epoch {idx_epoch+1:3d}/{EPOCHS}]") as pbar:
        for idx_batch, (x, y) in enumerate(train_dl):
            print(x.shape)
            print(y.shape)
            optimizer.zero_grad()
            netout = net(x)
            loss = loss_function(netout, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            pbar.set_postfix({'loss': running_loss / (idx_batch + 1)})
            pbar.update(x.shape[0])


# evaluate the model
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    for i, (inputs, targets) in enumerate(test_dl):
        yhat = model(inputs)
        yhat = yhat.detach().numpy()
        actual = targets.numpy()
        yhat = argmax(yhat, axis=1)
        actual = actual.reshape((len(actual), 1))
        yhat = yhat.reshape((len(yhat), 1))
        predictions.append(yhat)
        actuals.append(actual)
    predictions, actuals = vstack(predictions), vstack(actuals)
    print(predictions)
    print(actuals)
    acc = accuracy_score(actuals, predictions)
    return acc


acc = evaluate_model(test_dl, net)
print('Accuracy: %.3f' % acc)
```
maxjcohen commented 3 years ago

Hi, I'm sorry but I can't really help you with these dimension errors; it would require running the entire code with your dataset, which would take more time than I can afford. Especially considering that you're using a slightly modified version of my code, such as:

```python
self._scores = (queries@keys.T) / np.sqrt(K)
```

This does make the code much more readable, and follows PEP 465 (the `@` matrix multiplication operator), but I haven't had a chance to test it thoroughly.
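For what it's worth, here is a minimal sketch of how that line can produce exactly this error, assuming the `(896, 16, 14)` shapes shown in your log (896 = batch 64 × h = 14 heads after chunking; the `d_k` value and scaling factor are illustrative, not taken from the repo). On a 3-D tensor, `.T` reverses *all* dimensions rather than transposing only the last two (recent PyTorch versions deprecate `.T` on non-2D tensors for this reason), so the batch dimensions no longer line up when `@` tries to broadcast them:

```python
import torch

# Shapes from the log above: 896 chunks (batch 64 * 14 heads),
# window of 16 time steps, 14 values per step (illustrative d_k).
queries = torch.randn(896, 16, 14)
keys = torch.randn(896, 16, 14)

# keys.T reverses every dimension: (896, 16, 14) -> (14, 16, 896).
# queries @ keys.T then tries to broadcast batch dims 896 and 14, raising:
# "The size of tensor a (896) must match the size of tensor b (14)
#  at non-singleton dimension 0"
# scores = (queries @ keys.T) / 14 ** 0.5  # RuntimeError

# Transposing only the last two dimensions keeps the batch dimension aligned:
scores = (queries @ keys.transpose(-2, -1)) / 14 ** 0.5
print(scores.shape)  # torch.Size([896, 16, 16])
```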

I would encourage you to continue debugging to understand why the batch matrix multiplication raises an error. You may find it helpful to start from the original code and a fresh dataset to get a working example, and then work back to your particular case. I would also encourage you to take a look at the equations behind Multi-Head Attention, to get a better understanding of how things should be multiplied.
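For reference (the thread doesn't spell it out, but this is the computation that line implements), the scaled dot-product attention from "Attention is All You Need" is

$$\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{Q K^\top}{\sqrt{d_k}}\right) V,$$

where $d_k$ is the key dimension. In the batched multi-head setting, $Q K^\top$ is an independent window × window score matrix per batch element and per head, which is why the leading (batch) dimensions of the two tensors must match.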