ms-dot-k / Visual-Audio-Memory

PyTorch implementation of "Multi-modality Associative Bridging through Memory: Speech Sound Recollected from Face Video" (ICCV2021)
Other
19 stars 4 forks source link

Pytorch Error Runtime RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x2048 and 64x6) #4

Closed harshalDharpure closed 6 months ago

harshalDharpure commented 6 months ago

I am trying to concat the X,Y and Rag Feature But it is Giving me erorr I have use the simple concat but it is Giving me error I just want to concat the x,y,rag feature in the forward function, can Anyone help me to solve the Problem

How do I fix an error when concatenating x, y, and rag in the forward function using torch.cat, ensuring matching dimensions and device types?

import torch
import torch.nn as nn
import torch.nn.functional as F
class MFB(nn.Module):
    def __init__(self,img_feat_size, ques_feat_size, is_first, MFB_K, MFB_O, DROPOUT_R):
        super(MFB, self).__init__()
        #self.__C = __C
        self.MFB_K = MFB_K
        self.MFB_O = MFB_O
        self.DROPOUT_R = DROPOUT_R

        self.is_first = is_first
        self.proj_i = nn.Linear(img_feat_size, MFB_K * MFB_O)
        self.proj_q = nn.Linear(ques_feat_size, MFB_K * MFB_O)

        self.dropout = nn.Dropout(DROPOUT_R)
        self.pool = nn.AvgPool1d(MFB_K, stride = MFB_K)

    def forward(self, img_feat, ques_feat, exp_in=1):
        batch_size = img_feat.shape[0]
        img_feat = self.proj_i(img_feat)                # (N, C, K*O)
        ques_feat = self.proj_q(ques_feat)              # (N, 1, K*O)

        exp_out = img_feat * ques_feat             # (N, C, K*O)
        exp_out = self.dropout(exp_out) if self.is_first else self.dropout(exp_out * exp_in)     # (N, C, K*O)
        z = self.pool(exp_out) * self.MFB_K         # (N, C, O)
        z = torch.sqrt(F.relu(z)) - torch.sqrt(F.relu(-z))
        z = F.normalize(z.view(batch_size, -1))         # (N, C*O)
        z = z.view(batch_size, -1, self.MFB_O)      # (N, C, O)
        return z

#MFB -> Multimodal Factorized Bilinear Pooling
#used to model complex interactions between features like image and text
#MFB_K -> Number Of factors, MFB_O -> Output size,
#Init initializes linear projection layers for image and question features , dropout layer and average pooling layer

#Forward:

#exp_in = input expansion factor (default - 1)
#Linear projection of image and question features to factorized bilinear form
#Element-wise multiplication of image and question features
#APply Dropout
#Average pooling along the factorized dimension (MFB_K) to reduce the size of the output tensor
#Element-wise operations to compute the final output (z) using square root and normalization using Relu.
#The final output represents the fused representation of image and question features.
data = data[~data['Name'].isin(outliers)]
len(sample_dataset_new)

torch.manual_seed(123)
t_p,v_p = torch.utils.data.random_split(sample_dataset_new,[450,50])

# torch.manual_seed(123)
t_p,te_p = torch.utils.data.random_split(t_p,[340,110])

t_p[1]["processed_img"].shape
t_p[1]['processed_txt'].shape
t_p[1]['processed_rag'].shape

(768,)
    class Classifier(pl.LightningModule):

    def __init__(self):
      super().__init__()
      self.MFB = MFB(512,768,True,256,64,0.1)
      self.fin_y_shape = torch.nn.Linear(768,512)
      self.fin_old = torch.nn.Linear(64,2)
      self.fin = torch.nn.Linear(16 * 768, 64)
      self.fin_inten = torch.nn.Linear(2048,6)
      self.fin_e1 = torch.nn.Linear(64,2)
      self.fin_e2 = torch.nn.Linear(64,2)
      self.fin_e3 = torch.nn.Linear(64,2)
      self.fin_e4 = torch.nn.Linear(64,2)
      self.fin_e5 = torch.nn.Linear(64,2)
      self.fin_e6 = torch.nn.Linear(64,2)
      self.fin_e7 = torch.nn.Linear(64,2)
      self.fin_e8 = torch.nn.Linear(64,2)
      self.fin_e9 = torch.nn.Linear(64,2)
      # self.reduce_x = torch.nn.Linear(768, 512)
      # self.reduce_rag = torch.nn.Linear(768, 512)

      self.validation_step_outputs = []
      self.test_step_outputs = []

    def forward(self, x,y,rag):
        x_,y_,rag_ = x,y,rag
        print("x.shape", x.shape)
        print("y.shape",y.shape)
        print("rag.shape",rag.shape)

        # x = self.reduce_x(x)
        # rag = self.reduce_rag(rag)

        # print("x.shape", x.shape)
        # print("y.shape",y.shape)
        # print("rag.shape",rag.shape)
        # z = self.MFB(torch.unsqueeze(y, axis=1), torch.unsqueeze(rag, axis=1))
        # z_rag = self.MFB(torch.unsqueeze(y, axis=1),torch.unsqueeze(rag, axis=1))
        # z_con = torch.cat((z, z_rag), dim=1)

        # Concatenate x with y and then with rag

        z= torch.cat((torch.cat((x, y), dim=1), rag), dim=1)

        # Pass concatenated x with y and x with rag through your network
        z_new = torch.squeeze(z,dim=1)
        print("z_new shape",z_new)

        c_inten = self.fin_inten(z_new)
        c_e1 = self.fin_e1(z_new)
        c_e2 = self.fin_e2(z_new)
        c_e3 = self.fin_e3(z_new)
        c_e4 = self.fin_e4(z_new)
        c_e5 = self.fin_e5(z_new)
        c_e6 = self.fin_e6(z_new)
        c_e7 = self.fin_e7(z_new)
        c_e8 = self.fin_e8(z_new)
        c_e9 = self.fin_e9(z_new)
        c = self.fin_old(z_new)

        # print("z.shape",z.shape)
        # print("z_new shape",z_new.shape)
        # print("intensity error:", c_inten.shape)
        # print("output:", c.shape)
        # print("c_e1:", c_e1.shape)
        # print("c_e2:", c_e2.shape)
        # print("c_e3:", c_e3.shape)
        # print("c_e4:", c_e4.shape)
        # print("c_e5:", c_e5.shape)
        # print("c_e6:", c_e6.shape)
        # print("c_e7:", c_e7.shape)
        # print("c_e8:", c_e8.shape)
        # print("c_e9:", c_e9.shape)
        # print("logits.shape",logits.shape)

        output = torch.log_softmax(c, dim=1)
        c_inten = torch.log_softmax(c_inten, dim=1)
        c_e1 = torch.log_softmax(c_e1, dim=1)
        c_e2 = torch.log_softmax(c_e2, dim=1)
        c_e3 = torch.log_softmax(c_e3, dim=1)
        c_e4 = torch.log_softmax(c_e4, dim=1)
        c_e5 = torch.log_softmax(c_e5, dim=1)
        c_e6 = torch.log_softmax(c_e6, dim=1)
        c_e7 = torch.log_softmax(c_e7, dim=1)
        c_e8 = torch.log_softmax(c_e8, dim=1)
        c_e9 = torch.log_softmax(c_e9, dim=1)

        return output,c_inten,c_e1,c_e2,c_e3,c_e4,c_e5,c_e6,c_e7,c_e8,c_e9

    def cross_entropy_loss(self, logits, labels):
      print("logits.shape",logits.shape)
      return F.nll_loss(logits, labels)

    def training_step(self, train_batch, batch_idx):
        #lab,txt,rag,img,name,per,iro,alli,ana,inv,meta,puns,sat,hyp= train_batch
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= train_batch
        #logit_offen,a,b,c,d,e,f,g,h,i,logit_inten_target= self.forward(txt,img,rag)

        lab = train_batch[lab].unsqueeze(1)
        #print(lab)
        txt = train_batch[txt]
        rag = train_batch[rag]
        img = train_batch[img]
        name= train_batch[name]
        intensity = train_batch[intensity].unsqueeze(1)
        e1 = train_batch[e1].unsqueeze(1)
        e2 = train_batch[e2].unsqueeze(1)
        e3 = train_batch[e3].unsqueeze(1)
        e4 = train_batch[e4].unsqueeze(1)
        e5 = train_batch[e5].unsqueeze(1)
        e6 = train_batch[e6].unsqueeze(1)
        e7 = train_batch[e7].unsqueeze(1)
        e8 = train_batch[e8].unsqueeze(1)
        e9 = train_batch[e9].unsqueeze(1)

        lab = F.one_hot(lab, num_classes=2)
        intensity = torch.abs(intensity)
        intensity = F.one_hot(intensity, num_classes=6)  # Assuming you have 6 classes
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)

        lab = lab.squeeze(dim=1)
        intensity = intensity.squeeze(dim=1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logit_offen,logit_inten_target,a,b,c,d,e,f,g,h,i= self.forward(txt,img,rag)

        loss1 = self.cross_entropy_loss(logit_offen, lab)
        loss17 = self.cross_entropy_loss(logit_inten_target, intensity)
        loss4 = self.cross_entropy_loss(a, e1)
        loss5 = self.cross_entropy_loss(b, e2)
        loss6 = self.cross_entropy_loss(c, e3)
        loss7 = self.cross_entropy_loss(d, e4)
        loss8 = self.cross_entropy_loss(e, e5)
        loss9 = self.cross_entropy_loss(f, e6)
        loss10 = self.cross_entropy_loss(g, e7)
        loss11 = self.cross_entropy_loss(h, e8)
        loss12 = self.cross_entropy_loss(i, e9)

        loss = loss1 + loss4 + loss5 + loss6 + loss7 + loss8 +loss9 + loss10 +loss11 +loss12 + loss17

        self.log('train_loss', loss)
        return loss

    def validation_step(self, val_batch, batch_idx):
        #lab,txt,rag,img,name,per,iro,alli,ana,inv,meta,puns,sat,hyp = val_batch
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= val_batch
        lab = val_batch[lab].unsqueeze(1)
        #print(lab)
        txt = val_batch[txt]
        rag = val_batch[rag]
        img = val_batch[img]
        name = val_batch[name]
        intensity = val_batch[intensity].unsqueeze(1)
        e1 = val_batch[e1].unsqueeze(1)
        e2 = val_batch[e2].unsqueeze(1)
        e3 = val_batch[e3].unsqueeze(1)
        e4 = val_batch[e4].unsqueeze(1)
        e5 = val_batch[e5].unsqueeze(1)
        e6 = val_batch[e6].unsqueeze(1)
        e7 = val_batch[e7].unsqueeze(1)
        e8 = val_batch[e8].unsqueeze(1)
        e9 = val_batch[e9].unsqueeze(1)

        lab = F.one_hot(lab, num_classes=2)

        intensity = torch.abs(intensity)
        intensity = F.one_hot(intensity, num_classes=6)
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)
        lab = lab.squeeze(dim=1)

        intensity = intensity.squeeze(dim = 1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logits,inten,a,b,c,d,e,f,g,h,i = self.forward(txt,img,rag)

        logits=logits.float()

        tmp = np.argmax(logits.detach().cpu().numpy(),axis=1)
        loss = self.cross_entropy_loss(logits, lab)
        lab = lab.detach().cpu().numpy()
        self.log('val_acc', accuracy_score(lab,tmp))
        self.log('val_roc_auc',roc_auc_score(lab,tmp))
        self.log('val_loss', loss)
        tqdm_dict = {'val_acc': accuracy_score(lab,tmp)}
        self.validation_step_outputs.append({'progress_bar': tqdm_dict,'val_f1 offensive': f1_score(lab,tmp,average='macro')})

        return {
                  'progress_bar': tqdm_dict,
        'val_f1 offensive': f1_score(lab,tmp,average='macro')
        }

    def on_validation_epoch_end(self):
      outs = []
      outs14=[]
      for out in self.validation_step_outputs:
        outs.append(out['progress_bar']['val_acc'])
        outs14.append(out['val_f1 offensive'])
      self.log('val_acc_all_offn', sum(outs)/len(outs))
      self.log('val_f1 offensive', sum(outs14)/len(outs14))
      print(f'***val_acc_all_offn at epoch end {sum(outs)/len(outs)}****')
      print(f'***val_f1 offensive at epoch end {sum(outs14)/len(outs14)}****')
      self.validation_step_outputs.clear()

    def test_step(self, batch, batch_idx):
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= batch
        lab = batch[lab].unsqueeze(1)
        #print(lab)
        txt = batch[txt]
        rag = batch[rag]
        img = batch[img]
        name = batch[name]
        intensity = batch[intensity].unsqueeze(1)
        e1 = batch[e1].unsqueeze(1)
        e2 = batch[e2].unsqueeze(1)
        e3 = batch[e3].unsqueeze(1)
        e4 = batch[e4].unsqueeze(1)
        e5 = batch[e5].unsqueeze(1)
        e6 = batch[e6].unsqueeze(1)
        e7 = batch[e7].unsqueeze(1)
        e8 = batch[e8].unsqueeze(1)
        e9 = batch[e9].unsqueeze(1)
        lab = F.one_hot(lab, num_classes=2)
        intensity = F.one_hot(intensity, num_classes=6)
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)
        lab = lab.squeeze(dim=1)
        intensity = intensity.squeeze(dim=1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logits,inten,a,b,c,d,e,f,g,h,i= self.forward(txt,img,rag)

        logits = logits.float()
        tmp = np.argmax(logits.detach().cpu().numpy(force=True),axis=-1)
        loss = self.cross_entropy_loss(logits, lab)
        lab = lab.detach().cpu().numpy()
        self.log('test_acc', accuracy_score(lab,tmp))
        self.log('test_roc_auc',roc_auc_score(lab,tmp))
        self.log('test_loss', loss)
        tqdm_dict = {'test_acc': accuracy_score(lab,tmp)}
        self.test_step_outputs.append({'progress_bar': tqdm_dict,'test_acc': accuracy_score(lab,tmp), 'test_f1_score': f1_score(lab,tmp,average='macro')})
        return {
                  'progress_bar': tqdm_dict,
                  'test_acc': accuracy_score(lab,tmp),
                  'test_f1_score': f1_score(lab,tmp,average='macro')
        }
    def on_test_epoch_end(self):
        # OPTIONAL
        outs = []
        outs1,outs2,outs3,outs4,outs5,outs6,outs7,outs8,outs9,outs10,outs11,outs12,outs13,outs14 = \
        [],[],[],[],[],[],[],[],[],[],[],[],[],[]
        for out in self.test_step_outputs:
          outs.append(out['test_acc'])
          outs2.append(out['test_f1_score'])
        self.log('test_acc', sum(outs)/len(outs))
        self.log('test_f1_score', sum(outs2)/len(outs2))
        self.test_step_outputs.clear()

    def configure_optimizers(self):
      # optimizer = torch.optim.Adam(self.parameters(), lr=3e-2)
      optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)

      return optimizer

  """
  Main Model:
  Initialize
  Forward Pass
  Training Step
  Validation Step
  Testing Step

  Pp
  """

  class HmDataModule(pl.LightningDataModule):

    def setup(self, stage):
      self.hm_train = t_p
      self.hm_val = v_p
      # self.hm_test = test
      self.hm_test = te_p

    def train_dataloader(self):
      return DataLoader(self.hm_train, batch_size=10, drop_last=True)

    def val_dataloader(self):
      return DataLoader(self.hm_val, batch_size=10, drop_last=True)

    def test_dataloader(self):
      return DataLoader(self.hm_test, batch_size=10, drop_last=True)

  data_module = HmDataModule()
  checkpoint_callback = ModelCheckpoint(
      monitor='val_acc_all_offn',
      dirpath='mrinal/',
      filename='epoch{epoch:02d}-val_f1_all_offn{val_acc_all_offn:.2f}',
      auto_insert_metric_name=False,
      save_top_k=1,
      mode="max",
  )
  all_callbacks = []
  all_callbacks.append(checkpoint_callback)
  # train
  from pytorch_lightning import seed_everything
  seed_everything(42, workers=True)
  hm_model = Classifier()
  gpus=1
  #if torch.cuda.is_available():gpus=0
  trainer = pl.Trainer(deterministic=True,max_epochs=10,precision=16,callbacks=all_callbacks)
  trainer.fit(hm_model, data_module)
INFO:lightning_fabric.utilities.seed:Seed set to 42
/usr/local/lib/python3.10/dist-packages/lightning_fabric/connector.py:563: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/accelerator_connector.py:556: You passed `Trainer(accelerator='cpu', precision='16-mixed')` but AMP with fp16 is not supported on CPU. Using `precision='bf16-mixed'` instead.
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
WARNING:pytorch_lightning.loggers.tensorboard:Missing logger folder: /content/LLaVA/lightning_logs
INFO:pytorch_lightning.callbacks.model_summary:
   | Name        | Type   | Params
----------------------------------------
0  | MFB         | MFB    | 21.0 M
1  | fin_y_shape | Linear | 393 K 
2  | fin_old     | Linear | 130   
3  | fin         | Linear | 786 K 
4  | fin_inten   | Linear | 12.3 K
5  | fin_e1      | Linear | 130   
6  | fin_e2      | Linear | 130   
7  | fin_e3      | Linear | 130   
8  | fin_e4      | Linear | 130   
9  | fin_e5      | Linear | 130   
10 | fin_e6      | Linear | 130   
11 | fin_e7      | Linear | 130   
12 | fin_e8      | Linear | 130   
13 | fin_e9      | Linear | 130   
----------------------------------------
22.2 M    Trainable params
0         Non-trainable params
22.2 M    Total params
88.792    Total estimated model params size (MB)
Sanity Checking DataLoader 0:   0%
 0/2 [00:00<?, ?it/s]
x.shape torch.Size([10, 768])
y.shape torch.Size([10, 512])
rag.shape torch.Size([10, 768])
z_new shape tensor([[ 0.0144, -0.1677,  0.1100,  ..., -0.1818,  0.4250, -0.2985],
        [-0.2105, -0.1002, -0.0113,  ..., -0.0639,  0.3789, -0.0553],
        [-0.1221, -0.1026, -0.3277,  ..., -0.3724,  0.1562,  0.0286],
        ...,
        [-0.0950,  0.3957,  0.3603,  ..., -0.2121,  0.6465, -0.1983],
        [ 0.0080,  0.2380, -0.0409,  ..., -0.2565,  0.0946, -0.1098],
        [ 0.1351, -0.3463,  0.3371,  ..., -0.2283,  0.4667,  0.0087]])
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-29-279b4c8e1163> in <cell line: 369>()
    367 #if torch.cuda.is_available():gpus=0
    368 trainer = pl.Trainer(deterministic=True,max_epochs=10,precision=16,callbacks=all_callbacks)
--> 369 trainer.fit(hm_model, data_module)

14 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
    112 
    113     def forward(self, input: Tensor) -> Tensor:
--> 114         return F.linear(input, self.weight, self.bias)
    115 
    116     def extra_repr(self) -> str:

RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x2048 and 64x2)
harshalDharpure commented 6 months ago

I am new to this can anyone help me in Pytorch

ms-dot-k commented 6 months ago

Please check the dimensions of the defined new variables (e.g., linear layer). Currently I think the 'fin_e#' should have the size of 2048x2 instead of 64x2.

harshalDharpure commented 6 months ago

Yes I have checked it But Now it is Giving me another error I have change the code and make everywhere 2048 x2 But I got New error can you please look into it

lable():gpus=0 389 trainer = pl.Trainer(deterministic=True,max_epochs=20,precision=16,callbacks=all_callbacks) --> 390 trainer.fit(hm_model, data_module)

13 frames /usr/local/lib/python3.10/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction) 2702 if size_average is not None or reduce is not None: 2703 reduction = _Reduction.legacy_get_string(size_average, reduce) -> 2704 return torch._C._nn.nll_loss_nd(input, target, weight, _Reduction.get_enum(reduction), ignore_index) 2705 2706

RuntimeError: 0D or 1D target tensor expected, multi-target not supported

harshalDharpure commented 6 months ago

It's been one week since I tried to solve the problem, but nothing helped me out. I have also asked on different platforms about the same doubt, but nothing worked out.

harshalDharpure commented 6 months ago

This is the Updated Code which I have change the Dimension, I have also got Some suggestion on this code about the error which i have posted above Now , but I didn't understand Properly, can you please explain it where I have to update in the code.

From Pytorch community --

This error points to the target used in nn.CrossEntropyLoss or nn.NLLLoss having an invalid shape. Your code is unfortunately not executable, so it’s not trivial to copy/paste it to fix other issues.

From stackoverflow--

You need to focus, I previously told you to update the shape of fin_inten which you did. Now: you are throwing z_new into fin_inten which is a nn.Linear(2048,6) and right after into fin_e1 which is a nn.Linear(64,2). Your code is not consistent, you can't just throw random tensors into functions and hope for the best!

  class Classifier(pl.LightningModule):

    def __init__(self):
      super().__init__()
      self.MFB = MFB(512,768,True,256,64,0.1)
      self.fin_y_shape = torch.nn.Linear(768,512)
      self.fin_old = torch.nn.Linear(2048,2)
      self.fin = torch.nn.Linear(16 * 768, 64)
      self.fin_inten = torch.nn.Linear(2048,6)
      self.fin_e1 = torch.nn.Linear(2048,2)
      self.fin_e2 = torch.nn.Linear(2048,2)
      self.fin_e3 = torch.nn.Linear(2048,2)
      self.fin_e4 = torch.nn.Linear(2048,2)
      self.fin_e5 = torch.nn.Linear(2048,2)
      self.fin_e6 = torch.nn.Linear(2048,2)
      self.fin_e7 = torch.nn.Linear(2048,2)
      self.fin_e8 = torch.nn.Linear(2048,2)
      self.fin_e9 = torch.nn.Linear(2048,2)
      # self.reduce_x = torch.nn.Linear(768, 512)
      # self.reduce_rag = torch.nn.Linear(768, 512)

      self.validation_step_outputs = []
      self.test_step_outputs = []

    def forward(self, x,y,rag):
        x_,y_,rag_ = x,y,rag
        print("x.shape", x.shape)
        print("y.shape",y.shape)
        # print("rag.shape",rag.shape)

        # x = self.reduce_x(x)
        # rag = self.reduce_rag(rag)

        # print("x.shape", x.shape)
        # print("y.shape",y.shape)
        # print("rag.shape",rag.shape)
        # z = self.MFB(torch.unsqueeze(y, axis=1), torch.unsqueeze(rag, axis=1))
        # z_rag = self.MFB(torch.unsqueeze(y, axis=1),torch.unsqueeze(rag, axis=1))
        # z_con = torch.cat((z, z_rag), dim=1)

        # Concatenate x with y and then with rag

        # z= torch.cat((torch.cat((x, y), dim=1), rag), dim=1)

        # # Pass concatenated x with y and x with rag through your network
        # z_new = torch.squeeze(z,dim=1)
        # print("z_new shape",z_new)

        z = torch.cat((x, y, rag), dim=1)
        z_new = torch.squeeze(z, dim=1)

        c_inten = self.fin_inten(z_new)
        c_e1 = self.fin_e1(z_new)
        c_e2 = self.fin_e2(z_new)
        c_e3 = self.fin_e3(z_new)
        c_e4 = self.fin_e4(z_new)
        c_e5 = self.fin_e5(z_new)
        c_e6 = self.fin_e6(z_new)
        c_e7 = self.fin_e7(z_new)
        c_e8 = self.fin_e8(z_new)
        c_e9 = self.fin_e9(z_new)
        c = self.fin_old(z_new)

        # print("z.shape",z.shape)
        # print("z_new shape",z_new.shape)
        # print("intensity error:", c_inten.shape)
        # print("output:", c.shape)
        # print("c_e1:", c_e1.shape)
        # print("c_e2:", c_e2.shape)
        # print("c_e3:", c_e3.shape)
        # print("c_e4:", c_e4.shape)
        # print("c_e5:", c_e5.shape)
        # print("c_e6:", c_e6.shape)
        # print("c_e7:", c_e7.shape)
        # print("c_e8:", c_e8.shape)
        # print("c_e9:", c_e9.shape)
        # print("logits.shape",logits.shape)

        output = torch.log_softmax(c, dim=1)
        c_inten = torch.log_softmax(c_inten, dim=1)
        c_e1 = torch.log_softmax(c_e1, dim=1)
        c_e2 = torch.log_softmax(c_e2, dim=1)
        c_e3 = torch.log_softmax(c_e3, dim=1)
        c_e4 = torch.log_softmax(c_e4, dim=1)
        c_e5 = torch.log_softmax(c_e5, dim=1)
        c_e6 = torch.log_softmax(c_e6, dim=1)
        c_e7 = torch.log_softmax(c_e7, dim=1)
        c_e8 = torch.log_softmax(c_e8, dim=1)
        c_e9 = torch.log_softmax(c_e9, dim=1)

        return output,c_inten,c_e1,c_e2,c_e3,c_e4,c_e5,c_e6,c_e7,c_e8,c_e9

    def cross_entropy_loss(self, logits, labels):

        return F.nll_loss(logits, labels)

    def training_step(self, train_batch, batch_idx):
        #lab,txt,rag,img,name,per,iro,alli,ana,inv,meta,puns,sat,hyp= train_batch
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= train_batch
        #logit_offen,a,b,c,d,e,f,g,h,i,logit_inten_target= self.forward(txt,img,rag)

        lab = train_batch[lab].unsqueeze(1)
        #print(lab)
        txt = train_batch[txt]
        rag = train_batch[rag]
        img = train_batch[img]
        name= train_batch[name]
        intensity = train_batch[intensity].unsqueeze(1)
        e1 = train_batch[e1].unsqueeze(1)
        e2 = train_batch[e2].unsqueeze(1)
        e3 = train_batch[e3].unsqueeze(1)
        e4 = train_batch[e4].unsqueeze(1)
        e5 = train_batch[e5].unsqueeze(1)
        e6 = train_batch[e6].unsqueeze(1)
        e7 = train_batch[e7].unsqueeze(1)
        e8 = train_batch[e8].unsqueeze(1)
        e9 = train_batch[e9].unsqueeze(1)

        lab = F.one_hot(lab, num_classes=2)
        intensity = torch.abs(intensity)
        intensity = F.one_hot(intensity, num_classes=6)  # Assuming you have 6 classes
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)

        lab = lab.squeeze(dim=1)
        intensity = intensity.squeeze(dim=1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logit_offen,logit_inten_target,a,b,c,d,e,f,g,h,i= self.forward(txt,img,rag)

        loss1 = self.cross_entropy_loss(logit_offen, lab)
        loss17 = self.cross_entropy_loss(logit_inten_target, intensity)
        loss4 = self.cross_entropy_loss(a, e1)
        loss5 = self.cross_entropy_loss(b, e2)
        loss6 = self.cross_entropy_loss(c, e3)
        loss7 = self.cross_entropy_loss(d, e4)
        loss8 = self.cross_entropy_loss(e, e5)
        loss9 = self.cross_entropy_loss(f, e6)
        loss10 = self.cross_entropy_loss(g, e7)
        loss11 = self.cross_entropy_loss(h, e8)
        loss12 = self.cross_entropy_loss(i, e9)

        loss = loss1 + loss4 + loss5 + loss6 + loss7 + loss8 +loss9 + loss10 +loss11 +loss12 + loss17

        self.log('train_loss', loss)
        return loss

    def validation_step(self, val_batch, batch_idx):
        #lab,txt,rag,img,name,per,iro,alli,ana,inv,meta,puns,sat,hyp = val_batch
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= val_batch
        lab = val_batch[lab].unsqueeze(1)
        #print(lab)
        txt = val_batch[txt]
        rag = val_batch[rag]
        img = val_batch[img]
        name = val_batch[name]
        intensity = val_batch[intensity].unsqueeze(1)
        e1 = val_batch[e1].unsqueeze(1)
        e2 = val_batch[e2].unsqueeze(1)
        e3 = val_batch[e3].unsqueeze(1)
        e4 = val_batch[e4].unsqueeze(1)
        e5 = val_batch[e5].unsqueeze(1)
        e6 = val_batch[e6].unsqueeze(1)
        e7 = val_batch[e7].unsqueeze(1)
        e8 = val_batch[e8].unsqueeze(1)
        e9 = val_batch[e9].unsqueeze(1)

        lab = F.one_hot(lab, num_classes=2)

        intensity = torch.abs(intensity)
        intensity = F.one_hot(intensity, num_classes=6)
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)
        lab = lab.squeeze(dim=1)

        intensity = intensity.squeeze(dim = 1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logits,inten,a,b,c,d,e,f,g,h,i = self.forward(txt,img,rag)

        logits=logits.float()

        tmp = np.argmax(logits.detach().cpu().numpy(),axis=1)
        loss = self.cross_entropy_loss(logits, lab)
        lab = lab.detach().cpu().numpy()
        self.log('val_acc', accuracy_score(lab,tmp))
        self.log('val_roc_auc',roc_auc_score(lab,tmp))
        self.log('val_loss', loss)
        tqdm_dict = {'val_acc': accuracy_score(lab,tmp)}
        self.validation_step_outputs.append({'progress_bar': tqdm_dict,'val_f1 offensive': f1_score(lab,tmp,average='macro')})

        return {
                  'progress_bar': tqdm_dict,
        'val_f1 offensive': f1_score(lab,tmp,average='macro')
        }

    def on_validation_epoch_end(self):
      outs = []
      outs14=[]
      for out in self.validation_step_outputs:
        outs.append(out['progress_bar']['val_acc'])
        outs14.append(out['val_f1 offensive'])
      self.log('val_acc_all_offn', sum(outs)/len(outs))
      self.log('val_f1 offensive', sum(outs14)/len(outs14))
      print(f'***val_acc_all_offn at epoch end {sum(outs)/len(outs)}****')
      print(f'***val_f1 offensive at epoch end {sum(outs14)/len(outs14)}****')
      self.validation_step_outputs.clear()

    def test_step(self, batch, batch_idx):
        lab,txt,rag,img,name,intensity,e1,e2,e3,e4,e5,e6,e7,e8,e9= batch
        lab = batch[lab].unsqueeze(1)
        #print(lab)
        txt = batch[txt]
        rag = batch[rag]
        img = batch[img]
        name = batch[name]
        intensity = batch[intensity].unsqueeze(1)
        e1 = batch[e1].unsqueeze(1)
        e2 = batch[e2].unsqueeze(1)
        e3 = batch[e3].unsqueeze(1)
        e4 = batch[e4].unsqueeze(1)
        e5 = batch[e5].unsqueeze(1)
        e6 = batch[e6].unsqueeze(1)
        e7 = batch[e7].unsqueeze(1)
        e8 = batch[e8].unsqueeze(1)
        e9 = batch[e9].unsqueeze(1)
        lab = F.one_hot(lab, num_classes=2)
        intensity = F.one_hot(intensity, num_classes=6)
        e1 = F.one_hot(e1,num_classes = 2)
        e2 = F.one_hot(e2,num_classes = 2)
        e3 = F.one_hot(e3,num_classes = 2)
        e4 = F.one_hot(e4,num_classes = 2)
        e5 = F.one_hot(e5,num_classes = 2)
        e6 = F.one_hot(e6,num_classes = 2)
        e7 = F.one_hot(e7,num_classes = 2)
        e8 = F.one_hot(e8,num_classes = 2)
        e9 = F.one_hot(e9,num_classes = 2)
        lab = lab.squeeze(dim=1)
        intensity = intensity.squeeze(dim=1)
        e1 = e1.squeeze(dim=1)
        e2 = e2.squeeze(dim=1)
        e3 = e3.squeeze(dim=1)
        e4 = e4.squeeze(dim=1)
        e5 = e5.squeeze(dim=1)
        e6 = e6.squeeze(dim=1)
        e7 = e7.squeeze(dim=1)
        e8 = e8.squeeze(dim=1)
        e9 = e9.squeeze(dim=1)

        logits,inten,a,b,c,d,e,f,g,h,i= self.forward(txt,img,rag)

        logits = logits.float()
        tmp = np.argmax(logits.detach().cpu().numpy(force=True),axis=-1)
        loss = self.cross_entropy_loss(logits, lab)
        lab = lab.detach().cpu().numpy()
        self.log('test_acc', accuracy_score(lab,tmp))
        self.log('test_roc_auc',roc_auc_score(lab,tmp))
        self.log('test_loss', loss)
        tqdm_dict = {'test_acc': accuracy_score(lab,tmp)}
        self.test_step_outputs.append({'progress_bar': tqdm_dict,'test_acc': accuracy_score(lab,tmp), 'test_f1_score': f1_score(lab,tmp,average='macro')})
        return {
                  'progress_bar': tqdm_dict,
                  'test_acc': accuracy_score(lab,tmp),
                  'test_f1_score': f1_score(lab,tmp,average='macro')
        }
    def on_test_epoch_end(self):
        # OPTIONAL
        outs = []
        outs1,outs2,outs3,outs4,outs5,outs6,outs7,outs8,outs9,outs10,outs11,outs12,outs13,outs14 = \
        [],[],[],[],[],[],[],[],[],[],[],[],[],[]
        for out in self.test_step_outputs:
          outs.append(out['test_acc'])
          outs2.append(out['test_f1_score'])
        self.log('test_acc', sum(outs)/len(outs))
        self.log('test_f1_score', sum(outs2)/len(outs2))
        self.test_step_outputs.clear()

    def configure_optimizers(self):
      # optimizer = torch.optim.Adam(self.parameters(), lr=3e-2)
      optimizer = torch.optim.Adam(self.parameters(), lr=1e-5)

      return optimizer

  """
  Main Model:
  Initialize
  Forward Pass
  Training Step
  Validation Step
  Testing Step

  Pp
  """

  class HmDataModule(pl.LightningDataModule):

    def setup(self, stage):
      self.hm_train = t_p
      self.hm_val = v_p
      # self.hm_test = test
      self.hm_test = te_p

    def train_dataloader(self):
      return DataLoader(self.hm_train, batch_size=10, drop_last=True)

    def val_dataloader(self):
      return DataLoader(self.hm_val, batch_size=10, drop_last=True)

    def test_dataloader(self):
      return DataLoader(self.hm_test, batch_size=10, drop_last=True)

  data_module = HmDataModule()
  checkpoint_callback = ModelCheckpoint(
      monitor='val_acc_all_offn',
      dirpath='mrinal/',
      filename='epoch{epoch:02d}-val_f1_all_offn{val_acc_all_offn:.2f}',
      auto_insert_metric_name=False,
      save_top_k=1,
      mode="max",
  )
  all_callbacks = []
  all_callbacks.append(checkpoint_callback)
  # train
  from pytorch_lightning import seed_everything
  seed_everything(42, workers=True)
  hm_model = Classifier()
  gpus=1
  #if torch.cuda.is_available():gpus=0
  trainer = pl.Trainer(deterministic=True,max_epochs=20,precision=16,callbacks=all_callbacks)
  trainer.fit(hm_model, data_module)