dragen1860 / MAML-Pytorch

Elegant PyTorch implementation of the paper Model-Agnostic Meta-Learning (MAML)
MIT License

Accuracy never changes #75

Open shiyao1999 opened 11 months ago

shiyao1999 commented 11 months ago

I am using ALBERT and a Siamese network to train a subjective-question scoring model, with the training strategy based on your code; the Siamese network consists of a bidirectional LSTM and fully connected layers. During training I found that the accuracy never improves, it just stays constant. It feels as if the weights are not being updated, perhaps because the gradients are too small to change them noticeably; or there may be a problem with the training strategy, but I am not sure of the exact cause. Below is the accuracy I see during training:

[training accuracy screenshot]

```python
from collections import OrderedDict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from transformers import AlbertModel
# `Siamese` (the BiLSTM + fully-connected head) is defined elsewhere in my code

class MetaTask(nn.Module):
    def __init__(self, args):
        super(MetaTask, self).__init__()
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.loss_fn = nn.CrossEntropyLoss()
        self.update_lr = args.update_lr
        self.meta_lr = args.meta_lr
        self.finetunning_lr = args.finetunning_lr
        self.n_way = args.n_way
        self.k_spt = args.k_spt
        self.k_qry = args.k_qry
        self.task_num = args.task_num
        self.update_step = args.update_step
        self.update_step_test = args.update_step_test
        self.net = SubjectiveGradingModel().to(self.device)
        self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)

    def forward(self, support_x, support_y, query_x, query_y):
        task_num = len(support_x)
        querysz = len(query_x[0])
        losses_q = [0 for _ in range(self.update_step + 1)]
        corrects = [0 for _ in range(self.update_step + 1)]
        for i in range(task_num):
            self.net.train()
            # 1. run the i-th task and compute the loss for k=0
            logits = self.net(support_x[i])
            loss = self.loss_fn(logits, torch.cat(support_y[i], dim=0).long())
            fast_weights = OrderedDict(self.net.named_parameters())
            grad = torch.autograd.grad(loss, fast_weights.values(), retain_graph=True)
            # print the parameters whose gradient is None
            # for (name, param), gra in zip(self.net.named_parameters(), grad):
            #     if gra is None:
            #         print("parameter with None gradient:", name)
            fast_weights = OrderedDict(
                (name, param - self.update_lr * grad)
                for ((name, param), grad) in zip(fast_weights.items(), grad)
            )
            # this is the loss and accuracy before the first update
            with torch.no_grad():
                self.net.eval()
                logits_q = self.net(query_x[i])
                loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
                losses_q[0] += loss_q
                pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()
                corrects[0] = corrects[0] + correct

            # this is the loss and accuracy after the first update
            with torch.no_grad():
                self.net.eval()
                self.net.load_state_dict(fast_weights, strict=False)
                logits_q = self.net(query_x[i])
                loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
                losses_q[1] += loss_q

                pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()
                corrects[1] = corrects[1] + correct
            self.net.train()
            for k in range(1, self.update_step):
                # 1. run the i-th task and compute the loss for k=1~K-1
                self.net.load_state_dict(fast_weights, strict=False)
                logits = self.net(support_x[i])
                loss = self.loss_fn(logits, torch.cat(support_y[i], dim=0).long())
                # 2. compute the grad on theta_pi
                fast_weights = OrderedDict(self.net.named_parameters())
                grad = torch.autograd.grad(loss, fast_weights.values(), retain_graph=True)
                # 3. theta_pi = theta_pi - train_lr * grad
                fast_weights = OrderedDict(
                    (name, param - self.update_lr * grad)
                    for ((name, param), grad) in zip(fast_weights.items(), grad)
                )
                self.net.load_state_dict(fast_weights, strict=False)
                logits_q = self.net(query_x[i])
                # loss_q is overwritten so that only the loss on the last update step is kept
                loss_q = self.loss_fn(logits_q, torch.cat(query_y[i], dim=0).long())
                losses_q[k + 1] += loss_q

                with torch.no_grad():
                    pred_q = F.softmax(logits_q, dim=1).argmax(dim=1)
                    correct = torch.eq(pred_q, torch.cat(query_y[i], dim=0).long()).sum().item()  # .item() gives a Python scalar
                    corrects[k + 1] = corrects[k + 1] + correct

        loss_q = losses_q[-1] / task_num
        # optimize the theta parameters
        self.meta_optim.zero_grad()
        loss_q.backward(retain_graph=True)
        # print('meta update')
        self.meta_optim.step()
        accs = np.array(corrects) / (querysz * task_num)
        return accs

class SubjectiveGradingModel(nn.Module):
    def __init__(self, hidden_size=384):
        super(SubjectiveGradingModel, self).__init__()
        # load the pretrained ALBERT model and tokenizer
        self.bert = AlbertModel.from_pretrained('src/datamoudle/model/albert_chinese_small')
        # Siamese network
        self.siamese_network = Siamese(max_length=378, embedding_size=hidden_size)

    def forward(self, input_data, weights=None):
        # split the data in each dict into separate lists
        input_ids_list = [item['input_ids'].squeeze(0).squeeze(0) for item in input_data]
        token_type_ids_list = [item['token_type_ids'].squeeze(0).squeeze(0) for item in input_data]
        attention_mask_list = [item['attention_mask'].squeeze(0).squeeze(0) for item in input_data]
        answer_input_ids_list = [item['answer_input_ids'].squeeze(0).squeeze(0) for item in input_data]
        answer_token_type_ids_list = [item['answer_token_type_ids'].squeeze(0).squeeze(0) for item in input_data]
        answer_attention_mask_list = [item['answer_attention_mask'].squeeze(0).squeeze(0) for item in input_data]

        # stack into PyTorch tensors
        input_ids = torch.stack(input_ids_list)
        token_type_ids = torch.stack(token_type_ids_list)
        attention_mask = torch.stack(attention_mask_list)
        answer_input_ids = torch.stack(answer_input_ids_list)
        answer_token_type_ids = torch.stack(answer_token_type_ids_list)
        answer_attention_mask = torch.stack(answer_attention_mask_list)

        outputs = self.bert(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state
        cls_output = outputs.pooler_output
        outputs_answer = self.bert(input_ids=answer_input_ids, token_type_ids=answer_token_type_ids, attention_mask=answer_attention_mask)
        pooled_output_answer = outputs_answer.last_hidden_state
        cls_output_answer = outputs_answer.pooler_output

        siamese_output = self.siamese_network(pooled_output, pooled_output_answer, cls_output, cls_output_answer)

        return siamese_output

```

What could be causing this?
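
One way to test the suspicion that the weights are not moving is to snapshot the parameters around a meta-update and compare. Below is a minimal diagnostic sketch, assuming a `MetaTask` instance named `meta` and a prepared batch of tasks; the helper names are illustrative, not part of this repo:

```python
import torch

def snapshot(model):
    # detached copies of all parameters, taken before the update
    return {name: p.detach().clone() for name, p in model.named_parameters()}

def weight_delta(before, model):
    # sum of per-parameter L2 distances moved since the snapshot;
    # a value near zero means the meta-update is not changing the weights
    return sum((p.detach() - before[name]).norm().item()
               for name, p in model.named_parameters())

def grad_norm(model):
    # sum of per-parameter gradient norms left over after backward();
    # a value near zero points at vanishing or blocked gradients
    return sum(p.grad.norm().item()
               for p in model.parameters() if p.grad is not None)

# illustrative usage around one meta-update:
# before = snapshot(meta.net)
# accs = meta(support_x, support_y, query_x, query_y)
# print('grad norm:', grad_norm(meta.net))
# print('weight delta:', weight_delta(before, meta.net))
```

If the gradient norm looks healthy but the weight delta is near zero, the learning rate may be too small; if both are near zero, the graph from the loss back to the parameters is worth checking.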

jay152forcreate commented 5 months ago

Bro, did you ever solve this? I am hitting the same problem: the accuracy barely changes when training with this strategy.

shiyao1999 commented 5 months ago

My problem turned out to be the dataset, not the model. Some punctuation in the dataset was wrong at the time; once I cleaned it up, everything worked.
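
For reference, the kind of punctuation cleanup involved might look like the sketch below; the exact fix used here is not shown in the thread, so the mapping is purely illustrative:

```python
# Illustrative full-width -> half-width punctuation normalization for a
# Chinese text dataset; the actual mapping that fixed this issue is not
# given in the thread.
FULL_TO_HALF = {
    '，': ',', '。': '.', '！': '!', '？': '?',
    '：': ':', '；': ';', '（': '(', '）': ')',
    '“': '"', '”': '"', '‘': "'", '’': "'",
}

def normalize_punctuation(text: str) -> str:
    # replace full-width punctuation so the tokenizer sees one consistent
    # set of symbols across the whole dataset
    return ''.join(FULL_TO_HALF.get(ch, ch) for ch in text)

print(normalize_punctuation('这是一个例子，对吗？'))  # -> 这是一个例子,对吗?
```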
