RidongHan / DocRE-Co-Occur

The PyTorch code for the paper "Document-level Relation Extraction with Relation Correlations", published in the Neural Networks journal.

from graph import GraphReasonLayer #1

Open zhang-yafei0507 opened 1 year ago

zhang-yafei0507 commented 1 year ago

File "/root/autodl-fs/Correl-RE/model.py", line 6, in from graph import GraphReasonLayer ModuleNotFoundError: No module named 'graph' 这个graph包怎么安装

RidongHan commented 1 year ago

Just comment out all the graph-related code.

The model in this repo doesn't actually use any graph operations.
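
Roughly like this in model.py (the exact lines depend on your copy; everything below is only an illustration of what to comment out, not the repo's actual code):

    # model.py
    # from graph import GraphReasonLayer   # unresolved import -- safe to comment out
    # ...
    # self.graph_reason = GraphReasonLayer(...)   # hypothetical usage, comment out if present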

RidongHan commented 1 year ago

Or create a new graph.py:

import torch
import torch.nn as nn
from transformers import *
import torch.nn.functional as F

class GraphConvolutionLayer(nn.Module):

    def __init__(self, edges, input_size, hidden_size, graph_drop):
        super(GraphConvolutionLayer, self).__init__()
        self.W = nn.Parameter(torch.Tensor(size=(input_size, hidden_size)))
        self.edges = edges
        # one linear transform per edge type
        self.W_edge = nn.ModuleList([nn.Linear(hidden_size, hidden_size, bias=False) for i in self.edges])
        self.bias = nn.Parameter(torch.Tensor(hidden_size))
        self.loop_weight = nn.Parameter(torch.Tensor(input_size, hidden_size))
        self.drop = torch.nn.Dropout(p=graph_drop, inplace=False)
        self.init_weight()

    def init_weight(self):
        nn.init.xavier_uniform_(self.W, gain=nn.init.calculate_gain('relu'))
        for m in self.W_edge:
            nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('relu'))
        nn.init.zeros_(self.bias)
        nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain('relu'))

    def forward(self, nodes_embed, node_adj):
        N_bt = nodes_embed.shape[0]
        N = nodes_embed.shape[1]
        h = torch.matmul(nodes_embed, self.W.unsqueeze(0))
        # aggregate neighbour messages per edge type using the typed adjacency matrix
        sum_nei = torch.zeros_like(h)
        for edge_type in range(len(self.edges)):
            mask = (node_adj == (edge_type + 1)).float()
            sum_nei += torch.matmul(mask, self.W_edge[edge_type](h))

        # degree normalization, bias, and a self-loop transform of the input
        degs = torch.sum(node_adj > 0, dim=-1).float().clamp(min=1).unsqueeze(dim=-1)
        norm = 1.0 / degs
        dst = sum_nei * norm + self.bias
        dst = dst + torch.matmul(nodes_embed, self.loop_weight)
        out = self.drop(torch.relu(dst))
        return out

class GraphMultiHeadAttention(nn.Module):

    def __init__(self, edges, input_size, hidden_size, nhead=4, graph_drop=0.0):
        super(GraphMultiHeadAttention, self).__init__()
        assert hidden_size % nhead == 0
        ho = int(hidden_size / nhead)
        self.head_graph = nn.ModuleList([GraphAttentionLayer(edges, input_size, ho, graph_drop)
                                         for _ in range(nhead)])
        self.nhead = nhead
        self.layer_norm = nn.LayerNorm(input_size, eps=1e-6)

    def forward(self, nodes_embed, node_adj):
        x = []
        for cnt in range(0, self.nhead):
            x.append(self.head_graph[cnt](nodes_embed, node_adj))

        return torch.cat(x, dim=-1)

class GraphAttentionLayer(nn.Module):

    def __init__(self, edges, input_size, hidden_size, graph_drop):
        super(GraphAttentionLayer, self).__init__()
        self.W = nn.Parameter(torch.Tensor(size=(input_size, hidden_size)))
        self.edges = edges
        # one attention scorer per edge type, applied to concatenated node pairs
        self.W_edge = nn.ModuleList([nn.Linear(2 * hidden_size, 1, bias=False) for i in self.edges])
        self.bias = nn.Parameter(torch.Tensor(hidden_size))
        self.self_loop = False
        self.loop_weight = nn.Linear(hidden_size, 1, bias=False)
        self.hidden_size = hidden_size

        self.drop = torch.nn.Dropout(p=graph_drop, inplace=False)
        self.layer_norm = nn.LayerNorm(hidden_size, eps=1e-6)
        self.init_weight()

    def init_weight(self):
        nn.init.xavier_uniform_(self.W, gain=nn.init.calculate_gain('relu'))
        for m in self.W_edge:
            nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('relu'))
        nn.init.zeros_(self.bias)
        nn.init.xavier_uniform_(self.loop_weight.weight, gain=nn.init.calculate_gain('relu'))

    def forward(self, nodes_embed, node_adj):
        N_bt = nodes_embed.shape[0]
        N = nodes_embed.shape[1]
        h = torch.matmul(nodes_embed, self.W.unsqueeze(0))
        # all ordered node pairs: (N_bt, N*N, 2*hidden_size)
        a_input = torch.cat([h.repeat(1, 1, N).view(N_bt, N * N, -1), h.repeat(1, N, 1)], dim=-1)
        # edge-type-specific attention logits, masked by the typed adjacency matrix
        weight = torch.zeros(N_bt, N * N).cuda()
        for edge_type in range(len(self.edges)):
            mask = (node_adj == (edge_type + 1)).float().view(N_bt, -1)
            weight += mask * self.W_edge[edge_type](a_input).squeeze(dim=-1)

        if self.self_loop:
            sl_mask = torch.zeros_like(node_adj)
            sl_mask[:, torch.arange(node_adj.shape[1]).cuda(), torch.arange(node_adj.shape[2]).cuda()] = 1
            sl_mask = sl_mask.view(N_bt, -1)
            weight += sl_mask * self.loop_weight(a_input[..., :self.hidden_size]).squeeze(dim=-1)

        weight = F.leaky_relu(weight).view(N_bt, N, N)
        weight = weight.masked_fill(node_adj == 0, -1e9)
        attention = F.softmax(weight, dim=-1)
        dst = torch.matmul(attention, h) + self.bias

        # residual connection followed by layer norm
        out = self.drop(torch.relu(dst)) + h
        return self.layer_norm(out)

class GraphReasonLayer(nn.Module):

    def __init__(self, edges, input_size, out_size, iters, graph_type="gat", graph_drop=0.0, graph_head=4):
        super(GraphReasonLayer, self).__init__()
        self.iters = iters
        self.edges = edges
        self.graph_type = graph_type
        if graph_type == "gat":
            self.block = nn.ModuleList([GraphAttentionLayer(edges, input_size, input_size, graph_drop)
                                        for i in range(iters)])
        elif graph_type == "gcn":
            self.block = nn.ModuleList([GraphConvolutionLayer(edges, input_size, input_size, graph_drop)
                                        for i in range(iters)])
        else:
            raise ValueError("[Error]: unknown graph encoder type: " + graph_type)

    def forward(self, nodes_embed, node_adj):
        # run the graph encoder `iters` times and concatenate each iteration's output to the input
        hi = nodes_embed
        for cnt in range(0, self.iters):
            hi = self.block[cnt](hi, node_adj)
            nodes_embed = torch.cat((nodes_embed, hi), dim=-1)

        return nodes_embed  # [input, output_1, output_2]
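
If you want to sanity-check the file, a toy run like the one below should go through (the edge names, sizes and iteration count are just placeholders, and the attention layer hard-codes .cuda(), so it needs a GPU):

    if __name__ == "__main__":
        # toy smoke test -- every size and edge name here is a placeholder
        edges = ["intra", "inter"]                                     # 2 assumed edge types
        layer = GraphReasonLayer(edges, input_size=16, out_size=16, iters=2, graph_type="gat").cuda()
        nodes_embed = torch.randn(1, 5, 16).cuda()                     # (batch, nodes, input_size)
        node_adj = torch.randint(0, len(edges) + 1, (1, 5, 5)).cuda()  # 0 = no edge, 1..2 = edge type id
        print(layer(nodes_embed, node_adj).shape)                      # (1, 5, 48): input plus 2 iterations
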
zhang-yafei0507 commented 1 year ago

Thanks, man, that solved it.

zhang-yafei0507 commented 1 year ago

total parameters: 118464865
alpha: 0.7
Total steps: 76300
Warmup steps: 4578
0 step/Epoch 0, Total Loss 0.489075, DocRE_loss 0.694144, RelCorrel_loss 0.324546, TriCorel_loss 0.760853,
150 step/Epoch 0, Total Loss 0.062797, DocRE_loss 0.669597, RelCorrel_loss 0.062794, TriCorel_loss 0.024140,
300 step/Epoch 0, Total Loss 0.045872, DocRE_loss 0.489982, RelCorrel_loss 0.042424, TriCorel_loss 0.025670,
450 step/Epoch 0, Total Loss 0.013898, DocRE_loss 0.003959, RelCorrel_loss 0.045392, TriCorel_loss 0.018522,
600 step/Epoch 0, Total Loss 0.011402, DocRE_loss 0.002993, RelCorrel_loss 0.044053, TriCorel_loss 0.024918,
750 step/Epoch 0, Total Loss 0.011993, DocRE_loss 0.002734, RelCorrel_loss 0.054080, TriCorel_loss 0.134803,
900 step/Epoch 0, Total Loss 0.012953, DocRE_loss 0.003289, RelCorrel_loss 0.062873, TriCorel_loss 0.016047,
1050 step/Epoch 0, Total Loss 0.010287, DocRE_loss 0.002412, RelCorrel_loss 0.048769, TriCorel_loss 0.072633,
1200 step/Epoch 0, Total Loss 0.008878, DocRE_loss 0.002214, RelCorrel_loss 0.046350, TriCorel_loss 0.011490,

Traceback (most recent call last):
  File "train.py", line 356, in <module>
    main()
  File "train.py", line 335, in main
    train(args, model, train_features, dev_features, test_features, id2rel, logger)
  File "train.py", line 125, in train
    finetune(train_features, optimizer, args.num_train_epochs, num_steps, id2rel, logger)
  File "train.py", line 39, in finetune
    outputs = model(*batch)
  File "/root/miniconda3/envs/py1.7cu10.2/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/root/autodl-fs/Correl-RE/model.py", line 407, in forward
    correl_triplet_loss = torch.stack(correl_triplet_loss, dim=0).mean()
RuntimeError: stack expects a non-empty TensorList

(py1.7cu10.2) root@autodl-container-7e5011ac52-5cbf5102:~/autodl-fs/Correl-RE# bash ./scripts/docred/run_bert.sh
{
  "data_dir": "./dataset/docred",
  "prepro_dir": "./processed_data/docred/JE/"

I'd like to know why correl_triplet_loss ends up empty when train_batch_size is set to 2. Setting it to 4 works, but with my 2080 Ti the GPU memory easily overflows.
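
In case it helps anyone hitting the same crash: a stopgap I'd expect to work (not the repo's official fix; the zero fallback is my own assumption) is to guard the stack at the model.py line from the traceback so an empty list for a small batch doesn't abort training:

    # model.py, around the line shown in the traceback -- sketch only
    if len(correl_triplet_loss) > 0:
        correl_triplet_loss = torch.stack(correl_triplet_loss, dim=0).mean()
    else:
        # no valid triplets collected for this (small) batch: fall back to a zero loss on the model's device
        correl_triplet_loss = torch.zeros((), device=next(self.parameters()).device)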