Closed sshen82 closed 3 weeks ago
Hi, I was working with a toy example from PyTorch Geometric (https://github.com/pyg-team/pytorch_geometric/blob/master/examples/hetero/hetero_link_pred.py), and found that a model converted with `to_hetero` can be saved, but one converted with `to_hetero_with_bases` cannot. The error message is "AttributeError: Can't pickle local object 'HeteroBasisConv.__init__.<locals>.hook'". Do you know the possible reason?
import argparse
import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import Linear

import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens
from torch_geometric.nn import SAGEConv, to_hetero, to_hetero_with_bases

# Pick the first available backend: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

path = '/p/keles/schic/volumeC/SiqiShen/code/Playground/data/MovieLens'
dataset = MovieLens(path, model_name='all-MiniLM-L6-v2')
data = dataset[0].to(device)

# Add user node features for message passing:
data['user'].x = torch.eye(data['user'].num_nodes, device=device)
del data['user'].num_nodes

# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing:
data = T.ToUndirected()(data)
del data['movie', 'rev_rates', 'user'].edge_label  # Remove "reverse" label.

# Perform a link-level split into training, validation, and test edges:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('user', 'rates', 'movie')],
    rev_edge_types=[('movie', 'rev_rates', 'user')],
)(data)

# We have an unbalanced dataset with many labels for rating 3 and 4, and very
# few for 0 and 1. Therefore we use a weighted MSE loss.
weight = torch.bincount(train_data['user', 'movie'].edge_label)
weight = weight.max() / weight


def weighted_mse_loss(pred, target, weight=None):
    """Return the mean squared error with a per-sample weight.

    Each squared error is scaled by ``weight[target]`` (cast to the dtype of
    ``pred``), or by ``1.`` when ``weight`` is ``None``.
    """
    weight = 1. if weight is None else weight[target].to(pred.dtype)
    return (weight * (pred - target.to(pred.dtype)).pow(2)).mean()


class GNNEncoder(torch.nn.Module):
    """Two stacked ``SAGEConv`` layers with a ReLU after the first."""

    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        # NOTE(review): (-1, -1) presumably defers the input sizes to the
        # first forward pass -- confirm against the SAGEConv documentation.
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


class EdgeDecoder(torch.nn.Module):
    """Score (user, movie) pairs from their concatenated embeddings."""

    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        # First row indexes the 'user' embeddings, second row the 'movie'
        # embeddings.
        row, col = edge_label_index
        z = torch.cat([z_dict['user'][row], z_dict['movie'][col]], dim=-1)

        z = self.lin1(z).relu()
        z = self.lin2(z)
        return z.view(-1)  # Flatten to one score per labelled edge.


class Model(torch.nn.Module):
    """Encoder converted with ``to_hetero_with_bases`` plus an edge decoder."""

    def __init__(self, hidden_channels):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
        # Alternative conversion, kept for comparison with the basis variant:
        # self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.encoder = to_hetero_with_bases(self.encoder,
                                            metadata=data.metadata(),
                                            num_bases=3,
                                            in_channels={'x': hidden_channels})
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)


model = Model(hidden_channels=32).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


def train():
    """Run one optimisation step on the training split; return the loss."""
    model.train()
    optimizer.zero_grad()
    pred = model(train_data.x_dict, train_data.edge_index_dict,
                 train_data['user', 'movie'].edge_label_index)
    target = train_data['user', 'movie'].edge_label
    loss = weighted_mse_loss(pred, target, weight)
    loss.backward()
    optimizer.step()
    return float(loss)


@torch.no_grad()
def test(data):
    """Return the RMSE of predictions clamped to [0, 5] on ``data``."""
    model.eval()
    pred = model(data.x_dict, data.edge_index_dict,
                 data['user', 'movie'].edge_label_index)
    pred = pred.clamp(min=0, max=5)
    target = data['user', 'movie'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    return float(rmse)


for epoch in range(1, 101):
    loss = train()
    # Serialises the whole module object (not just its state_dict) after
    # every training step.
    torch.save(model,'/p/keles/schic/volumeC/SiqiShen/code/Playground/tmp.pt')
    train_rmse = test(train_data)
    val_rmse = test(val_data)
    test_rmse = test(test_data)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
          f'Val: {val_rmse:.4f}, Test: {test_rmse:.4f}')
print(torch.__version__)
print(torch_geometric.__version__)
2.1.1+cu121
2.4.0
Fix is here: https://github.com/pyg-team/pytorch_geometric/pull/9399
However, note that it is better to just save the state_dict rather than pickling the whole module.
state_dict
🐛 Describe the bug
Hi, I was working with a toy example from PyTorch Geometric (https://github.com/pyg-team/pytorch_geometric/blob/master/examples/hetero/hetero_link_pred.py), and found that a model converted with `to_hetero` can be saved, but one converted with `to_hetero_with_bases` cannot. The error message is "AttributeError: Can't pickle local object 'HeteroBasisConv.__init__.<locals>.hook'". Do you know the possible reason?
Versions
print(torch.__version__)
print(torch_geometric.__version__)
2.1.1+cu121
2.4.0