I need to train a simple GCN on the ogbg-molbace dataset, aiming for reproducible results. I've made slight modifications to your main_pyg.py script (from this source) and manually set a seed before defining the model. However, I've noticed that running the script multiple times yields a different performance each time. For clarity, I'm including the performance of a GCN on ogbg-molhiv, as the leaderboard only reports GCN performance on this dataset. Across 10 runs, the test scores have a standard deviation of ~1%, which is unexpected given that the models share the same weight initialization.
Since I'm using the default hyperparameters, I think the issue may be related to model convergence. Could you please provide the hyperparameters used to train a GCN? The default model in the script is GIN, so I assume the provided hyperparameters are intended for it.
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [32:19<00:00, 19.39s/it]
Finished training!
Best epoch: 38
Best validation score: 0.8119886586321772
Test score: 0.7545201722706116
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [32:15<00:00, 19.35s/it]
Finished training!
Best epoch: 51
Best validation score: 0.8196250979815796
Test score: 0.7756580853241661
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [32:24<00:00, 19.45s/it]
Finished training!
Best epoch: 49
Best validation score: 0.8157303301979227
Test score: 0.7535989493810231
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [32:42<00:00, 19.63s/it]
Finished training!
Best epoch: 73
Best validation score: 0.8068874926513814
Test score: 0.7652619787944919
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [32:27<00:00, 19.48s/it]
Finished training!
Best epoch: 51
Best validation score: 0.8044425460513424
Test score: 0.7589679213580796
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [32:17<00:00, 19.37s/it]
Finished training!
Best epoch: 28
Best validation score: 0.819634283754654
Test score: 0.7728866915158655
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [31:45<00:00, 19.06s/it]
Finished training!
Best epoch: 51
Best validation score: 0.8099769743288262
Test score: 0.7803182757488556
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [31:45<00:00, 19.06s/it]
Finished training!
Best epoch: 24
Best validation score: 0.8087368949637468
Test score: 0.7654087564456633
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [31:44<00:00, 19.05s/it]
Finished training!
Best epoch: 71
Best validation score: 0.8040307172251616
Test score: 0.7746557484694568
python main_pyg.py --gnn "gcn"
100%|██████████| 100/100 [31:46<00:00, 19.07s/it]
Finished training!
Best epoch: 63
Best validation score: 0.80362041936116
Test score: 0.7799976824581394
import numpy as np

# Test scores of the ten runs logged above, listed in run order.
a = np.array(
    [
        0.7545201722706116,
        0.7756580853241661,
        0.7535989493810231,
        0.7652619787944919,
        0.7589679213580796,
        0.7728866915158655,
        0.7803182757488556,
        0.7654087564456633,
        0.7746557484694568,
        0.7799976824581394,
    ]
)

num_runs = len(a)
mean_score = a.mean()
# ndarray.std() is called with its defaults here (no ddof argument given).
score_std = a.std()
print(f"Num runs: {num_runs} - test accuracy: {mean_score} +/- {score_std}")
Num runs: 10 - test accuracy: 0.7681274261766353 +/- 0.009524876389729419
import torch
from torch_geometric.seed import seed_everything
from torch_geometric.loader import DataLoader
import torch.optim as optim
import torch.nn.functional as F
from gnn import GNN
from tqdm import tqdm
import argparse
import time
import numpy as np
### importing OGB
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
# Loss used by train() when the task type contains "classification":
# binary cross-entropy computed on raw logits.
cls_criterion = torch.nn.BCEWithLogitsLoss()
# Loss used by train() for every other task type: mean squared error.
reg_criterion = torch.nn.MSELoss()
def train(model, device, loader, optimizer, task_type):
    """Run one optimisation pass over every batch produced by ``loader``.

    Args:
        model: Network to optimise; it is switched to training mode first.
        device: Device each batch is moved to before the forward pass.
        loader: Iterable of batches exposing ``x``, ``batch`` and ``y``.
        optimizer: Optimizer stepped once per processed batch.
        task_type: Task description string; when it contains
            ``"classification"`` the classification loss is used, otherwise
            the regression loss.
    """
    model.train()

    for graphs in loader:
        graphs = graphs.to(device)

        # Skip batches that hold a single node, or whose last node is assigned
        # to graph index 0 (presumably a one-graph batch -- TODO confirm the
        # batch vector is sorted by graph index).
        single_node = graphs.x.shape[0] == 1
        if single_node or graphs.batch[-1] == 0:
            continue

        pred = model(graphs)
        optimizer.zero_grad()

        # Ignore NaN (unlabeled) targets: NaN never compares equal to itself,
        # so the mask is False exactly at those entries.
        is_labeled = graphs.y == graphs.y

        criterion = cls_criterion if "classification" in task_type else reg_criterion
        masked_pred = pred.to(torch.float32)[is_labeled]
        masked_target = graphs.y.to(torch.float32)[is_labeled]
        loss = criterion(masked_pred, masked_target)

        loss.backward()
        optimizer.step()
def eval(model, device, loader, evaluator):
    """Score ``model`` on every batch of ``loader`` using ``evaluator``.

    Args:
        model: Network to evaluate; it is switched to evaluation mode first.
        device: Device each batch is moved to before the forward pass.
        loader: Iterable of batches exposing ``x`` and ``y``.
        evaluator: Object whose ``eval`` method receives a dict with the
            keys ``"y_true"`` and ``"y_pred"`` (NumPy arrays).

    Returns:
        Whatever ``evaluator.eval`` returns for the collected targets and
        predictions.
    """
    model.eval()
    targets, outputs = [], []

    with torch.no_grad():
        for graphs in loader:
            graphs = graphs.to(device)

            # Batches made of a single node are left out of the evaluation.
            if graphs.x.shape[0] == 1:
                continue

            scores = model(graphs)
            # Targets are reshaped to the prediction shape so both line up.
            targets.append(graphs.y.view(scores.shape).detach().cpu())
            outputs.append(scores.detach().cpu())

    stacked_targets = torch.cat(targets, dim=0).numpy()
    stacked_outputs = torch.cat(outputs, dim=0).numpy()
    return evaluator.eval({"y_true": stacked_targets, "y_pred": stacked_outputs})
def main():
    """Train a GNN baseline on an OGB molecule dataset and report its scores.

    Parses the command-line options, loads the dataset with its predefined
    split, trains for ``--epochs`` epochs, and evaluates on the train,
    validation and test splits after every epoch. The reported test score is
    the one at the epoch with the best validation score (highest for
    classification tasks, lowest otherwise). When ``--filename`` is given,
    the selected scores are also saved there with ``torch.save``.

    Raises:
        ValueError: If ``--gnn`` is not one of the supported model names.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='GNN baselines on ogbgmol* data with Pytorch Geometrics')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--gnn', type=str, default='gin-virtual',
                        help='GNN gin, gin-virtual, or gcn, or gcn-virtual (default: gin-virtual)')
    parser.add_argument('--drop_ratio', type=float, default=0.5,
                        help='dropout ratio (default: 0.5)')
    parser.add_argument('--num_layer', type=int, default=5,
                        help='number of GNN message passing layers (default: 5)')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='dimensionality of hidden units in GNNs (default: 300)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers', type=int, default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--dataset', type=str, default="ogbg-molhiv",
                        help='dataset name (default: ogbg-molhiv)')
    parser.add_argument('--feature', type=str, default="full",
                        help='full feature or simple feature')
    parser.add_argument('--filename', type=str, default="",
                        help='filename to output result (default: )')
    # The seed used to be hard-coded; the default keeps the previous value.
    parser.add_argument('--seed', type=int, default=123,
                        help='random seed (default: 123)')
    parser.add_argument('--deterministic', action='store_true',
                        help='ask PyTorch to use deterministic algorithms where available '
                             '(warns for ops without one; default: off)')
    args = parser.parse_args()

    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")

    ### automatic dataloading and splitting
    dataset = PygGraphPropPredDataset(name = args.dataset)

    if args.feature == 'simple':
        print('using simple feature')
        # only retain the top two node/edge features
        dataset.data.x = dataset.data.x[:,:2]
        dataset.data.edge_attr = dataset.data.edge_attr[:,:2]

    split_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = Evaluator(args.dataset)

    train_loader = DataLoader(dataset[split_idx["train"]], batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
    valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=args.batch_size*2, shuffle=False, num_workers = args.num_workers)
    test_loader = DataLoader(dataset[split_idx["test"]], batch_size=args.batch_size*2, shuffle=False, num_workers = args.num_workers)

    # Seed right before the model is built, as before.
    seed_everything(args.seed)
    if args.deterministic:
        # NOTE(review): seeding alone may not make GPU runs bit-identical;
        # PyTorch's reproducibility notes list CUDA ops that are
        # nondeterministic by default. Confirm that this flag removes the
        # run-to-run variance for the chosen model before relying on it.
        torch.use_deterministic_algorithms(True, warn_only=True)

    # Map each --gnn choice to (gnn_type, virtual_node) for the GNN constructor.
    gnn_variants = {
        'gin': ('gin', False),
        'gin-virtual': ('gin', True),
        'gcn': ('gcn', False),
        'gcn-virtual': ('gcn', True),
    }
    if args.gnn not in gnn_variants:
        raise ValueError('Invalid GNN type')
    gnn_type, virtual_node = gnn_variants[args.gnn]
    model = GNN(gnn_type = gnn_type, num_tasks = dataset.num_tasks, num_layer = args.num_layer, emb_dim = args.emb_dim, drop_ratio = args.drop_ratio, virtual_node = virtual_node).to(device)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    valid_curve = []
    test_curve = []
    train_curve = []

    for epoch in tqdm(range(1, args.epochs + 1)):
        train(model, device, train_loader, optimizer, dataset.task_type)

        train_perf = eval(model, device, train_loader, evaluator)
        valid_perf = eval(model, device, valid_loader, evaluator)
        test_perf = eval(model, device, test_loader, evaluator)

        train_curve.append(train_perf[dataset.eval_metric])
        valid_curve.append(valid_perf[dataset.eval_metric])
        test_curve.append(test_perf[dataset.eval_metric])

    # Model selection: best validation score is the maximum for classification
    # metrics and the minimum otherwise.
    if 'classification' in dataset.task_type:
        best_val_epoch = np.argmax(np.array(valid_curve))
        best_train = max(train_curve)
    else:
        best_val_epoch = np.argmin(np.array(valid_curve))
        best_train = min(train_curve)

    print('Finished training!')
    # best_val_epoch is a 0-based index into the curves, while the loop above
    # counts epochs from 1.
    print(f"Best epoch: {best_val_epoch}")
    print('Best validation score: {}'.format(valid_curve[best_val_epoch]))
    print('Test score: {}'.format(test_curve[best_val_epoch]))

    if args.filename != '':
        torch.save({'Val': valid_curve[best_val_epoch], 'Test': test_curve[best_val_epoch], 'Train': train_curve[best_val_epoch], 'BestTrain': best_train}, args.filename)


if __name__ == "__main__":
    main()
Hi,
I need to train a simple GCN on the ogbg-molbace dataset, aiming for reproducible results. I've made slight modifications to your main_pyg.py script (from this source) and manually set a seed before defining the model. However, I've noticed that running the script multiple times yields a different performance each time. For clarity, I'm including the performance of a GCN on ogbg-molhiv, as the leaderboard only reports GCN performance on this dataset. Across 10 runs, the test scores have a standard deviation of ~1%, which is unexpected given that the models share the same weight initialization.
Since I'm using the default hyperparameters, I think the issue may be related to model convergence. Could you please provide the hyperparameters used to train a GCN? The default model in the script is GIN, so I assume the provided hyperparameters are intended for it.