THUDM / grb

Graph Robustness Benchmark: A scalable, unified, modular, and reproducible benchmark for evaluating the adversarial robustness of Graph Machine Learning.
https://cogdl.ai/grb/home
MIT License

Can't reach the accuracy of leaderboard #8

Open xiangchi-yuan opened 1 year ago

xiangchi-yuan commented 1 year ago

Hi, I tried to use the pipeline to reproduce the results on the GRB leaderboard, but I can't reach the accuracy reported in the paper and on the GRB website. There is always a 2-5% gap between the reported numbers and my experiments. Could you please provide the full code for reproducing them?

Stanislas0 commented 1 year ago

Hi! Could you please specify which model on which dataset you can't reproduce? If the gap appears across all of them, there is probably something wrong with your experiment settings (environment, hyper-parameters, etc.).
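For reference, a minimal sketch (not GRB-specific) of the kind of seed/environment pinning meant here, using standard PyTorch/NumPy calls; exact leaderboard numbers may still vary slightly due to non-deterministic CUDA kernels:

```python
# Generic reproducibility knobs (assumption: plain PyTorch/NumPy seeding,
# not part of the GRB API). The pipelines below default to --seed 0.
import random

import numpy as np
import torch


def set_seed(seed: int = 0):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Optional: trade speed for determinism on GPU.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


set_seed(0)
```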

xiangchi-yuan commented 1 year ago

For example, I tried to reproduce the basic models with adversarial training (AT) using the pipeline provided in the GRB GitHub repo. I didn't change any hyper-parameters, only the model file directories. These are the argument defaults of adv_train_pipeline:

```python
parser = argparse.ArgumentParser(description='Adversarial training GNN models in pipeline.')

# Dataset settings

parser.add_argument("--dataset", type=str, default="grb-cora")
parser.add_argument("--data_dir", type=str, default="../data/")
parser.add_argument("--feat_norm", type=str, default="arctan")
# Model settings
parser.add_argument("--model", nargs='+', default=None)
parser.add_argument("--save_dir", type=str, default="../saved_models/")
parser.add_argument("--config_dir", type=str, default="../pipeline/configs/")
parser.add_argument("--log_dir", type=str, default="../pipeline/logs/")
parser.add_argument("--save_name", type=str, default="model_at.pt")
# Attack setting
parser.add_argument("--attack_adv", type=str, default="fgsm")
parser.add_argument("--attack_epoch", type=int, default=10)
parser.add_argument("--attack_lr", type=float, default=0.01)
parser.add_argument("--n_attack", type=int, default=1)
parser.add_argument("--n_inject_ratio", type=float, default=None)
parser.add_argument("--n_inject_max", type=int, default=20)
parser.add_argument("--n_edge_max", type=int, default=20)
parser.add_argument("--feat_lim_min", type=float, default=None)
parser.add_argument("--feat_lim_max", type=float, default=None)
# Adversarial training settings
parser.add_argument("--gpu", type=int, default=0, help="gpu")
parser.add_argument("--n_train", type=int, default=1)
parser.add_argument("--n_epoch", type=int, default=8000, help="Training epoch.")
parser.add_argument("--lr", type=float, default=0.01, help="Learning rate.")
parser.add_argument("--eval_every", type=int, default=1)
parser.add_argument("--save_after", type=int, default=0)
parser.add_argument("--train_mode", type=str, default="inductive")
parser.add_argument("--eval_metric", type=str, default="acc")
parser.add_argument("--early_stop", action="store_true")
parser.add_argument("--early_stop_patience", type=int, default=500)
parser.add_argument("--lr_scheduler", action="store_true")
parser.add_argument("--seed", type=int, default=0)
parser.add_argument("--verbose", action="store_true")

train_pipeline:

```python
parser = argparse.ArgumentParser(description='Training GML models in pipeline.')

# Dataset settings
parser.add_argument("--dataset", type=str, default="grb-cora")
parser.add_argument("--data_dir", type=str, default="../data/")
parser.add_argument("--feat_norm", type=str, default="arctan")

# Model settings
parser.add_argument("--model", nargs='+', default=None)
parser.add_argument("--save_dir", type=str, default="../saved_models_test/")
parser.add_argument("--config_dir", type=str, default="../pipeline/configs/")
parser.add_argument("--log_dir", type=str, default="../pipeline/logs/")
parser.add_argument("--save_name", type=str, default="model.pt")

# Training settings
parser.add_argument("--gpu", type=int, default=0, help="gpu")
parser.add_argument("--n_train", type=int, default=1)
parser.add_argument("--n_epoch", type=int, default=1000, help="Training epoch.")
parser.add_argument("--lr", type=float, default=0.01, help="Learning rate.")
parser.add_argument("--eval_every", type=int, default=1)
parser.add_argument("--save_after", type=int, default=0)
parser.add_argument("--train_mode", type=str, default="inductive")
parser.add_argument("--eval_metric", type=str, default="acc")
parser.add_argument("--early_stop", action="store_true")
parser.add_argument("--early_stop_patience", type=int, default=500)
parser.add_argument("--lr_scheduler", action="store_true")
parser.add_argument("--seed", type=int, default=0)
parser.add_argument("--verbose", action="store_true")

injection_attack_pipeline:

```python
parser = argparse.ArgumentParser(description='Injection attack on GML models.')

# Dataset settings
parser.add_argument("--dataset", type=str, default="grb-cora")
parser.add_argument("--dataset_mode", nargs='+', default=["easy", "medium", "hard", "full"])
parser.add_argument("--data_dir", type=str, default="../data/")
parser.add_argument("--feat_norm", type=str, default="arctan")

# Model settings model_sur
parser.add_argument("--model", nargs='+', default=None)
parser.add_argument("--model_dir", type=str, default="../saved_models_test/")
parser.add_argument("--config_dir", type=str, default="../pipeline/configs/")
parser.add_argument("--log_dir", type=str, default="../pipeline/logs/")
parser.add_argument("--model_file", type=str, default="model_0.pt")

# Injection attack setting
parser.add_argument("--attack", nargs='+', default=None)
parser.add_argument("--attack_mode", type=str, default="injection")
parser.add_argument("--save_dir", type=str, default="../attack_results/")
parser.add_argument("--attack_epoch", type=int, default=500)
parser.add_argument("--attack_lr", type=float, default=0.01)
parser.add_argument("--n_attack", type=int, default=1)
parser.add_argument("--n_inject_ratio", type=float, default=None)
parser.add_argument("--n_inject_max", type=int, default=20)
parser.add_argument("--n_edge_max", type=int, default=20)
parser.add_argument("--feat_lim_min", type=float, default=None)
parser.add_argument("--feat_lim_max", type=float, default=None)
parser.add_argument("--flip_type", type=str, default="deg")
parser.add_argument("--gpu", type=int, default=0, help="gpu")
parser.add_argument("--early_stop", action="store_true")
parser.add_argument("--early_stop_patience", type=int, default=500)
parser.add_argument("--seed", type=int, default=0)
parser.add_argument("--verbose", action="store_true")

leaderboard_pipeline:

```python
parser = argparse.ArgumentParser(description='Reproducing results on leaderboards')
parser.add_argument("--gpu", type=int, default=0, help="gpu")
parser.add_argument("--dataset", type=str, default="grb-cora")
parser.add_argument("--dataset_mode", nargs='+', default=["easy", "medium", "hard", "full"])
parser.add_argument("--feat_norm", type=str, default="arctan")
parser.add_argument("--data_dir", type=str, default="../data/")
parser.add_argument("--model", nargs='+', default=None)
parser.add_argument("--model_sur", nargs='+', default=None)
parser.add_argument("--model_dir", type=str, default="../saved_models/")
parser.add_argument("--model_file", type=str, default="final_model_at_0.pt")
parser.add_argument("--config_dir", type=str, default="../pipeline/configs/")
parser.add_argument("--n_attack", type=int, default=0)
parser.add_argument("--attack", nargs='+', default=None)
parser.add_argument("--attack_mode", type=str, default="injection")
parser.add_argument("--attack_dir", type=str, default="../attack_results/")
parser.add_argument("--attack_adj_name", type=str, default="adj.pkl")
parser.add_argument("--attack_feat_name", type=str, default="features.npy")
parser.add_argument("--weight_type", type=str, default="polynomial", help="Type of weighted accuracy, 'polynomial' or 'arithmetic'.")
parser.add_argument("--save_dir", type=str, default=None)
```
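For what it's worth, here is a sketch of how the saved attack artifacts named above (adj.pkl, features.npy) could be loaded back and re-evaluated with utils.evaluate; it assumes the adjacency is pickled and the injected features are a NumPy array, and that dataset and model have been loaded as in the reproduction script further down (the leaderboard script itself may do this differently):

```python
# Sketch: re-evaluating a trained model on saved injection-attack results.
# Assumptions: attack_dir is a placeholder, adj.pkl is a pickled adjacency,
# features.npy holds the injected node features, and `dataset`/`model` are
# already loaded as in the reproduction script below.
import os
import pickle

import numpy as np
import torch
import grb.utils as utils

attack_dir = "../attack_results/fgsm_vs_gcn"  # placeholder directory
with open(os.path.join(attack_dir, "adj.pkl"), "rb") as f:
    adj_attack = pickle.load(f)
features_attack = torch.FloatTensor(np.load(os.path.join(attack_dir, "features.npy")))

features_attacked = torch.cat([dataset.features, features_attack])
test_score = utils.evaluate(model,
                            features=features_attacked,
                            adj=adj_attack,
                            labels=dataset.labels,
                            adj_norm_func=model.adj_norm_func,
                            mask=dataset.test_mask,
                            device="cuda:0")
print("Test score on attacked graph: {:.4f}".format(test_score))
```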

Running with these settings, I get results that don't match the leaderboard on the website. Please help me!

[screenshot: result]

I also have a question about the attack and defense models: are ProGNN, Metattack, Nettack, and random attacks not included in GRB because they don't scale to large datasets?

Stanislas0 commented 1 year ago

Hi, I'll check if the hyper-parameters are correct. For your question, the answer is yes, these methods are not scalable to large datasets.

xiangchi-yuan commented 1 year ago

For example, when I simply train a GCN and attack it with FGSM, the results don't match the leaderboard on the website, even though I have checked every hyper-parameter against pipeline/configs and the values given in the paper.

```python
import os

import torch

import grb.utils as utils
from grb.dataset import Dataset
from grb.model.torch import GCN
from grb.utils.normalize import GCNAdjNorm
from grb.trainer.trainer import Trainer
from grb.attack.injection import FGSM


def main():
    # Load data
    dataset_name = "grb-cora"
    dataset = Dataset(name=dataset_name,
                      data_dir="../data/",
                      mode="full",
                      feat_norm="arctan")

    # Build model
    # GCN
    model_name = "gcn"
    model = GCN(in_features=dataset.num_features,
                out_features=dataset.num_classes,
                hidden_features=128,
                n_layers=3,
                layer_norm=False,
                dropout=0.6)

    print("Number of parameters: {}.".format(utils.get_num_params(model)))
    print(model)

    # Training
    save_dir = "./saved_models/{}/{}".format(dataset_name, model_name)
    save_name = "model.pt"
    device = "cuda:0"
    feat_norm = None
    train_mode = "inductive"  # "transductive"

    trainer = Trainer(dataset=dataset,
                      optimizer=torch.optim.Adam(model.parameters(), lr=0.01),
                      loss=torch.nn.functional.cross_entropy,
                      lr_scheduler=False,
                      early_stop=True,
                      early_stop_patience=500,
                      feat_norm=feat_norm,
                      device=device)

    trainer.train(model=model,
                  n_epoch=5000,
                  eval_every=1,
                  save_after=0,
                  save_dir=save_dir,
                  save_name=save_name,
                  train_mode=train_mode,
                  verbose=False)

    # Inference
    model = torch.load(os.path.join(save_dir, save_name))
    model = model.to(device)
    model.eval()

    # by trainer
    pred = trainer.inference(model)

    # by utils
    pred = utils.inference(model,
                           features=dataset.features,
                           feat_norm=feat_norm,
                           adj=dataset.adj,
                           adj_norm_func=model.adj_norm_func,
                           device=device)

    # Evaluation
    # by trainer
    test_score = trainer.evaluate(model, dataset.test_mask)
    print("Test score by trainer: {:.4f}".format(test_score))

    # by utils
    test_score = utils.evaluate(model,
                                features=dataset.features,
                                adj=dataset.adj,
                                labels=dataset.labels,
                                feat_norm=feat_norm,
                                adj_norm_func=model.adj_norm_func,
                                mask=dataset.test_mask,
                                device=device)
    print("Test score by utils: {:.4f}".format(test_score))

    # Attack and evaluate
    adj = dataset.adj
    features = dataset.features
    labels = dataset.labels
    num_features = dataset.num_features
    num_classes = dataset.num_classes
    test_mask = dataset.test_mask
    device = "cuda:0"

    attack = FGSM(epsilon=0.01,
                  n_epoch=1000,
                  n_inject_max=60,
                  n_edge_max=20,
                  feat_lim_min=-0.94,
                  feat_lim_max=0.94,
                  device=device)

    # Surrogate model (always GCN)
    save_dir = "./saved_models/{}/gcn".format(dataset_name)
    save_name = "model.pt"
    model_sur = torch.load(os.path.join(save_dir, save_name))
    adj_attack, features_attack = attack.attack(model=model_sur,
                                                adj=adj,
                                                features=features,
                                                target_mask=test_mask,
                                                adj_norm_func=model_sur.adj_norm_func)

    features_attacked = torch.cat([features.to(device), features_attack])
    test_score = utils.evaluate(model_sur,
                                features=features_attacked,
                                adj=adj_attack,
                                labels=dataset.labels,
                                adj_norm_func=model_sur.adj_norm_func,
                                mask=dataset.test_mask,
                                device=device)
    print("Test score after attack for surrogate model: {:.4f}.".format(test_score))

    # Target model (also GCN here)
    model_name = "gcn"
    target_dir = "./saved_models/{}/{}".format(dataset_name, model_name)
    target_name = "model.pt"
    device = "cuda:0"
    model = torch.load(os.path.join(target_dir, target_name))
    model = model.to(device)
    model.eval()
    test_score = utils.evaluate(model,
                                features=features_attacked,
                                adj=adj_attack,
                                labels=dataset.labels,
                                adj_norm_func=model.adj_norm_func,
                                mask=dataset.test_mask,
                                device=device)
    print("Test score after attack for target model: {:.4f}.".format(test_score))


if __name__ == '__main__':
    main()
```

I think it would be easiest for you to check this Python file to find the problem. Thanks! Also, will GRB support ProGNN, Metattack, and Nettack on small datasets like Cora in evasion mode? GNNGuard and GCN-SVD have already been included in the GRB Cora leaderboard, even though they also suffer from scalability issues. I have also sent you an email with some details. Looking forward to your reply!