BUPT-GAMMA / OpenHGNN

This is an open-source toolkit for Heterogeneous Graph Neural Network(OpenHGNN) based on DGL.
Apache License 2.0
848 stars 142 forks source link

Run Example #169

Closed DuanhaoranCC closed 1 year ago

DuanhaoranCC commented 1 year ago

❓ Questions and Help

from openhgnn.dataset import generate_random_hg
from dgl import transforms as T
from dgl import DGLHeteroGraph
from dgl.data import DGLDataset
import torch as th
import numpy as np
import scipy.sparse as sp

category = 'p'
meta_paths_dict = {'pap': [('p', 'to', 'a'), ('a', 'to', 'p')]}

class MyNCDataset(DGLDataset):
    def __init__(self):
        super().__init__(name='my-nc-dataset')

    def process(self):
        # Generate a random heterogeneous graph with labels on target node type.
        self._g = load_acm()
        transform = T.Compose([T.ToSimple(), T.AddReverse()])
        # self._g = transform(self._g)

    # Some models require meta paths, you can set meta path dict for this dataset.
    @property
    def meta_paths_dict(self):
        return meta_paths_dict

    def __getitem__(self, idx):
        return self._g

    def __len__(self):
        return 1

def load_acm():
    path = "../data/acm/"
    ratio = [1, 5, 10, 20]
    label = np.load(path + "labels.npy").astype('int32')
    nei_a = np.load(path + "nei_a.npy", allow_pickle=True)
    nei_s = np.load(path + "nei_s.npy", allow_pickle=True)
    feat_p = sp.load_npz(path + "p_feat.npz").astype("float32")
    feat_a = sp.load_npz(path + "a_feat.npz").astype("float32")
    feat_s = make_sparse_eye(60)
    pap = sp.load_npz(path + "pap.npz")
    psp = sp.load_npz(path + "psp.npz")
    pos = sp.load_npz(path + "pos.npz")

    label = torch.LongTensor(label)
    nei_a = nei_to_edge_index([torch.LongTensor(i) for i in nei_a])
    nei_s = nei_to_edge_index([torch.LongTensor(i) for i in nei_s])
    feat_p = preprocess_sp_features(feat_p)
    feat_a = preprocess_sp_features(feat_a)
    feat_s = preprocess_th_features(feat_s)
    pap = sp_adj_to_tensor(pap)
    psp = sp_adj_to_tensor(psp)
    pos = sp_adj_to_tensor(pos)
    # nei_a size: (2, 13407)
    edge = {
        ('a', 'to', 'p'): (nei_a.flip([0])[0], nei_a.flip([0])[1]),
        ('s', 'to', 'p'): (nei_s.flip([0])[0], nei_s.flip([0])[1])
    }
    g = dgl.heterograph(edge)
    g.nodes['p'].data['x'] = feat_p
    g.nodes['s'].data['x'] = feat_s
    g.nodes['a'].data['x'] = feat_a
    g.nodes['p'].data['label'] = label

    # data[('p', 'a', 'p')].edge_index = pap
    # data[('p', 's', 'p')].edge_index = psp
    # data[('p', 'pos', 'p')].edge_index = pos

    for r in ratio:
        mask = train_test_split(
            g.nodes['p'].data['label'].detach().cpu().numpy(), seed=np.random.randint(0, 35456, size=1),
            train_examples_per_class=r,
            val_size=1000, test_size=None)
        train_mask_l = f"{r}_train_mask"
        train_mask = mask['train'].astype(bool)
        val_mask_l = f"{r}_val_mask"
        val_mask = mask['val'].astype(bool)

        test_mask_l = f"{r}_test_mask"
        test_mask = mask['test'].astype(bool)

        g.nodes['p'].data[train_mask_l] = torch.from_numpy(train_mask)
        g.nodes['p'].data[val_mask_l] = torch.from_numpy(val_mask)
        g.nodes['p'].data[test_mask_l] = torch.from_numpy(test_mask)

    return g

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', '-m', default='RGCN', type=str, help='name of models')
    parser.add_argument('--dataset', '-d', default='acm', type=str, help='acm or cora')
    parser.add_argument('--gpu', '-g', default='-1', type=int, help='-1 means cpu')
    parser.add_argument('--mini-batch-flag', action='store_true')

    args = parser.parse_args()

    ds = MyNCDataset()
    new_ds = AsNodeClassificationDataset(ds, target_ntype='author', labeled_nodes_split_ratio=[0.8, 0.1, 0.1],
                                         prediction_ratio=1, label_mask_feat_name='label_mask')

    experiment = Experiment(conf_path='./my_config.ini', max_epoch=1, model=args.model, dataset=new_ds,
                            task='node_classification', mini_batch_flag=args.mini_batch_flag, gpu=args.gpu,
                            test_flag=False, prediction_flag=False, batch_size=100, use_uva=False)
    experiment.run()
WARNING:root:The OGB package is out of date. Your version is 1.3.5, while the latest version is 1.3.6.
------------------------------------------------------------------------------
 Basic setup of this experiment: 
     model: RGCN    
     dataset: my-nc-dataset-as-nodepred   
     task: node_classification. 
 This experiment has following parameters. You can use set_params to edit them.
 Use print(experiment) to print this information again.
------------------------------------------------------------------------------
batch_size: 100
dataset_name: my-nc-dataset-as-nodepred
device: cpu
dropout: 0.2
fanout: 4
gpu: -1
hidden_dim: 64
hpo_search_space: None
hpo_trials: 100
in_dim: 64
load_from_pretrained: True
lr: 0.01
max_epoch: 0
mini_batch_flag: False
model_name: RGCN
n_bases: 40
num_layers: 3
optimizer: Adam
output_dir: ./openhgnn/output/RGCN
patience: 50
prediction_flag: True
seed: 0
test_flag: False
use_best_config: False
use_self_loop: False
use_uva: False
validation: True
weight_decay: 0.0001

08 May 15:02    INFO  [Config Info] Model: RGCN,    Task: node_classification,  Dataset: Dataset("my-nc-dataset-as-nodepred", num_graphs=1, save_path=/home/yhkj/.dgl/my-nc-dataset-as-nodepred)
08 May 15:02    INFO  [NC Specific] Modify the out_dim with num_classes
Traceback (most recent call last):
  File "/hgnn/inference.py", line 21, in <module>
    prediction_res = experiment.run()
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/openhgnn/experiment.py", line 105, in run
    flow = build_flow(self.config, trainerflow)
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/openhgnn/trainerflow/__init__.py", line 46, in build_flow
    return FLOW_REGISTRY[flow_name](args)
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/openhgnn/trainerflow/node_classification.py", line 41, in __init__
    self.model = build_model(self.model).build_model_from_args(self.args, self.hg).to(self.device)
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/openhgnn/models/RGCN.py", line 41, in build_model_from_args
    return cls(args.hidden_dim,
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/openhgnn/models/RGCN.py", line 73, in __init__
    self.layers.append(RelGraphConvLayer(
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/openhgnn/models/RGCN.py", line 167, in __init__
    self.conv = dglnn.HeteroGraphConv({
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/dgl/nn/pytorch/hetero.py", line 132, in __init__
    self.mods = nn.ModuleDict(mods)
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/torch/nn/modules/container.py", line 322, in __init__
    self.update(modules)
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/torch/nn/modules/container.py", line 398, in update
    self[key] = module
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/torch/nn/modules/container.py", line 329, in __setitem__
    self.add_module(key, module)
  File "/home/yhkj/anaconda3/envs/ssl/lib/python3.8/site-packages/torch/nn/modules/module.py", line 388, in add_module
    raise KeyError("attribute '{}' already exists".format(name))
KeyError: "attribute 'to' already exists"

Process finished with exit code 1

Hello, dear author! I followed the official example strictly, but returned an error on my own dataset! I would like to know this is the reason. Thank you for your help.

lazishu2000 commented 1 year ago

Thanks for your comment, however, we open issues for our related bugs and discussions. Your problem seems to be irrelevant to OpenHGNN itself, so we're closing it.

Personal advise: The name conflict occurred on line 167, where self. conv=dglnn. HeteroGraphConv ({}). Before this line of code, the class already defined an attribute called "to". However, in this line of code, there may also be an attribute named "to" in dglnn. HeteroGraphConv(), causing a name conflict.