wwliu555 / IRGNN_TNNLS_2021

About run_irgnn.py #4

Open · Johny971 opened this issue 2 years ago

Johny971 commented 2 years ago

Hi, I have a problem when I run run_irgnn.py. First, line 66 raises the error "__init__() missing required argument: 'sizes'", so I changed "size" to "sizes" on line 65. After that I get this error:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py", line 48, in __getattr__
    return self[key]
  File "/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py", line 68, in __getitem__
    return self._mapping[key]
KeyError: 'max'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/content/GNNtrain/run_irgnn.py", line 66, in <module>
    num_hops=num_step_message_passing, batch_size=batch_size, bipartite=False, shuffle=True)
  File "/usr/local/lib/python3.7/dist-packages/torch_geometric/loader/neighbor_sampler.py", line 142, in __init__
    num_nodes = int(edge_index.max()) + 1
  File "/usr/local/lib/python3.7/dist-packages/torch_geometric/data/data.py", line 345, in __getattr__
    return getattr(self._store, key)
  File "/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py", line 51, in __getattr__
    f"'{self.__class__.__name__}' object has no attribute '{key}'")
AttributeError: 'GlobalStorage' object has no attribute 'max'

So I want to ask: how can I fix this? Thank you.

icecat2012 commented 2 years ago

Hi, I also met the same problem. The problem is the environment (see #2): the torch_geometric version should be <= 1.4.3. In newer releases, torch_geometric.loader.NeighborSampler expects an edge_index tensor plus a sizes list rather than a Data object, which is why the sampler ends up calling .max() on your Data object and fails.
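
A quick way to check which API your environment has (a minimal sketch; it assumes the packaging module is available, which it normally is in any pip-based install):

from packaging import version
import torch_geometric

# The old Data-based NeighborSampler API was removed after 1.4.3, so anything
# newer will reject run_irgnn.py's call signature.
if version.parse(torch_geometric.__version__) > version.parse("1.4.3"):
    print(f"torch_geometric {torch_geometric.__version__} is too new for "
          f"run_irgnn.py; use the workaround below or downgrade.")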

However, there are many package dependency conflicts during the downgrade process (installing torch_geometric is so frustrating). Therefore, I used a brute-force method to solve this problem:

  1. Create a new Python file, XXX_utils.py.
  2. Copy and paste the code at the bottom of this comment (it is taken from the torch_geometric 1.4.3 source).
  3. In run_irgnn.py, replace "from torch_geometric.loader import NeighborSampler" with "from XXX_utils import NeighborSampler".
  4. Pray to Nyan Cat. (DONE)

from __future__ import division
import warnings
import torch
from torch import Tensor  # used by maybe_num_nodes() below
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.utils.repeat import repeat

try:
    from torch_cluster import neighbor_sampler
except ImportError:
    neighbor_sampler = None

def maybe_num_nodes(edge_index, num_nodes=None):
    # Infer the number of nodes from edge_index when num_nodes is not given.
    if num_nodes is not None:
        return num_nodes
    elif isinstance(edge_index, Tensor):
        return int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0
    else:
        return max(edge_index.size(0), edge_index.size(1))

def segregate_self_loops(edge_index, edge_attr=None):
    # Split edge_index (and edge_attr) into non-loop and self-loop parts.
    mask = edge_index[0] != edge_index[1]
    inv_mask = ~mask
    loop_edge_index = edge_index[:, inv_mask]
    loop_edge_attr = None if edge_attr is None else edge_attr[inv_mask]
    edge_index = edge_index[:, mask]
    edge_attr = None if edge_attr is None else edge_attr[mask]

    return edge_index, edge_attr, loop_edge_index, loop_edge_attr

def degree(index, num_nodes=None, dtype=None):
    # Count occurrences of each node id in index (i.e. the node degree).
    # scatter_add_ requires an int64 index, so cast defensively.
    index = index.long()
    num_nodes = maybe_num_nodes(index, num_nodes)
    out = torch.zeros(num_nodes, dtype=dtype, device=index.device)
    return out.scatter_add_(0, index, out.new_ones(index.size(0)))

def size_repr(value):
    if torch.is_tensor(value):
        return list(value.size())
    elif isinstance(value, int) or isinstance(value, float):
        return [1]
    elif isinstance(value, list) or isinstance(value, tuple):
        return [len(value)]
    else:
        return value

class Block(object):
    # One sampled hop: its node ids, edge ids and relabeled bipartite edge_index.
    def __init__(self, n_id, res_n_id, e_id, edge_index, size):
        self.n_id = n_id
        self.res_n_id = res_n_id
        self.e_id = e_id
        self.edge_index = edge_index
        self.size = size

    def __repr__(self):
        info = [(key, getattr(self, key)) for key in self.__dict__]
        info = ['{}={}'.format(key, size_repr(item)) for key, item in info]
        return '{}({})'.format(self.__class__.__name__, ', '.join(info))

class DataFlow(object):
    # The per-batch stack of Blocks; iteration yields hops from leaves to root.
    def __init__(self, n_id, flow='source_to_target'):
        self.n_id = n_id
        self.flow = flow
        self.__last_n_id__ = n_id
        self.blocks = []

    @property
    def batch_size(self):
        return self.n_id.size(0)

    def append(self, n_id, res_n_id, e_id, edge_index):
        i, j = (0, 1) if self.flow == 'target_to_source' else (1, 0)
        size = [None, None]
        size[i] = self.__last_n_id__.size(0)
        size[j] = n_id.size(0)
        block = Block(n_id, res_n_id, e_id, edge_index, tuple(size))
        self.blocks.append(block)
        self.__last_n_id__ = n_id

    def __len__(self):
        return len(self.blocks)

    def __getitem__(self, idx):
        return self.blocks[::-1][idx]

    def __iter__(self):
        for block in self.blocks[::-1]:
            yield block

    def to(self, device):
        for block in self.blocks:
            block.edge_index = block.edge_index.to(device)
        return self

    def __repr__(self):
        n_ids = [self.n_id] + [block.n_id for block in self.blocks]
        sep = '<-' if self.flow == 'source_to_target' else '->'
        info = sep.join([str(n_id.size(0)) for n_id in n_ids])
        return '{}({})'.format(self.__class__.__name__, info)

class NeighborSampler(object):
    # Back-port of the NeighborSampler from torch_geometric 1.4.3: samples a
    # fixed number (or fraction) of neighbors per hop for each mini-batch.
    def __init__(self, data, size, num_hops, batch_size=1, shuffle=False,
                 drop_last=False, bipartite=True, add_self_loops=False,
                 flow='source_to_target'):

        if neighbor_sampler is None:
            raise ImportError('`NeighborSampler` requires `torch-cluster`.')

        self.data = data
        self.size = repeat(size, num_hops)
        self.num_hops = num_hops
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.bipartite = bipartite
        self.add_self_loops = add_self_loops
        self.flow = flow
        self.edge_index = data.edge_index
        self.e_id = torch.arange(self.edge_index.size(1))
        if bipartite and add_self_loops:
            tmp = segregate_self_loops(self.edge_index, self.e_id)
            self.edge_index, self.e_id, self.edge_index_loop = tmp[:3]
            self.e_id_loop = self.e_id.new_full((data.num_nodes, ), -1)
            self.e_id_loop[tmp[2][0]] = tmp[3]

        assert flow in ['source_to_target', 'target_to_source']
        self.i, self.j = (0, 1) if flow == 'target_to_source' else (1, 0)
        edge_index_i, self.e_assoc = self.edge_index[self.i].sort()
        self.edge_index_j = self.edge_index[self.j, self.e_assoc]
        deg = degree(edge_index_i, data.num_nodes, dtype=torch.long)
        self.cumdeg = torch.cat([deg.new_zeros(1), deg.cumsum(0)])
        self.tmp = torch.empty(data.num_nodes, dtype=torch.long)

    def __get_batches__(self, subset=None):
        if subset is None and not self.shuffle:
            subset = torch.arange(self.data.num_nodes, dtype=torch.long)
        elif subset is None and self.shuffle:
            subset = torch.randperm(self.data.num_nodes)
        else:
            if subset.dtype == torch.bool or subset.dtype == torch.uint8:
                subset = subset.nonzero().view(-1)
            if self.shuffle:
                subset = subset[torch.randperm(subset.size(0))]

        subsets = torch.split(subset, self.batch_size)
        if self.drop_last and subsets[-1].size(0) < self.batch_size:
            subsets = subsets[:-1]
        assert len(subsets) > 0
        return subsets

    def __produce_bipartite_data_flow__(self, n_id):
        # Build a DataFlow of num_hops bipartite Blocks rooted at n_id.
        data_flow = DataFlow(n_id, self.flow)
        for l in range(self.num_hops):
            e_id = neighbor_sampler(n_id, self.cumdeg, self.size[l])
            new_n_id = self.edge_index_j.index_select(0, e_id)
            e_id = self.e_assoc[e_id]

            if self.add_self_loops:
                new_n_id = torch.cat([new_n_id, n_id], dim=0)
                new_n_id, inv = new_n_id.unique(sorted=False,
                                                return_inverse=True)
                res_n_id = inv[-n_id.size(0):]
            else:
                new_n_id = new_n_id.unique(sorted=False)
                res_n_id = None

            edges = [None, None]
            edge_index_i = self.edge_index[self.i, e_id]
            if self.add_self_loops:
                edge_index_i = torch.cat([edge_index_i, n_id], dim=0)
            self.tmp[n_id] = torch.arange(n_id.size(0))
            edges[self.i] = self.tmp[edge_index_i]
            edge_index_j = self.edge_index[self.j, e_id]
            if self.add_self_loops:
                edge_index_j = torch.cat([edge_index_j, n_id], dim=0)
            self.tmp[new_n_id] = torch.arange(new_n_id.size(0))
            edges[self.j] = self.tmp[edge_index_j]
            edge_index = torch.stack(edges, dim=0)

            e_id = self.e_id[e_id]
            if self.add_self_loops:
                if self.edge_index_loop.size(1) == self.data.num_nodes:
                    e_id = torch.cat([e_id, self.e_id_loop[n_id]])
                else:
                    e_id = None
                    if torch_geometric.is_debug_enabled():
                        warnings.warn(
                            ('Could not add edge identifiers to the DataFlow'
                             'object due to missing initial self-loops. '
                             'Please make sure that your graph already '
                             'contains self-loops in case you want to use '
                             'edge-conditioned operators.'))

            n_id = new_n_id
            data_flow.append(n_id, res_n_id, e_id, edge_index)
        return data_flow

    def __produce_subgraph__(self, b_id):
        # Build one merged Data subgraph around the batch nodes b_id.
        n_ids = [b_id]
        e_ids = []
        edge_indices = []

        for l in range(self.num_hops):
            e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[l])
            n_id = self.edge_index_j.index_select(0, e_id)
            n_id = n_id.unique(sorted=False)
            n_ids.append(n_id)
            e_ids.append(self.e_assoc.index_select(0, e_id))
            edge_index = self.data.edge_index.index_select(1, e_ids[-1])
            edge_indices.append(edge_index)

        n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
        self.tmp[n_id] = torch.arange(n_id.size(0))
        e_id = torch.cat(e_ids, dim=0)
        edge_index = self.tmp[torch.cat(edge_indices, dim=1)]

        num_nodes = n_id.size(0)
        idx = edge_index[0] * num_nodes + edge_index[1]
        idx, inv = idx.unique(sorted=False, return_inverse=True)
        # Use integer floor division: on newer torch, '/' on int tensors
        # performs true division and would yield a float edge_index.
        edge_index = torch.stack([idx // num_nodes, idx % num_nodes], dim=0)
        e_id = e_id.new_zeros(edge_index.size(1)).scatter_(0, inv, e_id)

        return Data(edge_index=edge_index, e_id=e_id, n_id=n_id, b_id=b_id,
                    sub_b_id=self.tmp[b_id], num_nodes=num_nodes)

    def __call__(self, subset=None):
        if self.bipartite:
            produce = self.__produce_bipartite_data_flow__
        else:
            produce = self.__produce_subgraph__

        for n_id in self.__get_batches__(subset):
            yield produce(n_id)
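
For completeness, this is roughly how the shim is consumed in run_irgnn.py. Treat it as a sketch: the names tr_data, num_step_message_passing and batch_size are assumptions read off the tracebacks above, and size=1.0 is only an illustrative value.

from XXX_utils import NeighborSampler

# tr_data is assumed to be a torch_geometric Data object; size=1.0 keeps
# every neighbor at each hop (torch_cluster treats floats as fractions).
loader = NeighborSampler(tr_data, size=1.0, num_hops=num_step_message_passing,
                         batch_size=batch_size, bipartite=False, shuffle=True)

for sub_data in loader():
    # With bipartite=False each batch is a Data subgraph carrying n_id
    # (original node ids), e_id (original edge ids) and a relabeled edge_index.
    ...
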
Johny971 commented 2 years ago

Hi, thanks for your help. But when I run run_irgnn.py, I now get an error like this:

Traceback (most recent call last):
  File "/content/GNNtrain/run_irgnn.py", line 66, in <module>
    num_hops=num_step_message_passing, batch_size=batch_size, bipartite=False, shuffle=True)
  File "/content/GNNtrain/fix_utils.py", line 129, in __init__
    deg = degree(edge_index_i, data.num_nodes, dtype=torch.long)
  File "/content/GNNtrain/fix_utils.py", line 34, in degree
    return out.scatter_add_(0, index, out.new_ones((index.size(0))))
IndexError: scatter(): Expected dtype int64 for index.

It seems the error comes from your utils.py file. How can I fix it?

icecat2012 commented 2 years ago

I didn't meet this error before, but my guess is an edge_index dtype error or another environment conflict. Maybe you can try converting the input data type. In my case, I edited the code in run_irgnn.py as below:

z[sub_data.n_id] = model.encode(tr_x[sub_data.n_id], sub_data.edge_index.type(torch.LongTensor), tr_edge_attr[sub_data.e_id])

I also had to rewrite the model.split_edge function.

Find the place where the error occurs and the edge_index that feeds into it, then use .type(torch.LongTensor) to change the data type. For example: data.train_edge_index.type(torch.LongTensor)
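
In other words, the fix is just a dtype cast before the tensor reaches scatter_add_. A tiny self-contained sketch (the tensor values and name here are illustrative, not from the repo):

import torch

edge_index = torch.tensor([[0, 1, 2], [1, 2, 0]], dtype=torch.int32)  # wrong dtype
edge_index = edge_index.type(torch.LongTensor)  # equivalent to edge_index.long()
assert edge_index.dtype == torch.int64  # now safe for degree() / scatter_add_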

P.S. My code is from torch_geometric version 1.4.3; perhaps you can try other versions.