Closed Andy3117006664 closed 4 months ago
Thank you for your interest in our work. You may refer to the following script:
# Imports: PyTorch Geometric supplies the Planetoid citation datasets;
# scipy.sparse is used to build the adjacency matrices that get pickled.
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
import torch as t
import numpy as np
from scipy.sparse import csr_matrix, coo_matrix
import argparse
import os
import pickle
# Command-line interface for this preprocessing script.
parser = argparse.ArgumentParser(description='Model Parameters')
# Planetoid dataset name (e.g. 'Cora', 'Citeseer', 'Pubmed').
parser.add_argument('--data', default='', type=str, help='data name')
# Training nodes kept per class; -1 keeps the whole training pool.
parser.add_argument('--shot', default=5, type=int, help='number of shots for each node')
args = parser.parse_args()
def make_fewshot_train_mask(train_mask, labels, shot):
    """Build a boolean few-shot training mask.

    For every class, keep at most `shot` nodes (all of them when
    shot == -1), sampled uniformly from the nodes marked True in
    `train_mask`.

    Args:
        train_mask: 1-D boolean array over nodes; True marks a training
            candidate.
        labels: 1-D integer array of per-node class labels, same length.
        shot: number of nodes to keep per class, or -1 to keep all.

    Returns:
        1-D boolean numpy array of the same length as `train_mask`.
    """
    node_num = train_mask.shape[0]
    # Group the ids of the training-candidate nodes by their class label.
    class_to_nodeid = dict()
    for i in range(node_num):
        if not train_mask[i]:
            continue
        if labels[i] not in class_to_nodeid:
            class_to_nodeid[labels[i]] = list()
        class_to_nodeid[labels[i]].append(i)
    # All-False boolean mask with the same length as `labels`.
    new_train_mask = (np.zeros_like(labels) != 0)
    for label_class in class_to_nodeid:
        nodes = class_to_nodeid[label_class]
        if shot == -1:
            picked_nodes = nodes
        else:
            # BUG FIX: the original used the permuted *positions*
            # (0..len(nodes)-1) directly as node ids, so it could mark
            # nodes outside the train pool. Map positions back to ids.
            idxs = np.random.permutation(len(nodes))[:shot]
            picked_nodes = [nodes[j] for j in idxs]
        new_train_mask[picked_nodes] = True
    print('before', np.sum(train_mask), 'after', np.sum(new_train_mask))
    return new_train_mask
def make_adj_with_class_nodes(mat, labels, mask):
    """Augment the adjacency with one virtual node per class.

    Each node selected by `mask` gets an undirected edge to the virtual
    node of its class; class node ids start at `node_num`.

    Args:
        mat: scipy COO adjacency over the real nodes.
        labels: 1-D integer labels, assumed to start at 0.
        mask: boolean mask choosing which nodes link to their class node.

    Returns:
        coo_matrix of shape (node_num + class_num, node_num + class_num).
    """
    assert np.min(labels) == 0
    class_num = np.max(labels) + 1
    node_num = mat.shape[0]
    rows = list(mat.row)
    cols = list(mat.col)
    for node in range(node_num):
        if not mask[node]:
            continue
        class_node = labels[node] + node_num
        # Add both directions so the augmented graph stays symmetric.
        rows.extend([node, class_node])
        cols.extend([class_node, node])
    vals = np.ones_like(rows)
    print('class num', class_num)
    dim = node_num + class_num
    return coo_matrix((vals, (rows, cols)), [dim, dim])
def write_file(data, file):
    """Serialize `data` to the path `file` using pickle."""
    with open(file, 'wb') as handle:
        pickle.dump(data, handle)
# Load the requested Planetoid dataset with row-normalized node features.
dataset = Planetoid('data', args.data, transform=T.NormalizeFeatures())
data = dataset[0]
print(data)
node_num = data.y.shape[0]
# Build a sparse node-node adjacency matrix from the COO edge index.
rows = data.edge_index[0].numpy()
cols = data.edge_index[1].numpy()
vals = np.ones_like(rows)
mat = coo_matrix((vals, (rows, cols)), shape=[node_num, node_num])
feats = data.x.numpy()
labels = data.y.numpy()
train_mask, val_mask, test_mask = data.train_mask, data.val_mask, data.test_mask
masks = dict()
# Merge validation nodes into the training pool before few-shot sampling.
# NOTE(review): val_mask is also saved as the 'valid' split below, so
# validation nodes can appear in both pools — confirm this is intended.
train_mask = train_mask + val_mask
# (Alternative random 80/20 split kept for reference, disabled.)
# test_idxs = np.random.permutation(node_num)[:int(node_num * 0.2)]
# train_mask = np.ones(node_num)
# train_mask[test_idxs] = 0
# test_mask = np.zeros(node_num)
# test_mask[test_idxs] = 1
# train_mask = t.from_numpy(train_mask == 1)
# test_mask = t.from_numpy(test_mask == 1)
# val_mask = test_mask
masks['valid'] = val_mask.numpy()
masks['test'] = test_mask.numpy()
print('train', np.sum(train_mask.numpy()))
print('val', np.sum(val_mask.numpy()))
print('test', np.sum(test_mask.numpy()))
# Subsample the merged training pool down to `shot` nodes per class.
masks['train'] = make_fewshot_train_mask(train_mask.numpy(), labels, args.shot)
# Adjacency augmented with one virtual node per class, connected to the
# few-shot training nodes of that class.
class_adj = make_adj_with_class_nodes(mat, labels, masks['train'])
# exit()
# Persist all artifacts under a folder named after the dataset.
if not os.path.exists(args.data):
    os.mkdir(args.data)
write_file(masks, f'{args.data}/mask_{args.shot}.pkl')
write_file(feats, f'{args.data}/feats.pkl')
write_file(mat, f'{args.data}/adj.pkl')
write_file(labels, f'{args.data}/label.pkl')
write_file(class_adj, f'{args.data}/adj_{args.shot}.pkl')
Thank you for your interest in our work. You may refer to the following script:
from torch_geometric.datasets import Planetoid import torch_geometric.transforms as T import torch as t import numpy as np from scipy.sparse import csr_matrix, coo_matrix import argparse import os import pickle parser = argparse.ArgumentParser(description='Model Parameters') parser.add_argument('--data', default='', type=str, help='data name') parser.add_argument('--shot', default=5, type=int, help='number of shots for each node') args = parser.parse_args() def make_fewshot_train_mask(train_mask, labels, shot): node_num = train_mask.shape[0] class_to_nodeid = dict() for i in range(node_num): if not train_mask[i]: continue if labels[i] not in class_to_nodeid: class_to_nodeid[labels[i]] = list() class_to_nodeid[labels[i]].append(i) new_train_mask = (np.zeros_like(labels) != 0) for label_class in class_to_nodeid: nodes = class_to_nodeid[label_class] if shot == -1: picked_nodes = nodes else: picked_nodes = np.random.permutation(len(nodes))[:shot] new_train_mask[picked_nodes] = True print('before', np.sum(train_mask), 'after', np.sum(new_train_mask)) return new_train_mask def make_adj_with_class_nodes(mat, labels, mask): rows, cols = list(mat.row), list(mat.col) assert np.min(labels) == 0 class_num = np.max(labels) + 1 node_num = mat.shape[0] for i in range(node_num): if mask[i]: rows.append(i) cols.append(labels[i] + node_num) rows.append(labels[i] + node_num) cols.append(i) vals = np.ones_like(rows) print('class num', class_num) return coo_matrix((vals, (rows, cols)), [node_num + class_num, node_num + class_num]) def write_file(data, file): with open(file, 'wb') as fs: pickle.dump(data, fs) dataset = Planetoid('data', args.data, transform=T.NormalizeFeatures()) data = dataset[0] print(data) node_num = data.y.shape[0] rows = data.edge_index[0].numpy() cols = data.edge_index[1].numpy() vals = np.ones_like(rows) mat = coo_matrix((vals, (rows, cols)), shape=[node_num, node_num]) feats = data.x.numpy() labels = data.y.numpy() train_mask, val_mask, test_mask = 
data.train_mask, data.val_mask, data.test_mask masks = dict() train_mask = train_mask + val_mask # test_idxs = np.random.permutation(node_num)[:int(node_num * 0.2)] # train_mask = np.ones(node_num) # train_mask[test_idxs] = 0 # test_mask = np.zeros(node_num) # test_mask[test_idxs] = 1 # train_mask = t.from_numpy(train_mask == 1) # test_mask = t.from_numpy(test_mask == 1) # val_mask = test_mask masks['valid'] = val_mask.numpy() masks['test'] = test_mask.numpy() print('train', np.sum(train_mask.numpy())) print('val', np.sum(val_mask.numpy())) print('test', np.sum(test_mask.numpy())) masks['train'] = make_fewshot_train_mask(train_mask.numpy(), labels, args.shot) class_adj = make_adj_with_class_nodes(mat, labels, masks['train']) # exit() if not os.path.exists(args.data): os.mkdir(args.data) write_file(masks, f'{args.data}/mask_{args.shot}.pkl') write_file(feats, f'{args.data}/feats.pkl') write_file(mat, f'{args.data}/adj.pkl') write_file(labels, f'{args.data}/label.pkl') write_file(class_adj, f'{args.data}/adj_{args.shot}.pkl')
Thank you for your prompt response and the helpful script for the PubMed dataset.
Could you please provide scripts or guidance for handling the Amazon-Book and ML-1M recommendation-system (RS) datasets in a similar manner?
Thank you for your assistance!
The raw PubMed dataset contains files such as ind.pubmed.allx and ind.pubmed.ally.
How do I convert these files from the raw PubMed dataset into the data expected in the /datasets/pubmed/ folder?