The ERROR of to_dense_adj: ''RuntimeError: max(): Expected reduction dim to be specified for input.numel() == 0. Specify the reduction dim with the 'dim' argument.'' #5475
class MoleculeDataset(InMemoryDataset):
def __init__(self,
root,
#data = None,
#slices = None,
transform=None,
pre_transform=None,
pre_filter=None,
dataset='zinc250k',
empty=False):
"""
Adapted from qm9.py. Disabled the download functionality
:param root: directory of the dataset, containing a raw and processed
dir. The raw dir should contain the file containing the smiles, and the
processed dir can either empty or a previously processed file
:param dataset: name of the dataset. Currently only implemented for
zinc250k, chembl_with_labels, tox21, hiv, bace, bbbp, clintox, esol,
freesolv, lipophilicity, muv, pcba, sider, toxcast
:param empty: if True, then will not load any data obj. For
initializing empty dataset
"""
self.dataset = dataset
self.root = root
super(MoleculeDataset, self).__init__(root, transform, pre_transform,
pre_filter)
self.transform, self.pre_transform, self.pre_filter = transform, pre_transform, pre_filter
if not empty:
self.data, self.slices = torch.load(self.processed_paths[0])
def get(self, idx):
data = Data()
for key in self.data.keys:
item, slices = self.data[key], self.slices[key]
s = list(repeat(slice(None), item.dim()))
s[data.__cat_dim__(key, item)] = slice(slices[idx],
slices[idx + 1])
data[key] = item[s]
return data
@property
def raw_file_names(self):
file_name_list = os.listdir(self.raw_dir)
# assert len(file_name_list) == 1 # currently assume we have a
# # single raw file
return file_name_list
@property
def processed_file_names(self):
return 'geometric_data_processed.pt'
def download(self):
raise NotImplementedError('Must indicate valid location of raw data. '
'No download allowed')
def process(self):
data_smiles_list = []
data_list = []
if self.dataset == 'zinc_standard_agent':
input_path = self.raw_paths[0]
input_df = pd.read_csv(input_path, sep=',', compression='gzip',
dtype='str')
smiles_list = list(input_df['smiles'])
zinc_id_list = list(input_df['zinc_id'])
for i in range(len(smiles_list)):
s = smiles_list[i]
# each example contains a single species
try:
rdkit_mol = AllChem.MolFromSmiles(s)
if rdkit_mol != None: # ignore invalid mol objects
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
id = int(zinc_id_list[i].split('ZINC')[1].lstrip('0'))
data.id = torch.tensor(
[id]) # id here is zinc id value, stripped of
# leading zeros
data_list.append(data)
data_smiles_list.append(smiles_list[i])
except:
continue
elif self.dataset == "zinc_sample":
input_path = self.raw_paths[0]
with open(input_path, "r") as f:
data = f.readlines()
all_data = [x.strip() for x in data]
data_smiles_list = []
data_list = []
for i, item in enumerate(all_data):
s = item
try:
rdkit_mol = AllChem.MolFromSmiles(s)
if rdkit_mol != None:
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
id = i
data.id = torch.tensor([id]) # id here is zinc id value, stripped of
# leading zeros
data_list.append(data)
data_smiles_list.append(s)
except:
continue
elif self.dataset == 'chembl_filtered':
### get downstream test molecules.
from splitters import scaffold_split
###
downstream_dir = [
'dataset/bace',
'dataset/bbbp',
'dataset/clintox',
'dataset/esol',
'dataset/freesolv',
'dataset/hiv',
'dataset/lipophilicity',
'dataset/muv',
# 'dataset/pcba/processed/smiles.csv',
'dataset/sider',
'dataset/tox21',
'dataset/toxcast'
]
downstream_inchi_set = set()
for d_path in downstream_dir:
print(d_path)
dataset_name = d_path.split('/')[1]
downstream_dataset = MoleculeDataset(d_path, dataset=dataset_name)
downstream_smiles = pd.read_csv(os.path.join(d_path,
'processed', 'smiles.csv'),
header=None)[0].tolist()
assert len(downstream_dataset) == len(downstream_smiles)
_, _, _, (train_smiles, valid_smiles, test_smiles) = scaffold_split(downstream_dataset, downstream_smiles, task_idx=None, null_value=0,
frac_train=0.8,frac_valid=0.1, frac_test=0.1,
return_smiles=True)
### remove both test and validation molecules
remove_smiles = test_smiles + valid_smiles
downstream_inchis = []
for smiles in remove_smiles:
species_list = smiles.split('.')
for s in species_list: # record inchi for all species, not just
# largest (by default in create_standardized_mol_id if input has
# multiple species)
inchi = create_standardized_mol_id(s)
downstream_inchis.append(inchi)
downstream_inchi_set.update(downstream_inchis)
smiles_list, rdkit_mol_objs, folds, labels = \
_load_chembl_with_labels_dataset(os.path.join(self.root, 'raw'))
print('processing')
for i in range(len(rdkit_mol_objs)):
rdkit_mol = rdkit_mol_objs[i]
if rdkit_mol != None:
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
mw = Descriptors.MolWt(rdkit_mol)
if 50 <= mw <= 900:
inchi = create_standardized_mol_id(smiles_list[i])
if inchi != None and inchi not in downstream_inchi_set:
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor(labels[i, :])
# fold information
if i in folds[0]:
data.fold = torch.tensor([0])
elif i in folds[1]:
data.fold = torch.tensor([1])
else:
data.fold = torch.tensor([2])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'tox21':
smiles_list, rdkit_mol_objs, labels = \
_load_tox21_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
## convert aromatic bonds to double bonds
#Chem.SanitizeMol(rdkit_mol,
#sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor(labels[i, :])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'hiv':
smiles_list, rdkit_mol_objs, labels = \
_load_hiv_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor([labels[i]])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'bace':
smiles_list, rdkit_mol_objs, folds, labels = \
_load_bace_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor([labels[i]])
data.fold = torch.tensor([folds[i]])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'bbbp':
smiles_list, rdkit_mol_objs, labels = \
_load_bbbp_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
if rdkit_mol != None:
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor([labels[i]])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'clintox':
smiles_list, rdkit_mol_objs, labels = \
_load_clintox_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
if rdkit_mol != None:
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor(labels[i, :])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'esol':
smiles_list, rdkit_mol_objs, labels = \
_load_esol_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor([labels[i]])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'freesolv':
smiles_list, rdkit_mol_objs, labels = \
_load_freesolv_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor([labels[i]])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'lipophilicity':
smiles_list, rdkit_mol_objs, labels = \
_load_lipophilicity_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor([labels[i]])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'muv':
smiles_list, rdkit_mol_objs, labels = \
_load_muv_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor(labels[i, :])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'pcba':
smiles_list, rdkit_mol_objs, labels = \
_load_pcba_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor(labels[i, :])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'pcba_pretrain':
smiles_list, rdkit_mol_objs, labels = \
_load_pcba_dataset(self.raw_paths[0])
downstream_inchi = set(pd.read_csv(os.path.join(self.root,
'downstream_mol_inchi_may_24_2019'),
sep=',', header=None)[0])
for i in range(len(smiles_list)):
if '.' not in smiles_list[i]: # remove examples with
# multiples species
rdkit_mol = rdkit_mol_objs[i]
mw = Descriptors.MolWt(rdkit_mol)
if 50 <= mw <= 900:
inchi = create_standardized_mol_id(smiles_list[i])
if inchi != None and inchi not in downstream_inchi:
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor(labels[i, :])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
# elif self.dataset == ''
elif self.dataset == 'sider':
smiles_list, rdkit_mol_objs, labels = \
_load_sider_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor(labels[i, :])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'toxcast':
smiles_list, rdkit_mol_objs, labels = \
_load_toxcast_dataset(self.raw_paths[0])
for i in range(len(smiles_list)):
rdkit_mol = rdkit_mol_objs[i]
if rdkit_mol != None:
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i]) # id here is the index of the mol in
# the dataset
data.y = torch.tensor(labels[i, :])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'ptc_mr':
input_path = self.raw_paths[0]
input_df = pd.read_csv(input_path, sep=',', header=None, names=['id', 'label', 'smiles'])
smiles_list = input_df['smiles']
labels = input_df['label'].values
for i in range(len(smiles_list)):
s = smiles_list[i]
rdkit_mol = AllChem.MolFromSmiles(s)
if rdkit_mol != None: # ignore invalid mol objects
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i])
data.y = torch.tensor([labels[i]])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
elif self.dataset == 'mutag':
smiles_path = os.path.join(self.root, 'raw', 'mutag_188_data.can')
# smiles_path = 'dataset/mutag/raw/mutag_188_data.can'
labels_path = os.path.join(self.root, 'raw', 'mutag_188_target.txt')
# labels_path = 'dataset/mutag/raw/mutag_188_target.txt'
smiles_list = pd.read_csv(smiles_path, sep=' ', header=None)[0]
labels = pd.read_csv(labels_path, header=None)[0].values
for i in range(len(smiles_list)):
s = smiles_list[i]
rdkit_mol = AllChem.MolFromSmiles(s)
if rdkit_mol != None: # ignore invalid mol objects
# # convert aromatic bonds to double bonds
# Chem.SanitizeMol(rdkit_mol,
# sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
data = mol_to_graph_data_obj_simple(rdkit_mol)
# manually add mol id
data.id = torch.tensor(
[i])
data.y = torch.tensor([labels[i]])
data_list.append(data)
data_smiles_list.append(smiles_list[i])
else:
raise ValueError('Invalid dataset name')
if self.pre_filter is not None:
data_list = [data for data in data_list if self.pre_filter(data)]
if self.pre_transform is not None:
data_list = [self.pre_transform(data) for data in data_list]
# write data_smiles_list in processed paths
data_smiles_series = pd.Series(data_smiles_list)
data_smiles_series.to_csv(os.path.join(self.processed_dir,
'smiles.csv'), index=False,
header=False)
data, slices = self.collate(data_list)
torch.save((data, slices), self.processed_paths[0])
the scaffold_split code is
def scaffold_split(dataset, smiles_list, task_idx=None, null_value=0,
frac_train=0.8, frac_valid=0.1, frac_test=0.1,
return_smiles=False):
"""
Adapted from https://github.com/deepchem/deepchem/blob/master/deepchem/splits/splitters.py
Split dataset by Bemis-Murcko scaffolds
This function can also ignore examples containing null values for a
selected task when splitting. Deterministic split
:param dataset: pytorch geometric dataset obj
:param smiles_list: list of smiles corresponding to the dataset obj
:param task_idx: column idx of the data.y tensor. Will filter out
examples with null value in specified task column of the data.y tensor
prior to splitting. If None, then no filtering
:param null_value: float that specifies null value in data.y to filter if
task_idx is provided
:param frac_train:
:param frac_valid:
:param frac_test:
:param return_smiles:
:return: train, valid, test slices of the input dataset obj. If
return_smiles = True, also returns ([train_smiles_list],
[valid_smiles_list], [test_smiles_list])
"""
np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0)
if task_idx != None:
# filter based on null values in task_idx
# get task array
y_task = np.array([data.y[task_idx].item() for data in dataset])
# boolean array that correspond to non null values
non_null = y_task != null_value
smiles_list = list(compress(enumerate(smiles_list), non_null))
else:
non_null = np.ones(len(dataset)) == 1
smiles_list = list(compress(enumerate(smiles_list), non_null))
# create dict of the form {scaffold_i: [idx1, idx....]}
all_scaffolds = {}
for i, smiles in smiles_list:
scaffold = generate_scaffold(smiles, include_chirality=True)
if scaffold not in all_scaffolds:
all_scaffolds[scaffold] = [i]
else:
all_scaffolds[scaffold].append(i)
# sort from largest to smallest sets
all_scaffolds = {key: sorted(value) for key, value in all_scaffolds.items()}
all_scaffold_sets = [
scaffold_set for (scaffold, scaffold_set) in sorted(
all_scaffolds.items(), key=lambda x: (len(x[1]), x[1][0]), reverse=True)
]
# get train, valid test indices
train_cutoff = frac_train * len(smiles_list)
valid_cutoff = (frac_train + frac_valid) * len(smiles_list)
train_idx, valid_idx, test_idx = [], [], []
for scaffold_set in all_scaffold_sets:
if len(train_idx) + len(scaffold_set) > train_cutoff:
if len(train_idx) + len(valid_idx) + len(scaffold_set) > valid_cutoff:
test_idx.extend(scaffold_set)
else:
valid_idx.extend(scaffold_set)
else:
train_idx.extend(scaffold_set)
assert len(set(train_idx).intersection(set(valid_idx))) == 0
assert len(set(test_idx).intersection(set(valid_idx))) == 0
train_dataset = dataset[torch.tensor(train_idx)]
valid_dataset = dataset[torch.tensor(valid_idx)]
test_dataset = dataset[torch.tensor(test_idx)]
if not return_smiles:
return train_dataset, valid_dataset, test_dataset
else:
train_smiles = [smiles_list[i][1] for i in train_idx]
valid_smiles = [smiles_list[i][1] for i in valid_idx]
test_smiles = [smiles_list[i][1] for i in test_idx]
return train_dataset, valid_dataset, test_dataset, (train_smiles,
valid_smiles,
test_smiles)
def random_scaffold_split(dataset, smiles_list, task_idx=None, null_value=0,
frac_train=0.8, frac_valid=0.1, frac_test=0.1, seed=0):
"""
Adapted from https://github.com/pfnet-research/chainer-chemistry/blob/master/chainer_chemistry/dataset/splitters/scaffold_splitter.py
Split dataset by Bemis-Murcko scaffolds
This function can also ignore examples containing null values for a
selected task when splitting. Deterministic split
:param dataset: pytorch geometric dataset obj
:param smiles_list: list of smiles corresponding to the dataset obj
:param task_idx: column idx of the data.y tensor. Will filter out
examples with null value in specified task column of the data.y tensor
prior to splitting. If None, then no filtering
:param null_value: float that specifies null value in data.y to filter if
task_idx is provided
:param frac_train:
:param frac_valid:
:param frac_test:
:param seed;
:return: train, valid, test slices of the input dataset obj
"""
np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0)
if task_idx != None:
# filter based on null values in task_idx
# get task array
y_task = np.array([data.y[task_idx].item() for data in dataset])
# boolean array that correspond to non null values
non_null = y_task != null_value
smiles_list = list(compress(enumerate(smiles_list), non_null))
else:
non_null = np.ones(len(dataset)) == 1
smiles_list = list(compress(enumerate(smiles_list), non_null))
rng = np.random.RandomState(seed)
scaffolds = defaultdict(list)
for ind, smiles in smiles_list:
scaffold = generate_scaffold(smiles, include_chirality=True)
scaffolds[scaffold].append(ind)
scaffold_sets = rng.permutation(list(scaffolds.values()))
n_total_valid = int(np.floor(frac_valid * len(dataset)))
n_total_test = int(np.floor(frac_test * len(dataset)))
train_idx = []
valid_idx = []
test_idx = []
for scaffold_set in scaffold_sets:
if len(valid_idx) + len(scaffold_set) <= n_total_valid:
valid_idx.extend(scaffold_set)
elif len(test_idx) + len(scaffold_set) <= n_total_test:
test_idx.extend(scaffold_set)
else:
train_idx.extend(scaffold_set)
train_dataset = dataset[torch.tensor(train_idx)]
valid_dataset = dataset[torch.tensor(valid_idx)]
test_dataset = dataset[torch.tensor(test_idx)]
return train_dataset, valid_dataset, test_dataset
the split_class_graphs code is:
def split_class_graphs(dataset):
y_list = []
for data in dataset:
y_list.append(tuple(data.y.tolist()))
num_classes = len(set(y_list))
y_cetos = set(y_list)
y_idxs = []
for y_ceto in y_cetos:
y_idxs.append([idx for idx, y in enumerate(y_list) if y == y_ceto])
# for i in y_idxs:
# print(len(i))
#print(y_list)
all_graphs_list = []
for graph in dataset:
adj = to_dense_adj(graph.edge_index)[0].numpy()
#print(adj)
all_graphs_list.append(adj)
class_graphs = []
for class_label in set(y_list):
c_graph_list = [all_graphs_list[i] for i in range(len(y_list)) if y_list[i] == class_label]
print(len(c_graph_list))
class_graphs.append( ( np.array(class_label), c_graph_list ) )
return class_graphs, y_idxs
and report the error:
[03:31:05] WARNING: not removing hydrogen atom without neighbors
[03:31:05] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:07] WARNING: not removing hydrogen atom without neighbors
[03:31:07] WARNING: not removing hydrogen atom without neighbors
Traceback (most recent call last):
File "graohon_get.py", line 208, in <module>
class_graphs, y_idxs = split_class_graphs(train_dataset)
File "graohon_get.py", line 64, in split_class_graphs
adj = to_dense_adj(graph.edge_index)[0].numpy()
File "/opt/conda/lib/python3.7/site-packages/torch_geometric/utils/to_dense_adj.py", line 22, in to_dense_adj
batch = edge_index.new_zeros(edge_index.max().item() + 1)
RuntimeError: max(): Expected reduction dim to be specified for input.numel() == 0. Specify the reduction dim with the 'dim' argument.
The dataset is contained in MoleculeNet, I download in http://snap.stanford.edu/gnn-pretrain/data/chem_dataset.zip
if run with Tox21 ToxCast SIDER ClinTox MUV datasets will report this error, but datasets BACE BBBP HIV will not reported.
Could you help me ?
thanks!
Environment
PyG version: 2.0.4
PyTorch version:1.9.0
OS:centos7
Python version:3.7.4
CUDA/cuDNN version:20.2
How you installed PyTorch and PyG (conda, pip, source):pip
Any other relevant information (e.g., version of torch-scatter):
torch 1.9.0
torch-cluster 1.6.0
torch-geometric 2.0.4
torch-points-kernels 0.6.10
torch-scatter 2.0.9
torch-sparse 0.6.12
torch-spline-conv 1.2.1
🐛 Describe the bug
my code is
the
`MoleculeDataset
codethe
scaffold_split
code isthe
split_class_graphs
code is:and report the error:
The dataset is contained in MoleculeNet, I download in
http://snap.stanford.edu/gnn-pretrain/data/chem_dataset.zip
if run withTox21 ToxCast SIDER ClinTox MUV
datasets will report this error, but datasetsBACE BBBP HIV
will not reported. Could you help me ? thanks!Environment
conda
,pip
, source):piptorch-scatter
): torch 1.9.0 torch-cluster 1.6.0 torch-geometric 2.0.4 torch-points-kernels 0.6.10 torch-scatter 2.0.9 torch-sparse 0.6.12 torch-spline-conv 1.2.1