Open tayssirmoussa66 opened 2 years ago
Can you ensure that
assert data.edge_index.max() < data.num_nodes
assert data.edge_index.min() >= 0
for all your data?
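For example, wrapped in a loop over the whole dataset (a sketch only; dataset stands in for your dataset object):

for i in range(len(dataset)):
    data = dataset[i]
    assert data.edge_index.max() < data.num_nodes
    assert data.edge_index.min() >= 0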
Both conditions hold for all of my data:
for i in range(len(dataset)):
    print(dataset[i])
    print(dataset[i].edge_index.max())
    print(dataset[i].edge_index.min())
Data(x=[24, 7], edge_index=[2, 44], edge_attr=[44, 3], y=[2880], z=[6], bin_index=[2, 576], bin_feat=[576, 7], smiles='CH2:15[Mg+:19].[CH2:20]1[O:21][CH2:22][CH2:23][CH2:24]1.[Cl-:14].[OH:1][c:2]1[n:3][cH:4]c:5[cH:12][cH:13]1') tensor(23) tensor(0)
Data(x=[16, 7], edge_index=[2, 28], edge_attr=[28, 3], y=[1280], z=[4], bin_index=[2, 256], bin_feat=[256, 7], smiles='[CH3:14][NH2:15].N+:1([O-:3])[c:4]1[cH:5]c:6[cH:10][cH:11][c:12]1[Cl:13].[OH2:16]') tensor(14) tensor(0)
Data(x=[27, 7], edge_index=[2, 56], edge_attr=[56, 3], y=[3645], z=[4], bin_index=[2, 729], bin_feat=[729, 7], smiles='CH2:1[n:3]1[cH:4]c:5c:6[c:7]2[cH:8]c:9c:10[cH:11][c:12]12.CH:25[OH:27]') tensor(26) tensor(0)
Data(x=[53, 7], edge_index=[2, 106], edge_attr=[106, 3], y=[14045], z=[6], bin_index=[2, 2809], bin_feat=[2809, 7], smiles='[Cl:1]C:2=C:6[CH3:8].[Cl:51][CH2:52][Cl:53].[N:9]1(C:13[c:15]2[n:16][cH:17]c:18[cH:28]c:29[cH:30]3)[n:19][cH:20]2)[CH2:10][CH2:11][CH2:12]1.[NH2:37][c:38]1[n:39][cH:40]c:41[n:42][cH:43]1.[cH:45]1[cH:46][cH:47][n:48][cH:49][cH:50]1') tensor(52) tensor(0)
Data(x=[27, 7], edge_index=[2, 58], edge_attr=[58, 3], y=[3645], z=[4], bin_index=[2, 729], bin_feat=[729, 7], smiles='[Cl:11][c:12]1[c:13]2c:14[s:24]c:25[cH:26]2.[Cl:1][c:2]1[cH:3]c:4[cH:7][cH:8][c:9]1[Cl:10]') tensor(26) tensor(0)
Data(x=[27, 7], edge_index=[2, 50], edge_attr=[50, 3], y=[3645], z=[2], bin_index=[2, 729], bin_feat=[729, 7], smiles='[CH3:24]C:25[OH:27].[Cl:1][c:2]1c:3c:4c:5[C:18]#[N:19])[n:6][n:7]1.[ClH:22].[OH2:23]') tensor(24) tensor(0)
Data(x=[48, 7], edge_index=[2, 100], edge_attr=[100, 3], y=[11520], z=[4], bin_index=[2, 2304], bin_feat=[2304, 7], smiles='[Cl:1][c:2]1c:3(=[O:25])=[O:26])[c:4]2c:5[CH2:6][CH2:7]N:8=[O:16])[CH2:9][CH2:10]2.[Cl:27][c:28]1[cH:29]c:30[cH:31][cH:32][c:33]1[F:34].[F:38][c:39]1[cH:40][c:41]2c:42CH:45[CH2:47][CH2:48]2') tensor(47) tensor(0)
Data(x=[43, 7], edge_index=[2, 92], edge_attr=[92, 3], y=[9245], z=[6], bin_index=[2, 1849], bin_feat=[1849, 7], smiles='CH2:33[O:35]C:36[N:38]1[CH2:39][CH2:40][NH:41][CH2:42][CH2:43]1.CH:1([CH3:3])[N:4]1[CH2:5][CH2:6]N:7[CH2:27][CH2:28]4)[nH:17][c:18]3[cH:19][cH:20]2)[CH2:8][CH2:9]1') tensor(42) tensor(0)
Data(x=[34, 7], edge_index=[2, 72], edge_attr=[72, 3], y=[5780], z=[4], bin_index=[2, 1156], bin_feat=[1156, 7], smiles='[F:19][c:20]1[cH:21][cH:22]c:23[CH3:31])=[O:32])[cH:33][cH:34]1.[NH2:1][CH:2]1[c:3]2c:4-[c:5]2c:6N:7[C:8]1=[O:9]') tensor(33) tensor(0)
Data(x=[69, 7], edge_index=[2, 136], edge_attr=[136, 3], y=[23805], z=[6], bin_index=[2, 4761], bin_feat=[4761, 7], smiles='[Br:1][c:2]1c:3[cH:4]c:5[cH:6][cH:7]1.CH:56([CH3:63])[CH3:64].[NH2:16][c:17]1[cH:18][cH:19]c:20[CH2:27][CH2:28]2)[cH:21][n:22]1.[O:65]=[CH:66]N:67[CH3:69].P-:32([F:34])([F:35])([F:36])([F:37])[F:38].[n:39]1([O:40]C:41=N+:45[CH3:47])[c:48]2[n:49][cH:50][cH:51][cH:52][c:53]2[n:54][n:55]1') tensor(68) tensor(0)
Data(x=[51, 7], edge_index=[2, 94], edge_attr=[94, 3], y=[13005], z=[8], bin_index=[2, 2601], bin_feat=[2601, 7], smiles='[BH4-:8].C:10[c:12]1[cH:13][cH:14]c:15[cH:40][cH:41]3)[CH2:24][CH2:25]2)[cH:16][cH:17]1.[CH2:6]=[O:7].[Na+:42].[Na+:9].[O:47]1[CH2:48][CH2:49][CH2:50][CH2:51]1.[OH:43]C:44[O-:46].S:1(=[O:3])([OH:4])[OH:5]') tensor(50) tensor(1)
Data(x=[31, 7], edge_index=[2, 60], edge_attr=[60, 3], y=[4805], z=[6], bin_index=[2, 961], bin_feat=[961, 7], smiles='C:1(=[O:3])[c:4]1[cH:5][cH:6]c:7c:8[cH:9]1.[CH3:18]S:19(=[O:21])=[O:22].[Cl:23][CH2:24][Cl:25].[cH:26]1[cH:27][cH:28][n:29][cH:30][cH:31]1') tensor(30) tensor(0)
Data(x=[25, 7], edge_index=[2, 48], edge_attr=[48, 3], y=[3125], z=[4], bin_index=[2, 625], bin_feat=[625, 7], smiles='C:2(=[O:9])[Cl:10].[Cl:11][c:12]1c:13[cH:14]c:15[nH:16][cH:17]1.[ClH:1].[cH:20]1[cH:21][cH:22][n:23][cH:24][cH:25]1') tensor(24) tensor(0)
Data(x=[52, 7], edge_index=[2, 106], edge_attr=[106, 3], y=[13520], z=[4], bin_index=[2, 2704], bin_feat=[2704, 7], smiles='[Br:27][N:28]1C:29[CH2:31][CH2:32][C:33]1=[O:34].C:35(=[O:46])[c:47]1[cH:48][cH:49][cH:50][cH:51][cH:52]1.[CH3:24][CH2:25][CH3:26].[OH:1][c:2]1[cH:3][cH:4][cH:5][cH:6][cH:7]1.[c:8]1([CH2:14][CH2:15][CH2:16][O:17][c:18]2[cH:19][cH:20][cH:21][cH:22][cH:23]2)[cH:9][cH:10][cH:11][cH:12][cH:13]1') tensor(51) tensor(0)
Data(x=[22, 7], edge_index=[2, 36], edge_attr=[36, 3], y=[2420], z=[2], bin_index=[2, 484], bin_feat=[484, 7], smiles='[BH4-:13].C:1(=[O:3])[NH:4][c:5]1[cH:6][cH:7]c:8[cH:11][cH:12]1.[CH3:15][OH:16].[CH3:17][CH2:18][O:19]C:20=[O:22].[Na+:14]') tensor(20) tensor(1)
Data(x=[47, 7], edge_index=[2, 98], edge_attr=[98, 3], y=[11045], z=[2], bin_index=[2, 2209], bin_feat=[2209, 7], smiles='[CH3:29][c:30]1[cH:31][cH:32]c:33[cH:39][cH:40]1.[CH3:41][c:42]1[cH:43][cH:44][cH:45][cH:46][cH:47]1.[F:1][c:2]1c:3[cH:7][n:8]c:9[cH:10]2)c:24[cH:25][cH:26][cH:27]1') tensor(46) tensor(0)
Data(x=[36, 7], edge_index=[2, 66], edge_attr=[66, 3], y=[6480], z=[4], bin_index=[2, 1296], bin_feat=[1296, 7], smiles='CH2:34[Cl:36].[CH3:26][CH2:27]N:28[CH2:31][CH3:32].[CH3:2]CH:3[CH3:11].[Cl:12][c:13]1[cH:14][cH:15][c:16]2c:17[cH:20]2)[cH:25]1.[ClH:1].[OH2:33]') tensor(33) tensor(0)
Data(x=[32, 7], edge_index=[2, 64], edge_attr=[64, 3], y=[5120], z=[4], bin_index=[2, 1024], bin_feat=[1024, 7], smiles='CH2:1[NH:3][CH2:4][c:5]1c:6([F:27])[F:28])[cH:18][cH:19][c:20]2[O:21][CH3:22])[cH:7][cH:8]c:9[cH:10]1.[CH3:29]C:30=[O:32]') tensor(31) tensor(0)
Here's how I constructed my data:
import os.path as osp

import numpy as np
import torch
from rdkit import Chem
from torch_geometric.data import Data, Dataset

# get_bond_label and get_bin_feature are project-specific helpers defined elsewhere.


class MoleculeDataset(Dataset):
    def __init__(self, root, filename, transform=None, pre_transform=None):
        """
        root = Where the dataset should be stored. This folder is split
        into raw_dir (downloaded dataset) and processed_dir (processed data).
        """
        self.filename = filename
        super(MoleculeDataset, self).__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        return self.filename

    @property
    def processed_file_names(self):
        return 'not_implemented.pt'

    def download(self):
        pass

    def process(self):
        self.data = open(self.raw_paths[0], "r")
        idx = 0
        for line in self.data:
            r, e = line.strip("\r\n ").split()
            react = r.split('>')[0]
            labels, sp_labels = get_bond_label(react, e)
            bin_index, bin_feature = get_bin_feature(react)
            bin_index = torch.tensor(bin_index)
            bin_index = bin_index.t().to(torch.long).view(2, -1)
            mol_obj = Chem.MolFromSmiles(react)
            # Get node features
            node_feats = self._get_node_features(mol_obj)
            # Get edge features
            edge_feats = self._get_edge_features(mol_obj)
            # Get adjacency info
            edge_index = self._get_adjacency_info(mol_obj)
            # Get labels info
            label = self._get_labels(labels)
            # Get sp_labels info
            sp_label = self._get_sp_labels(sp_labels)
            # Get bin_feature
            bin_feat = self._get_binary(bin_feature)
            # Get bin_index
            bin_idx = self._get_binary(bin_index)
            # Create data object
            data = Data(x=node_feats,
                        edge_index=edge_index,
                        edge_attr=edge_feats,
                        y=label,
                        z=sp_label,
                        bin_index=bin_idx,
                        bin_feat=bin_feat,
                        smiles=react)
            torch.save(data, osp.join(self.processed_dir, f'data_{idx}.pt'))
            idx += 1

    def _get_node_features(self, mol):
        """
        This will return a matrix / 2d array of the shape
        [Number of Nodes, Node Feature size]
        """
        all_node_feats = []
        for atom in mol.GetAtoms():
            node_feats = []
            # Feature 1: Atomic number
            node_feats.append(atom.GetAtomicNum())
            # Feature 2: Atom degree
            node_feats.append(atom.GetDegree())
            # Feature 3: Explicit valence
            node_feats.append(atom.GetExplicitValence())
            # Feature 4: Implicit valence
            node_feats.append(atom.GetImplicitValence())
            # Feature 5: Formal charge
            node_feats.append(atom.GetFormalCharge())
            # Feature 6: Aromaticity
            node_feats.append(atom.GetIsAromatic())
            # Feature 7: In ring
            node_feats.append(atom.IsInRing())
            # Append node features to matrix
            all_node_feats.append(node_feats)
        all_node_feats = np.asarray(all_node_feats)
        return torch.tensor(all_node_feats, dtype=torch.float)

    def _get_edge_features(self, mol):
        """
        This will return a matrix / 2d array of the shape
        [Number of edges, Edge Feature size]
        """
        all_edge_feats = []
        for bond in mol.GetBonds():
            edge_feats = []
            # Feature 1: Bond type (as double)
            edge_feats.append(bond.GetBondTypeAsDouble())
            # Feature 2: Rings
            edge_feats.append(bond.IsInRing())
            # Feature 3: Conjugated
            edge_feats.append(bond.GetIsConjugated())
            # Append edge features to matrix (twice, once per direction)
            all_edge_feats += [edge_feats, edge_feats]
        all_edge_feats = np.asarray(all_edge_feats)
        return torch.tensor(all_edge_feats, dtype=torch.float)

    def _get_adjacency_info(self, mol):
        """
        We could also use rdmolops.GetAdjacencyMatrix(mol),
        but we want to be sure that the order of the indices
        matches the order of the edge features.
        """
        edge_indices = []
        for bond in mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()
            edge_indices += [[i, j], [j, i]]
        edge_indices = torch.tensor(edge_indices)
        edge_indices = edge_indices.t().to(torch.long).view(2, -1)
        return edge_indices

    def _get_labels(self, e):
        label = np.asarray(e)
        return torch.tensor(label)

    def _get_sp_labels(self, e):
        sp_label = np.asarray(e)
        return torch.tensor(sp_label)

    def _get_binary(self, b):
        binary = np.asarray(b)
        return torch.tensor(binary)

    def len(self):
        return len(self.processed_file_names)

    def get(self, idx):
        """ - Equivalent to __getitem__ in pytorch
            - Is not needed for PyG's InMemoryDataset
        """
        data = torch.load(osp.join(self.processed_dir, f'data_{idx}.pt'))
        return data
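For reference, a minimal usage sketch of such a dataset (the root directory and filename below are placeholder values, not the actual paths from this project):

dataset = MoleculeDataset(root="data/", filename="reactions.txt")  # placeholders
print(dataset[0])                    # loads processed_dir/data_0.pt via get()
print(dataset[0].edge_index.shape)   # torch.Size([2, num_edges])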
I think I found the issue. GraphConv only supports one-dimensional edge weights, while you pass in edge_attr. This leads to a shape mismatch.
I didn't use SAGEConv, I used a GraphConv layer. Is it the same?
Yes, sorry. That's what I meant (edited the message above).
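Concretely, GraphConv's forward accepts an optional one-dimensional edge_weight of shape [num_edges], while edge_attr here has shape [num_edges, 3]. A rough sketch of two ways to make the shapes agree (data stands for one of the Data objects above; the channel sizes, the choice of the first edge-feature column, and the TransformerConv alternative are illustrative assumptions, not the fix applied in this thread):

from torch_geometric.nn import GraphConv, TransformerConv

conv = GraphConv(7, 64)
# Option 1: reduce edge_attr to one scalar per edge (here: the bond-type column)
# and pass it as edge_weight, which must have shape [num_edges].
out = conv(data.x, data.edge_index, edge_weight=data.edge_attr[:, 0])

# Option 2: use a layer that consumes multi-dimensional edge features directly,
# with edge_dim matching edge_attr.size(-1).
conv2 = TransformerConv(7, 64, edge_dim=3)
out2 = conv2(data.x, data.edge_index, edge_attr=data.edge_attr)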
🐛 Describe the bug
I'm working with a GCN model using PyTorch Geometric. I created my custom dataset with this shape:
Data(x=[16, 7], edge_index=[2, 28], edge_attr=[28, 3], y=[1280], z=[4], bin_index=[2, 256], bin_feat=[256, 7], smiles='[CH3:14][NH2:15].N+:1([O-:3])[c:4]1[cH:5]c:6[cH:10][cH:11][c:12]1[Cl:13].[OH2:16]')
And this is the model:
class GCN(torch.nn.Module):
When I try to train the model, I get the following error:
      2 loss = train_one_epoch(model, loader, optimizer, loss_fn)
      3 print(f"Epoch {1} | Train Loss {loss}")