import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from torch_geometric.data import Data
from torch_geometric.nn import SAGEConv
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
from captum.attr import Saliency, IntegratedGradients
import random
# Build a k-nearest-neighbour graph over the feature rows and wrap it in a
# PyTorch Geometric ``Data`` object. Relies on ``X_log_transformed``, ``df``
# and ``y`` having been prepared earlier in the script.
K = 10  # Number of nearest neighbors to consider (adjust as needed)
knn = NearestNeighbors(n_neighbors=K, algorithm='ball_tree')
knn.fit(X_log_transformed)  # Use the log-transformed data for graph construction
# No query is passed, so the neighbours of the fitted samples themselves are returned.
knn_indices = knn.kneighbors(return_distance=False)

graph = nx.Graph()
graph.add_nodes_from(range(len(df)))
graph.add_edges_from(
    (i, int(neighbor))
    for i, neighbors in enumerate(knn_indices)
    for neighbor in neighbors
    if i != neighbor  # never add self-loops
)

labels = dict(enumerate(y))
nx.set_node_attributes(graph, labels, 'label')

# Create the PyTorch Geometric Data object for the graph data.
# Convert the list of edges to a NumPy array and transpose it; the reshape
# keeps the (2, num_edges) layout even when the graph has no edges.
edge_array = np.array(list(graph.edges()), dtype=np.int64).reshape(-1, 2).T
edge_index = torch.tensor(edge_array, dtype=torch.long)
# nx.Graph reports every undirected edge once, but edge_index is read as a
# list of directed edges, so append the reversed copy of each edge.
edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
x = torch.tensor(X_log_transformed, dtype=torch.float)  # Use the log-transformed data
y = torch.tensor(y, dtype=torch.long)
data = Data(x=x, edge_index=edge_index, y=y)
# Define a custom GNN model with a more complex architecture
def forward(self, x, edge_index, batch=None):
    """Run the node features through the eight-layer convolution stack.

    Layers ``conv1``..``conv7`` are each followed by the matching batch
    norm (``bn1``..``bn7``), ReLU and dropout; ``conv8`` produces the output.

    Args:
        x: Node feature tensor fed to ``conv1``.
        edge_index: Graph connectivity handed to every convolution.
        batch: Accepted so existing callers that pass ``data.batch`` keep
            working; it is not used by the computation.

    Returns:
        The raw output of ``conv8`` (no activation or softmax applied).
    """
    hidden_stages = (
        (self.conv1, self.bn1),
        (self.conv2, self.bn2),
        (self.conv3, self.bn3),
        (self.conv4, self.bn4),
        (self.conv5, self.bn5),
        (self.conv6, self.bn6),  # Additional layer
        (self.conv7, self.bn7),  # Additional layer
    )
    for conv, norm in hidden_stages:
        x = self.dropout(self.relu(norm(conv(x, edge_index))))
    # The third positional argument of SAGEConv.forward is ``size``, not a
    # batch vector, so ``batch`` must not be forwarded to the output layer.
    return self.conv8(x, edge_index)
# Initialize the custom GNN model with the best hyperparameters
best_hidden_channels = 256  # Replace with your best value
model = CustomGNN(
    num_features=X_log_transformed.shape[1],  # one input channel per feature column
    hidden_channels=best_hidden_channels,
    num_classes=6,
)
# Weight initialization
def weight_init(m):
    """Apply Xavier-normal initialization to the weights of ``nn.Conv2d`` modules.

    Intended to be passed to ``Module.apply``; modules of any other type are
    left untouched.

    Args:
        m: The module currently being visited.
    """
    # NOTE(review): only nn.Conv2d is matched. The model in this script is
    # built from SAGEConv and BatchNorm1d layers, so this presumably leaves
    # it unchanged -- confirm which layer types should be re-initialized.
    if isinstance(m, nn.Conv2d):
        # In-place initializer; the name carries a trailing underscore.
        nn.init.xavier_normal_(m.weight.data)
# Visit every submodule of the model with the initializer defined above.
model.apply(weight_init)

# Define an optimizer with the best learning rate
best_lr = 0.005689229656484651  # Replace with your best value
optimizer = optim.Adam(model.parameters(), lr=best_lr)
# Full-graph training with LR scheduling and accuracy-based early stopping.
# Relies on ``criterion``, ``scheduler``, ``best_accuracy``, ``patience`` and
# ``early_stopping_counter`` having been set up before the loop.
for epoch in range(350):
    # --- one optimisation step on the whole graph ---
    model.train()
    optimizer.zero_grad()
    # Provide the batch argument when calling the model
    out = model(data.x, data.edge_index, data.batch)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()

    # --- evaluation; note it uses the same nodes that were trained on ---
    model.eval()
    with torch.no_grad():
        # Provide the batch argument when calling the model
        out = model(data.x, data.edge_index, data.batch)
        y_pred = out.argmax(dim=1)
        # scikit-learn works on host memory, so move the tensors to the CPU first.
        accuracy = accuracy_score(data.y.cpu(), y_pred.cpu())

    scheduler.step(loss.item())  # Adjust learning rate based on loss
    print(f'Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}')

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1

    # Stop once accuracy has failed to improve for ``patience`` epochs in a row.
    if early_stopping_counter >= patience:
        print("Early stopping")
        break

print(f"Best Accuracy: {best_accuracy:.4f}")
Now I want to explain my predictions using your library, but I can't work out how to apply it to a custom dataset like mine. Can you provide a simple code example for it?
How can I implement the same thing as in "vis_shapegraph.ipynb"?
Lastly, I am facing a library installation issue:
ERROR: Could not find a version that satisfies the requirement torch-cluster (from versions: 0.1.1, 0.2.3, 0.2.4, 1.0.1, 1.0.3, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.3.0, 1.4.0, 1.4.1, 1.4.2, 1.4.3a1, 1.4.3, 1.4.4, 1.4.5, 1.5.2, 1.5.3, 1.5.4, 1.5.5, 1.5.6, 1.5.7, 1.5.8, 1.5.9, 1.6.0, 1.6.1)
ERROR: No matching distribution found for torch-cluster
First of all, here is my model code:
import torch import torch.nn as nn import torch.optim as optim import pandas as pd import numpy as np import networkx as nx from sklearn.preprocessing import MinMaxScaler from sklearn.neighbors import NearestNeighbors from torch_geometric.data import Data from torch_geometric.nn import SAGEConv from sklearn.metrics import accuracy_score from sklearn.metrics import roc_curve, auc from sklearn.metrics import confusion_matrix import matplotlib.pyplot as plt from sklearn.metrics import ConfusionMatrixDisplay
from captum.attr import Saliency, IntegratedGradients import random
# Load your tabular data
excel_file_path = "/content/drive/MyDrive/GNN/chest_x_ray_dataset.xlsx"
df = pd.read_excel(excel_file_path)
# Fill missing values with the column means. numeric_only=True keeps the
# mean from raising if the sheet contains any non-numeric column.
df = df.fillna(df.mean(numeric_only=True))

# Assuming your target column is named 'class'
X = df.drop('class', axis=1).values  # Features
y = df['class'].values  # Target variable
# Initialize the MinMaxScaler
scaler = MinMaxScaler()

# Fit the scaler and transform X
X_normalized = scaler.fit_transform(X)

# Apply Log Transformation to the features.
# log1p(x) computes log(1 + x); the +1 avoids log(0).
X_log_transformed = np.log1p(X_normalized)
# Generate a graph based on your features
K = 10  # Number of nearest neighbors to consider (adjust as needed)
knn = NearestNeighbors(n_neighbors=K, algorithm='ball_tree')
knn.fit(X_log_transformed)  # Use the log-transformed data for graph construction
# No query is passed, so the neighbours of the fitted samples themselves are returned.
knn_indices = knn.kneighbors(return_distance=False)

graph = nx.Graph()
graph.add_nodes_from(range(len(df)))
graph.add_edges_from(
    (i, int(neighbor))
    for i, neighbors in enumerate(knn_indices)
    for neighbor in neighbors
    if i != neighbor  # never add self-loops
)

labels = dict(enumerate(y))
nx.set_node_attributes(graph, labels, 'label')

# Create the PyTorch Geometric Data object for the graph data.
# Convert the list of edges to a NumPy array and transpose it; the reshape
# keeps the (2, num_edges) layout even when the graph has no edges.
edge_array = np.array(list(graph.edges()), dtype=np.int64).reshape(-1, 2).T
edge_index = torch.tensor(edge_array, dtype=torch.long)
# nx.Graph reports every undirected edge once, but edge_index is read as a
# list of directed edges, so append the reversed copy of each edge.
edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
x = torch.tensor(X_log_transformed, dtype=torch.float)  # Use the log-transformed data
y = torch.tensor(y, dtype=torch.long)
data = Data(x=x, edge_index=edge_index, y=y)
# Define a custom GNN model with more complex architecture
class CustomGNN(torch.nn.Module):
    """Stack of eight SAGEConv layers with batch norm, ReLU and dropout helpers.

    Only the constructor appears in this part of the listing; it creates the
    layers but does not define how they are chained.

    Args:
        num_features: Input channels of the first convolution.
        hidden_channels: Width shared by the seven hidden convolutions and
            their batch-norm layers.
        num_classes: Output channels of the last convolution.
    """

    # The constructor must be spelled ``__init__`` (with double underscores),
    # otherwise Python never calls it and nn.Module is never initialized.
    def __init__(self, num_features, hidden_channels, num_classes):
        super().__init__()
        self.conv1 = SAGEConv(num_features, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.conv5 = SAGEConv(hidden_channels, hidden_channels)
        self.conv6 = SAGEConv(hidden_channels, hidden_channels)  # Additional layer
        self.conv7 = SAGEConv(hidden_channels, hidden_channels)  # Additional layer
        self.conv8 = SAGEConv(hidden_channels, num_classes)  # Adjust output layer
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(hidden_channels)
        self.bn2 = nn.BatchNorm1d(hidden_channels)
        self.bn3 = nn.BatchNorm1d(hidden_channels)
        self.bn4 = nn.BatchNorm1d(hidden_channels)
        self.bn5 = nn.BatchNorm1d(hidden_channels)
        self.bn6 = nn.BatchNorm1d(hidden_channels)  # Additional layer
        self.bn7 = nn.BatchNorm1d(hidden_channels)  # Additional layer
        self.dropout = nn.Dropout(0.3)
# Initialize the custom GNN model with the best hyperparameters
best_hidden_channels = 256  # Replace with your best value
model = CustomGNN(
    num_features=X_log_transformed.shape[1],  # one input channel per feature column
    hidden_channels=best_hidden_channels,
    num_classes=6,
)
# Weight initialization
def weight_init(m):
    """Apply Xavier-normal initialization to the weights of ``nn.Conv2d`` modules.

    Intended to be passed to ``Module.apply``; modules of any other type are
    left untouched.

    Args:
        m: The module currently being visited.
    """
    # NOTE(review): only nn.Conv2d is matched. The model in this script is
    # built from SAGEConv and BatchNorm1d layers, so this presumably leaves
    # it unchanged -- confirm which layer types should be re-initialized.
    if isinstance(m, nn.Conv2d):
        # In-place initializer; the name carries a trailing underscore.
        nn.init.xavier_normal_(m.weight.data)
# Visit every submodule of the model with the initializer defined above.
model.apply(weight_init)

# Define an optimizer with the best learning rate
best_lr = 0.005689229656484651  # Replace with your best value
optimizer = optim.Adam(model.parameters(), lr=best_lr)
# Define a loss function
criterion = nn.CrossEntropyLoss()

# Learning rate scheduling: halve the LR after 10 scheduler steps without a
# decrease in the monitored value (mode='min').
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=10,
    verbose=True,
)

# Training loop with early stopping
best_accuracy = 0.0
patience = 150
early_stopping_counter = 0

for epoch in range(350):
    model.train()
    optimizer.zero_grad()
    # NOTE(review): only these two statements of the loop body are present
    # in this part of the listing; the forward/backward pass and the
    # early-stopping bookkeeping are not shown here.

print(f"Best Accuracy: {best_accuracy:.4f}")
Now I want to explain my predictions using your library, but I can't work out how to apply it to a custom dataset like mine. Can you provide a simple code example for it? How can I implement the same thing as in "vis_shapegraph.ipynb"?
Lastly, I am facing a library installation issue: ERROR: Could not find a version that satisfies the requirement torch-cluster (from versions: 0.1.1, 0.2.3, 0.2.4, 1.0.1, 1.0.3, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.3.0, 1.4.0, 1.4.1, 1.4.2, 1.4.3a1, 1.4.3, 1.4.4, 1.4.5, 1.5.2, 1.5.3, 1.5.4, 1.5.5, 1.5.6, 1.5.7, 1.5.8, 1.5.9, 1.6.0, 1.6.1) ERROR: No matching distribution found for torch-cluster