mims-harvard / GraphXAI

GraphXAI: Resource to support the development and evaluation of GNN explainers
https://zitniklab.hms.harvard.edu/projects/GraphXAI
MIT License
171 stars 29 forks source link

explanation issues #71

Open ibrahim-patwary opened 1 year ago

ibrahim-patwary commented 1 year ago

First of all, here is my model code:

import torch import torch.nn as nn import torch.optim as optim import pandas as pd import numpy as np import networkx as nx from sklearn.preprocessing import MinMaxScaler from sklearn.neighbors import NearestNeighbors from torch_geometric.data import Data from torch_geometric.nn import SAGEConv from sklearn.metrics import accuracy_score from sklearn.metrics import roc_curve, auc from sklearn.metrics import confusion_matrix import matplotlib.pyplot as plt from sklearn.metrics import ConfusionMatrixDisplay

from captum.attr import Saliency, IntegratedGradients import random

# Load your tabular data
excel_file_path = "/content/drive/MyDrive/GNN/chest_x_ray_dataset.xlsx"
df = pd.read_excel(excel_file_path)
df = df.fillna(df.mean())  # replace missing values with the column mean

# Assuming your target column is named 'class'
X = df.drop('class', axis=1).values  # Features
y = df['class'].values  # Target variable

# Initialize the MinMaxScaler
scaler = MinMaxScaler()

# Fit the scaler and transform X
X_normalized = scaler.fit_transform(X)

# Apply Log Transformation to the features
X_log_transformed = np.log(X_normalized + 1)  # Adding 1 to avoid log(0)

# Generate a graph based on your features
K = 10  # Number of nearest neighbors to consider (adjust as needed)
knn = NearestNeighbors(n_neighbors=K, algorithm='ball_tree')
knn.fit(X_log_transformed)  # Use the log-transformed data for graph construction
knn_indices = knn.kneighbors(return_distance=False)

# One node per row of the table, connected to its K nearest neighbours.
graph = nx.Graph()
graph.add_nodes_from(range(len(df)))

for i, neighbors in enumerate(knn_indices):
    for neighbor in neighbors:
        if i != neighbor:  # never add self-loops
            graph.add_edge(i, neighbor)

labels = dict(enumerate(y))
nx.set_node_attributes(graph, labels, 'label')

# Create the PyTorch Geometric Data object for the graph data.
# Convert the list of edges to a NumPy array and transpose it to shape (2, E).
edge_index = torch.tensor(np.array(list(graph.edges())).T, dtype=torch.long)
# nx.Graph reports every undirected edge once, but PyG reads edge_index as a
# list of directed edges. Append the reversed pairs so messages flow both ways.
edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
x = torch.tensor(X_log_transformed, dtype=torch.float)  # Use the log-transformed data
y = torch.tensor(y, dtype=torch.long)
data = Data(x=x, edge_index=edge_index, y=y)

Define a custom GNN model with more complex architecture

class CustomGNN(torch.nn.Module):
    """Eight-layer GraphSAGE network producing one logit vector per node.

    Seven hidden blocks of ``SAGEConv -> BatchNorm1d -> ReLU -> Dropout(0.3)``
    are followed by a final ``SAGEConv`` that maps to ``num_classes`` outputs.

    Args:
        num_features: Size of each input node feature vector.
        hidden_channels: Width of every hidden layer.
        num_classes: Number of output logits per node.
    """

    def __init__(self, num_features, hidden_channels, num_classes):
        super().__init__()
        self.conv1 = SAGEConv(num_features, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels)
        self.conv4 = SAGEConv(hidden_channels, hidden_channels)
        self.conv5 = SAGEConv(hidden_channels, hidden_channels)
        self.conv6 = SAGEConv(hidden_channels, hidden_channels)  # Additional layer
        self.conv7 = SAGEConv(hidden_channels, hidden_channels)  # Additional layer
        self.conv8 = SAGEConv(hidden_channels, num_classes)  # Output layer
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(hidden_channels)
        self.bn2 = nn.BatchNorm1d(hidden_channels)
        self.bn3 = nn.BatchNorm1d(hidden_channels)
        self.bn4 = nn.BatchNorm1d(hidden_channels)
        self.bn5 = nn.BatchNorm1d(hidden_channels)
        self.bn6 = nn.BatchNorm1d(hidden_channels)  # Additional layer
        self.bn7 = nn.BatchNorm1d(hidden_channels)  # Additional layer
        self.dropout = nn.Dropout(0.3)

    def forward(self, x, edge_index, batch=None):
        """Return per-node logits.

        Args:
            x: Node feature matrix.
            edge_index: Edge list passed unchanged to every ``SAGEConv``.
            batch: Accepted so existing call sites keep working; not used.
                It was previously forwarded to ``conv8`` as the third
                positional argument, which ``SAGEConv.forward`` reads as
                ``size``, not as a batch vector.

        Returns:
            The output of the final ``SAGEConv`` (no softmax applied).
        """
        hidden_blocks = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
            (self.conv6, self.bn6),
            (self.conv7, self.bn7),
        )
        for conv, bn in hidden_blocks:
            x = conv(x, edge_index)
            x = bn(x)
            x = self.relu(x)
            x = self.dropout(x)
        return self.conv8(x, edge_index)

# Initialize the custom GNN model with the best hyperparameters
best_hidden_channels = 256  # Replace with your best value
model = CustomGNN(
    num_features=X_log_transformed.shape[1],
    hidden_channels=best_hidden_channels,
    num_classes=6,
)

Weight initialization

def weight_init(m):
    """Re-initialise the weight of an ``nn.Conv2d`` module with Xavier-normal values.

    Intended for use with ``Module.apply``; every module that is not an
    ``nn.Conv2d`` is left untouched.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    # NOTE(review): only nn.Conv2d is matched. A model built solely from
    # SAGEConv/BatchNorm1d layers contains no Conv2d, so applying this to such
    # a model changes nothing - confirm which layer types were meant.
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_normal_(m.weight)

model.apply(weight_init)

# Define an optimizer with the best learning rate
best_lr = 0.005689229656484651  # Replace with your best value
optimizer = optim.Adam(model.parameters(), lr=best_lr)

# Define a loss function
criterion = nn.CrossEntropyLoss()

# Learning rate scheduling: halve the LR after 10 epochs without loss improvement.
# NOTE(review): `verbose` is reported as deprecated by recent PyTorch releases -
# confirm against the installed version before relying on it.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=True)

# Training loop with early stopping
best_accuracy = 0.0
patience = 150
early_stopping_counter = 0

for epoch in range(350):
    # --- optimisation step on the full graph ---
    model.train()
    optimizer.zero_grad()

    # Provide the batch argument when calling the model
    out = model(data.x, data.edge_index, data.batch)

    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()

    # --- evaluation ---
    # NOTE: accuracy is computed on the same nodes that were used for the
    # loss above; this script defines no held-out split.
    model.eval()
    with torch.no_grad():
        # Provide the batch argument when calling the model
        out = model(data.x, data.edge_index, data.batch)
        y_pred = out.argmax(dim=1)
        accuracy = accuracy_score(data.y, y_pred)

    scheduler.step(loss)  # Adjust learning rate based on loss

    print(f'Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}')

    # Early stopping: give up after `patience` epochs without a new best accuracy.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= patience:
            print("Early stopping")
            break

print(f"Best Accuracy: {best_accuracy:.4f}")

Now I want to explain my predictions using your library, but I can't work out how to apply it to a custom dataset (like mine). Can you provide a simple code example for this? How can I implement the same thing as in "vis_shapegraph.ipynb"?

Lastly, I ran into a library installation issue: ERROR: Could not find a version that satisfies the requirement torch-cluster (from versions: 0.1.1, 0.2.3, 0.2.4, 1.0.1, 1.0.3, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.3.0, 1.4.0, 1.4.1, 1.4.2, 1.4.3a1, 1.4.3, 1.4.4, 1.4.5, 1.5.2, 1.5.3, 1.5.4, 1.5.5, 1.5.6, 1.5.7, 1.5.8, 1.5.9, 1.6.0, 1.6.1) ERROR: No matching distribution found for torch-cluster

ibrahim-patwary commented 1 year ago

Thank you so much for your valuable response :)