Closed yifanyang96 closed 1 year ago
... /opt/conda/conda-bld/pytorch_1573049310284/work/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [234,0,0], thread: [94,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed. /opt/conda/conda-bld/pytorch_1573049310284/work/aten/src/ATen/native/cuda/IndexKernel.cu:60: lambda [](int)->auto::operator()(int)->auto: block: [234,0,0], thread: [95,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed. Traceback (most recent call last): File "SAGPooling/main.py", line 21, in <module> main() File "SAGPooling/main.py", line 17, in main gcn_trainer.train() File "/home/bigdatalab35/yfyang/social_network/ClusterGCN/SAGPooling/trainer_pooling.py", line 90, in train batch_average_loss.backward() File "/home/bigdatalab35/anaconda3/lib/python3.7/site-packages/torch/tensor.py", line 166, in backward torch.autograd.backward(self, gradient, retain_graph, create_graph) File "/home/bigdatalab35/anaconda3/lib/python3.7/site-packages/torch/autograd/__init__.py", line 99, in backward allow_unreachable=True) # allow_unreachable flag RuntimeError: merge_sort: failed to synchronize: device-side assert triggered
I want to add pooling layers after GCN layers.
import torch
from torch_geometric.nn import GCNConv, GATConv, SAGEConv, TopKPooling, global_max_pool


class StackedGCN(torch.nn.Module):
    """
    Multi-layer GCN model with a TopKPooling step after every hidden layer.

    NOTE(review): TopKPooling keeps only a subset of the nodes, so the
    prediction matrix returned by ``forward`` is expected to have fewer rows
    than the input feature matrix. Callers that index predictions with
    original node ids would then need to remap them through the ``perm``
    output of the pooling layers -- confirm against the trainer.
    """

    def __init__(self, args, input_channels, output_channels):
        """
        :param args: Arguments object (reads ``args.layers`` and ``args.dropout``).
        :param input_channels: Number of features.
        :param output_channels: Number of target features.
        """
        super(StackedGCN, self).__init__()
        self.args = args
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.setup_layers()

    def setup_layers(self):
        """
        Creating the layers based on the args.

        One GCNConv and one TopKPooling are created per consecutive pair of
        channel sizes. Both collections are ``torch.nn.ModuleList`` objects so
        that their parameters are registered with this module and therefore
        show up in ``parameters()``, ``state_dict()`` and ``.to(device)``.
        """
        # NOTE(review): this rewrites ``args.layers`` in place (kept for
        # backward compatibility); building a second model from the same
        # ``args`` object will wrap the list a second time.
        self.args.layers = [self.input_channels] + self.args.layers + [self.output_channels]
        sizes = self.args.layers

        # GCNConv can be swapped for SAGEConv or GATConv with the same
        # (in_channels, out_channels) arguments.
        self.layers = torch.nn.ModuleList(
            GCNConv(fan_in, fan_out) for fan_in, fan_out in zip(sizes[:-1], sizes[1:])
        )
        # One pooling layer per convolution; ``forward`` only uses the ones
        # that follow a hidden layer, so the last entry stays unused.
        self.pools = torch.nn.ModuleList(TopKPooling(fan_out) for fan_out in sizes[1:])

    def forward(self, edges, features):
        """
        Making a forward pass.

        :param edges: Edge list LongTensor.
        :param features: Feature matrix input FloatTensor.
        :return predictions: Prediction matrix output FloatTensor
            (log-softmax over dim 1).
        """
        # Every convolution except the last one is a hidden layer.
        hidden_count = len(self.layers) - 1
        for i in range(hidden_count):
            features = torch.nn.functional.relu(self.layers[i](features, edges))
            # Dropout is only applied from the third hidden layer onwards.
            if i > 1:
                features = torch.nn.functional.dropout(
                    features, p=self.args.dropout, training=self.training
                )
            # TopKPooling returns (x, edge_index, edge_attr, batch, perm, score);
            # only the pooled features and the filtered edge list are kept.
            features, edges, _, _, _, _ = self.pools[i](features, edges)
            print("Pooling Finish")
        # Index the output layer directly instead of relying on the loop
        # variable, which is undefined when there are no hidden layers.
        features = self.layers[-1](features, edges)
        predictions = torch.nn.functional.log_softmax(features, dim=1)
        return predictions
There is no error without the pooling layers. I used my own dataset (features, edges). I wonder whether omitting the `batch` argument caused the problem.
Mh, do you know where exactly the error occurs? You should check this with CUDA_LAUNCH_BLOCKING=1.
CUDA_LAUNCH_BLOCKING=1
Error
My code
I want to add pooling layers after GCN layers.
There is no error without the pooling layers. I used my own dataset (features, edges). I wonder whether omitting the `batch` argument caused the problem.