LukeLIN-web opened this issue 1 year ago
You might need to use torch_geometric.transforms.RandomNodeSplit to split the dataset yourself and get splits like train_mask and test_mask. Refer to this for an example.
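For instance, a minimal sketch (the dataset name and root path here are illustrative, not from this thread):

    import torch_geometric.transforms as T
    from torch_geometric.datasets import AttributedGraphDataset

    # Attach train/val/test masks to a dataset that ships without them:
    transform = T.RandomNodeSplit(num_val=500, num_test=500)
    dataset = AttributedGraphDataset(root='/tmp/data', name='Cora',
                                     transform=transform)
    data = dataset[0]
    print(data.train_mask.sum(), data.val_mask.sum(), data.test_mask.sum())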
Thank you for your help. I ran into another problem.
import torch
import torch.nn.functional as F
from torch import Tensor
from tqdm import tqdm

import torch_geometric.transforms as T
from torch_geometric.datasets import AttributedGraphDataset
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import SAGEConv
class SAGE(torch.nn.Module):
    def __init__(self, in_channels: int, hidden_channels: int,
                 out_channels: int, num_layers: int = 2):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
        self.convs.append(SAGEConv(hidden_channels, out_channels))

    def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i < len(self.convs) - 1:
                x = x.relu_()
                x = F.dropout(x, p=0.5, training=self.training)
        return x
    @torch.no_grad()
    def inference(self, x_all: Tensor, device: torch.device,
                  subgraph_loader: NeighborLoader) -> Tensor:
        pbar = tqdm(total=len(subgraph_loader) * len(self.convs))
        pbar.set_description('Evaluating')

        # Compute representations of nodes layer by layer, using *all*
        # available edges. This leads to faster computation in contrast to
        # immediately computing the final representations of each batch:
        for i, conv in enumerate(self.convs):
            xs = []
            for batch in subgraph_loader:
                x = x_all[batch.node_id.to(x_all.device)].to(device)
                x = conv(x, batch.edge_index.to(device))
                x = x[:batch.batch_size]
                if i < len(self.convs) - 1:
                    x = x.relu_()
                xs.append(x.cpu())
                pbar.update(1)
            x_all = torch.cat(xs, dim=0)
        pbar.close()
        return x_all
transform = T.Compose([
    T.RandomNodeSplit(num_val=500, num_test=500),
])


def main():
    dataset = AttributedGraphDataset(root="/data/ogb/", name="mag",
                                     transform=transform)
    data = dataset[0]
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    torch.manual_seed(12345)
    model = SAGE(data.num_features, 2, dataset.num_classes).to(device)
    train_loader = NeighborLoader(
        data,
        input_nodes=data.train_mask,
        num_neighbors=[10, 5],
        batch_size=1024,
        shuffle=False,
        num_workers=14,
    )
    print("loader finished")
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(1, 2):
        model.train()
        for batch in train_loader:
            optimizer.zero_grad()
            batch = batch.to(device)
            if hasattr(batch, 'adj_t'):
                edge_index = batch.adj_t
            else:
                edge_index = batch.edge_index
            out = model(batch.x, edge_index)
            batch_size = batch.batch_size
            out = out[:batch_size]
            target = batch.y[:batch_size]
            print("out", out.shape)
            print("target", target.shape)
            loss = F.cross_entropy(out, target.squeeze(1))
            loss.backward()
            optimizer.step()
    print("train finished")


if __name__ == "__main__":
    main()
The error is:
Traceback (most recent call last):
File "/root/share/pytorch_geometric/examples/attributedgraph.py", line 98, in <module>
main()
File "/root/share/pytorch_geometric/examples/attributedgraph.py", line 86, in main
out = model(batch.x, edge_index)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/root/share/pytorch_geometric/examples/attributedgraph.py", line 24, in forward
x = conv(x, edge_index)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch_geometric/nn/conv/sage_conv.py", line 131, in forward
out = self.propagate(edge_index, x=x, size=size)
File "/opt/conda/lib/python3.10/site-packages/torch_geometric/nn/conv/message_passing.py", line 484, in propagate
out = self.aggregate(out, **aggr_kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch_geometric/nn/conv/message_passing.py", line 608, in aggregate
return self.aggr_module(inputs, index, ptr=ptr, dim_size=dim_size,
File "/opt/conda/lib/python3.10/site-packages/torch_geometric/nn/aggr/base.py", line 109, in __call__
return super().__call__(x, index, ptr, dim_size, dim, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/lib/python3.10/site-packages/torch_geometric/nn/aggr/basic.py", line 34, in forward
return self.reduce(x, index, ptr, dim_size, dim, reduce='mean')
File "/opt/conda/lib/python3.10/site-packages/torch_geometric/nn/aggr/base.py", line 155, in reduce
return scatter(x, index, dim, dim_size, reduce)
File "/opt/conda/lib/python3.10/site-packages/torch_geometric/utils/scatter.py", line 68, in scatter
size = list(src.size())
TypeError: SparseTensor.size() missing 1 required positional argument: 'dim'
The node features in the MAG dataset are stored as torch_sparse.SparseTensor for memory efficiency. You can convert them to a dense representation via x = x.to_dense(). This requires a relatively large amount of memory, though.
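For example, a minimal sketch of that conversion (with the memory caveat above; data refers to the graph loaded in the script):

    from torch_sparse import SparseTensor

    x = data.x               # torch_sparse.SparseTensor, shape [num_nodes, num_features]
    assert isinstance(x, SparseTensor)
    data.x = x.to_dense()    # materializes the full dense matrix; can be very large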
How can I train it without converting it to a dense matrix?
Sorry, but currently PyG does not support sparse node features.
Thanks, let me try training with x = x.to_dense().
Failed.
File "attributedgraph.py", line 86, in main
out = model(batch.x.to_dense(), edge_index)
File "/opt/conda/lib/python3.8/site-packages/torch_sparse/tensor.py", line 481, in to_dense
mat = torch.zeros(self.sizes(), dtype=value.dtype,
RuntimeError: CUDA out of memory.
Tried to allocate 441.45 GiB (GPU 1; 15.78 GiB total capacity; 8.54 GiB already allocated; 6.14 GiB free; 8.56 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max
You can convert your sparse features to a torch.sparse.Tensor and then embed them via a Linear layer:

    x = x.to_torch_sparse_coo_tensor()
    x = lin(x)
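Spelled out a bit more, a sketch of that idea (the hidden size of 256 and the placement before the first conv are assumptions, not from this thread):

    import torch

    # Embed the sparse features into a dense, low-dimensional space first:
    lin = torch.nn.Linear(dataset.num_features, 256).to(device)

    x = batch.x.to_torch_sparse_coo_tensor().to(device)  # torch_sparse -> torch.sparse COO
    x = lin(x)  # dense [num_nodes, 256]
    # The model's in_channels must then match the embedding size (256 here):
    out = model(x, batch.edge_index.to(device))

In practice, the Linear layer would live inside the model so that its weights are trained together with the convolutions.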
Thank you. It almost works.
Traceback (most recent call last):
File "attributedgraph.py", line 100, in <module>
main()
File "attributedgraph.py", line 86, in main
bx = batch.x.to_torch_sparse_coo_tensor()
File "/opt/conda/lib/python3.8/site-packages/torch_sparse/tensor.py", line 497, in to_torch_sparse_coo_tensor
index = torch.stack([row, col], dim=0)
RuntimeError: CUDA out of memory. Tried to allocate 6.47 GiB (GPU 1; 15.78 GiB total capacity; 8.54 GiB already allocated; 6.14 GiB free; 8.56 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Weird. It looks like the main bottleneck is the conversion between the two sparse formats. As far as I know, this operation should not incur much memory overhead. Can you try something like x = x.cpu().to_torch_sparse_coo_tensor().to(device)?
RuntimeError: CUDA out of memory. Tried to allocate 6.47 GiB (GPU 1; 15.78 GiB total capacity; 10.16 GiB already allocated; 4.52 GiB free; 10.18 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
I think the problem is that NeighborLoader currently does not support sparse node features, which means that batch.x is exactly the same as data.x and therefore requires a significant amount of memory. Sorry about that.
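A possible workaround sketch, not confirmed in this thread: since batch.x is the full matrix anyway, select only the sampled rows yourself before densifying. This assumes the loader exposes the sampled global node indices as batch.n_id (attribute naming may differ across PyG versions):

    for batch in train_loader:
        # Slice this batch's rows from the sparse feature matrix, then densify
        # only that small [num_batch_nodes, num_features] block:
        x = data.x[batch.n_id]       # torch_sparse.SparseTensor row selection
        x = x.to_dense().to(device)
        out = model(x, batch.edge_index.to(device))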
🚀 The feature, motivation and pitch
I wonder how to train on AttributedGraphDataset. I cannot find any examples for it, and I ran into some problems when I tried to write one myself. The dataset also doesn't come with a train_mask.