Open asiamanman opened 3 years ago
This might be related: https://github.com/rusty1s/pytorch_cluster/issues/109
When I modified the program according to this problem, I got the following error: RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing, you must use the 'spawn' start method.
The program I'm running is something like this:
import numpy as np
import torch
import open3d as o3d
import os
import glob
import pandas as pd
from pathlib import Path
import torch_geometric.transforms as T
from torch_geometric.data import DataLoader, Data
from torch_geometric.nn import PointConv, fps, radius, global_max_pool
import torch.nn.functional as F
from torch.nn import Sequential as Seq, Linear as Lin, ReLU, BatchNorm1d as BN
import matplotlib.pyplot as plt
##from deeplearning import creat_train_dataset, creat_test_dataset, load_train_dataset, load_test_dataset
# Module-level accumulators: filled by the creat_* helpers below and then
# wrapped in DataLoaders by load_train_dataset / load_test_dataset.
data_train_list = []
data_test_list = []
class SAModule(torch.nn.Module):
    """Set-abstraction stage: FPS sampling + radius grouping + PointConv."""

    def __init__(self, ratio, r, nn):
        super(SAModule, self).__init__()
        self.ratio = ratio  # fraction of points kept by farthest-point sampling
        self.r = r          # grouping radius
        self.conv = PointConv(nn, add_self_loops=False)

    def forward(self, x, pos, batch):
        idx = fps(pos, batch, ratio=self.ratio)
        # radius() is deliberately run on CPU (workaround for the GPU radius
        # issue discussed in this thread); the index tensors are then moved
        # back to the device the inputs live on.
        row, col = radius(pos.cpu(), pos[idx].cpu(), self.r, batch.cpu(),
                          batch[idx].cpu(), max_num_neighbors=32)
        # Fix: derive the target device from the input instead of reading a
        # global `device` that is only defined inside the __main__ guard.
        row = row.to(pos.device)
        col = col.to(pos.device)
        edge_index = torch.stack([col, row], dim=0)
        x = self.conv(x, (pos, pos[idx]), edge_index)
        pos, batch = pos[idx], batch[idx]
        return x, pos, batch
class GlobalSAModule(torch.nn.Module):
    """Global set-abstraction: MLP on every point, then max-pool per cloud."""

    def __init__(self, nn):
        super(GlobalSAModule, self).__init__()
        self.nn = nn

    def forward(self, x, pos, batch):
        features = torch.cat([x, pos], dim=1)
        pooled = global_max_pool(self.nn(features), batch)
        # One (dummy) position and one batch index per pooled cloud.
        zero_pos = pos.new_zeros((pooled.size(0), 3))
        new_batch = torch.arange(pooled.size(0), device=batch.device)
        return pooled, zero_pos, new_batch
def MLP(channels, batch_norm=True):
    """Build an MLP from consecutive channel sizes.

    Each layer is Linear -> ReLU, followed by BatchNorm1d when
    ``batch_norm`` is True. Fix: the original accepted ``batch_norm``
    but ignored it and always added BatchNorm1d.
    """
    layers = []
    for i in range(1, len(channels)):
        block = [Lin(channels[i - 1], channels[i]), ReLU()]
        if batch_norm:
            block.append(BN(channels[i]))
        layers.append(Seq(*block))
    return Seq(*layers)
class Net(torch.nn.Module):
    """PointNet++-style point-cloud classifier (log-softmax output)."""

    def __init__(self):
        super(Net, self).__init__()
        # Hierarchical set-abstraction stages.
        self.sa1_module = SAModule(0.5, 0.2, MLP([3, 64, 64, 128]))
        self.sa2_module = SAModule(0.25, 0.4, MLP([128 + 3, 128, 128, 256]))
        self.sa3_module = GlobalSAModule(MLP([256 + 3, 256, 512, 1024]))
        # Classification head. NOTE(review): it outputs 10 classes while the
        # dataset labels only use 4 (stand/sleep/sit/others) -- confirm.
        self.lin1 = Lin(1024, 512)
        self.lin2 = Lin(512, 256)
        self.lin3 = Lin(256, 10)

    def forward(self, data):
        out = (data.x, data.pos, data.batch)
        for stage in (self.sa1_module, self.sa2_module, self.sa3_module):
            out = stage(*out)
        x = out[0]
        x = F.dropout(F.relu(self.lin1(x)), p=0.5, training=self.training)
        x = F.dropout(F.relu(self.lin2(x)), p=0.5, training=self.training)
        return F.log_softmax(self.lin3(x), dim=-1)
def train(loader):
    """Run one training epoch over ``loader``; return training accuracy.

    Relies on module-level ``model``, ``optimizer`` and ``device``.
    """
    model.train()
    correct = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        # Fix: the forward pass was run twice per batch (once for the
        # prediction, once for the loss), doubling compute and measuring
        # accuracy on a different dropout sample than the loss. Run it
        # once and reuse the output.
        out = model(data)
        pred = out.max(1)[1]
        correct += pred.eq(data.y).sum().item()
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
    return correct / len(loader.dataset)
def test(loader):
    """Evaluate ``model`` on ``loader`` and return the accuracy.

    Prints the batch number, predictions, labels and running correct
    count for every batch (diagnostic output kept from the original).
    """
    model.eval()
    correct = 0
    for step, data in enumerate(loader, start=1):
        data = data.to(device)
        print(step)
        with torch.no_grad():
            pred = model(data).max(1)[1]
        print(pred)
        print(data.y)
        correct += pred.eq(data.y).sum().item()
        print(correct)
    return correct / len(loader.dataset)
def creat_train_dataset(pcd, objtype):
    """Convert an Open3D point cloud into a Data sample and append it to
    the global training list.

    ``objtype`` must be one of 'stand'/'sleep'/'sit'/'others'.
    Relies on the module-level ``device`` and ``data_train_list``.
    """
    class_index = {'stand': 0, 'sleep': 1, 'sit': 2, 'others': 3}[objtype]
    label = torch.tensor([class_index], dtype=torch.int64, device=device)
    points = np.asarray(pcd.points).astype(np.float32)
    pos = torch.from_numpy(points).to(device)
    data_train_list.append(Data(pos=pos, y=label))
def creat_test_dataset(pcd, objtype):
    """Convert a raw (N, 3) point array into a Data sample and append it
    to the global test list.

    Unlike creat_train_dataset, ``pcd`` here is an array of points, not
    an Open3D point cloud. Relies on ``device`` and ``data_test_list``.
    """
    class_index = {'stand': 0, 'sleep': 1, 'sit': 2, 'others': 3}[objtype]
    label = torch.tensor([class_index], dtype=torch.int64, device=device)
    points = np.asarray(pcd).astype(np.float32)
    pos = torch.from_numpy(points).to(device)
    data_test_list.append(Data(pos=pos, y=label))
def load_train_dataset():
    """Read the training .pcd files for every class and return a DataLoader."""
    base = "/home/keigo/デスクトップ/train"
    # Fix: the four per-class loops were copy-pasted; drive them from a table.
    class_dirs = [
        ("train_stand", 'stand'),
        ("train_sleep", 'sleep'),
        ("train_sit", 'sit'),
        ("others", 'others'),
    ]
    for subdir, objtype in class_dirs:
        for path in sorted(glob.glob("%s/%s/*.pcd" % (base, subdir))):
            pcd = o3d.io.read_point_cloud(path)
            creat_train_dataset(pcd=pcd, objtype=objtype)
    # Fix: the samples hold CUDA tensors, which cannot be shared with forked
    # worker subprocesses (the RuntimeError reported in this thread) -- use
    # num_workers=0.
    data_loader = DataLoader(data_train_list, batch_size=10, shuffle=True,
                             num_workers=0)
    return data_loader
def load_test_dataset():
    """Build the test DataLoader from the 'sit' point-cloud files.

    For each cloud: the two dominant planes (presumably floor/wall --
    TODO confirm) are removed with RANSAC, the remainder is clustered
    with DBSCAN, and every sufficiently large cluster becomes one test
    sample labelled 'sit'.
    """
    files = sorted(glob.glob("/home/keigo/デスクトップ/test3/test_sit/*.pcd"))
    for path in files:
        pcd = o3d.io.read_point_cloud(path)
        # Remove the two largest planes. Fix: the original unpacked
        # plane_model into [a, b, c, d], shadowing the file-loop variable;
        # the values were only used by a commented-out print, so drop them.
        for _ in range(2):
            plane_model, inliers = pcd.segment_plane(
                distance_threshold=0.05, ransac_n=3, num_iterations=1000)
            inlier_cloud = pcd.select_by_index(inliers)
            inlier_cloud.paint_uniform_color([1.0, 0, 0])
            pcd = pcd.select_by_index(inliers, invert=True)
        # Cluster the remaining points with DBSCAN.
        with o3d.utility.VerbosityContextManager(
                o3d.utility.VerbosityLevel.Debug) as cm:
            labels = np.array(
                pcd.cluster_dbscan(eps=0.13, min_points=45, print_progress=True))
        max_label = labels.max() + 1
        colors = plt.get_cmap("tab20")(labels / (max_label if max_label > 0 else 1))
        colors[labels < 0] = 0  # noise points painted black
        pcd.colors = o3d.utility.Vector3dVector(colors[:, :3])
        if max_label > 0:
            xyz = np.asarray(pcd.points)
            for i in range(max_label):
                cluster_xyz = xyz[labels == i]
                point_cloud = o3d.geometry.PointCloud()
                point_cloud.points = o3d.utility.Vector3dVector(cluster_xyz)
                # Keep only clusters large enough to plausibly be a subject.
                if len(cluster_xyz) > 600:
                    o3d.visualization.draw_geometries([point_cloud])
                    creat_test_dataset(pcd=cluster_xyz, objtype='sit')
    # Fix: the samples hold CUDA tensors, which cannot be shared with forked
    # worker subprocesses -- use num_workers=0.
    data_loader = DataLoader(data_test_list, batch_size=1, shuffle=False,
                             num_workers=0)
    return data_loader
if __name__ == '__main__':
    # Pick the GPU when available. NOTE: `device`, `model` and `optimizer`
    # are read as globals by the helper functions defined above.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Load weights trained elsewhere (per the thread, on Google Colab).
    model.load_state_dict(torch.load("/home/keigo/デスクトップ/train/model.pth"))
    test_loader=load_test_dataset()
    test(test_loader)
A DataLoader that holds CUDA tensors does not work when using multiple workers. You need to either use CPU tensors for data loading, or set num_workers=0.
Thanks! The program runs when num_workers=0. However, the accuracy does not improve. What could be the cause?
What do you mean? I thought that the problem was that running your model on Jetson Nano will not result in the same accuracy. Is this still correct?
Yes, you can't get the same accuracy. Below is the modified code.
import numpy as np
import torch
import open3d as o3d
import os
import glob
import pandas as pd
from pathlib import Path
import torch_geometric.transforms as T
from torch_geometric.data import DataLoader, Data
from torch_geometric.nn import PointConv, fps, radius, global_max_pool
import torch.nn.functional as F
from torch.nn import Sequential as Seq, Linear as Lin, ReLU, BatchNorm1d as BN
import matplotlib.pyplot as plt
##from deeplearning import creat_train_dataset, creat_test_dataset, load_train_dataset, load_test_dataset
# Module-level accumulators: filled by the creat_* helpers below and then
# wrapped in DataLoaders by load_train_dataset / load_test_dataset.
data_train_list = []
data_test_list = []
class SAModule(torch.nn.Module):
    """Set-abstraction stage: FPS sampling + radius grouping + PointConv."""

    def __init__(self, ratio, r, nn):
        super(SAModule, self).__init__()
        self.ratio = ratio  # fraction of points kept by farthest-point sampling
        self.r = r          # grouping radius
        self.conv = PointConv(nn, add_self_loops=False)

    def forward(self, x, pos, batch):
        idx = fps(pos, batch, ratio=self.ratio)
        # radius() is deliberately run on CPU (workaround for the GPU radius
        # issue discussed in this thread); the index tensors are then moved
        # back to the device the inputs live on.
        row, col = radius(pos.cpu(), pos[idx].cpu(), self.r, batch.cpu(),
                          batch[idx].cpu(), max_num_neighbors=32)
        # Fix: derive the target device from the input instead of reading a
        # global `device` that is only defined inside the __main__ guard.
        row = row.to(pos.device)
        col = col.to(pos.device)
        edge_index = torch.stack([col, row], dim=0)
        x = self.conv(x, (pos, pos[idx]), edge_index)
        pos, batch = pos[idx], batch[idx]
        return x, pos, batch
class GlobalSAModule(torch.nn.Module):
    """Global set-abstraction: MLP on every point, then max-pool per cloud."""

    def __init__(self, nn):
        super(GlobalSAModule, self).__init__()
        self.nn = nn

    def forward(self, x, pos, batch):
        features = torch.cat([x, pos], dim=1)
        pooled = global_max_pool(self.nn(features), batch)
        # One (dummy) position and one batch index per pooled cloud.
        zero_pos = pos.new_zeros((pooled.size(0), 3))
        new_batch = torch.arange(pooled.size(0), device=batch.device)
        return pooled, zero_pos, new_batch
def MLP(channels, batch_norm=True):
    """Build an MLP from consecutive channel sizes.

    Each layer is Linear -> ReLU, followed by BatchNorm1d when
    ``batch_norm`` is True. Fix: the original accepted ``batch_norm``
    but ignored it and always added BatchNorm1d.
    """
    layers = []
    for i in range(1, len(channels)):
        block = [Lin(channels[i - 1], channels[i]), ReLU()]
        if batch_norm:
            block.append(BN(channels[i]))
        layers.append(Seq(*block))
    return Seq(*layers)
class Net(torch.nn.Module):
    """PointNet++-style point-cloud classifier (log-softmax output)."""

    def __init__(self):
        super(Net, self).__init__()
        # Hierarchical set-abstraction stages.
        self.sa1_module = SAModule(0.5, 0.2, MLP([3, 64, 64, 128]))
        self.sa2_module = SAModule(0.25, 0.4, MLP([128 + 3, 128, 128, 256]))
        self.sa3_module = GlobalSAModule(MLP([256 + 3, 256, 512, 1024]))
        # Classification head. NOTE(review): it outputs 10 classes while the
        # dataset labels only use 4 (stand/sleep/sit/others) -- confirm.
        self.lin1 = Lin(1024, 512)
        self.lin2 = Lin(512, 256)
        self.lin3 = Lin(256, 10)

    def forward(self, data):
        out = (data.x, data.pos, data.batch)
        for stage in (self.sa1_module, self.sa2_module, self.sa3_module):
            out = stage(*out)
        x = out[0]
        x = F.dropout(F.relu(self.lin1(x)), p=0.5, training=self.training)
        x = F.dropout(F.relu(self.lin2(x)), p=0.5, training=self.training)
        return F.log_softmax(self.lin3(x), dim=-1)
def train(loader):
    """Run one training epoch over ``loader``; return training accuracy.

    Relies on module-level ``model``, ``optimizer`` and ``device``.
    """
    model.train()
    correct = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        # Fix: the forward pass was run twice per batch (once for the
        # prediction, once for the loss), doubling compute and measuring
        # accuracy on a different dropout sample than the loss. Run it
        # once and reuse the output.
        out = model(data)
        pred = out.max(1)[1]
        correct += pred.eq(data.y).sum().item()
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
    return correct / len(loader.dataset)
def test(loader):
    """Evaluate ``model`` on ``loader`` and return the accuracy.

    Prints the batch number, predictions, labels and running correct
    count for every batch (diagnostic output kept from the original).
    """
    model.eval()
    correct = 0
    for step, data in enumerate(loader, start=1):
        data = data.to(device)
        print(step)
        with torch.no_grad():
            pred = model(data).max(1)[1]
        print(pred)
        print(data.y)
        correct += pred.eq(data.y).sum().item()
        print(correct)
    return correct / len(loader.dataset)
def creat_train_dataset(pcd, objtype):
    """Convert an Open3D point cloud into a Data sample and append it to
    the global training list.

    ``objtype`` must be one of 'stand'/'sleep'/'sit'/'others'.
    Relies on the module-level ``device`` and ``data_train_list``.
    """
    class_index = {'stand': 0, 'sleep': 1, 'sit': 2, 'others': 3}[objtype]
    label = torch.tensor([class_index], dtype=torch.int64, device=device)
    points = np.asarray(pcd.points).astype(np.float32)
    pos = torch.from_numpy(points).to(device)
    data_train_list.append(Data(pos=pos, y=label))
def creat_test_dataset(pcd, objtype):
    """Convert a raw (N, 3) point array into a Data sample and append it
    to the global test list.

    Unlike creat_train_dataset, ``pcd`` here is an array of points, not
    an Open3D point cloud. Relies on ``device`` and ``data_test_list``.
    """
    class_index = {'stand': 0, 'sleep': 1, 'sit': 2, 'others': 3}[objtype]
    label = torch.tensor([class_index], dtype=torch.int64, device=device)
    points = np.asarray(pcd).astype(np.float32)
    pos = torch.from_numpy(points).to(device)
    data_test_list.append(Data(pos=pos, y=label))
def load_train_dataset():
    """Read the training .pcd files for every class and return a DataLoader."""
    base = "/home/keigo/デスクトップ/train"
    # Fix: the four per-class loops were copy-pasted; drive them from a table.
    class_dirs = [
        ("train_stand", 'stand'),
        ("train_sleep", 'sleep'),
        ("train_sit", 'sit'),
        ("others", 'others'),
    ]
    for subdir, objtype in class_dirs:
        for path in sorted(glob.glob("%s/%s/*.pcd" % (base, subdir))):
            pcd = o3d.io.read_point_cloud(path)
            creat_train_dataset(pcd=pcd, objtype=objtype)
    # Fix: the samples hold CUDA tensors, which cannot be shared with forked
    # worker subprocesses (the RuntimeError reported in this thread) -- use
    # num_workers=0.
    data_loader = DataLoader(data_train_list, batch_size=10, shuffle=True,
                             num_workers=0)
    return data_loader
def load_test_dataset():
    """Build the test DataLoader from the 'sit' point-cloud files.

    For each cloud: the two dominant planes (presumably floor/wall --
    TODO confirm) are removed with RANSAC, the remainder is clustered
    with DBSCAN, and every sufficiently large cluster becomes one test
    sample labelled 'sit'. num_workers=0 because the samples hold CUDA
    tensors.
    """
    pcd_files = sorted(glob.glob("/home/keigo/デスクトップ/test3/test_sit/*.pcd"))
    for c in pcd_files:
        pcd = o3d.io.read_point_cloud(c)
        # Strip the two largest planes found by RANSAC.
        for f in range(2):
            plane_model, inliers = pcd.segment_plane(
                distance_threshold=0.05, ransac_n=3, num_iterations=1000)
            [a, b, c, d] = plane_model
            inlier_cloud = pcd.select_by_index(inliers)
            inlier_cloud.paint_uniform_color([1.0, 0, 0])
            pcd = pcd.select_by_index(inliers, invert=True)
        # Cluster what remains.
        with o3d.utility.VerbosityContextManager(
                o3d.utility.VerbosityLevel.Debug) as cm:
            labels = np.array(
                pcd.cluster_dbscan(eps=0.13, min_points=45, print_progress=True))
        max_label = labels.max() + 1
        denom = max_label if max_label > 0 else 1
        colors = plt.get_cmap("tab20")(labels / denom)
        colors[labels < 0] = 0  # noise points painted black
        pcd.colors = o3d.utility.Vector3dVector(colors[:, :3])
        if max_label > 0:
            xyz = np.asarray(pcd.points)
            for i in range(max_label):
                cluster_xyz = xyz[labels == i]
                point_cloud = o3d.geometry.PointCloud()
                point_cloud.points = o3d.utility.Vector3dVector(cluster_xyz)
                # Keep only clusters large enough to plausibly be a subject.
                if len(cluster_xyz) > 600:
                    o3d.visualization.draw_geometries([point_cloud])
                    creat_test_dataset(pcd=cluster_xyz, objtype='sit')
    data_loader = DataLoader(data_test_list, batch_size=1, shuffle=False,
                             num_workers=0)
    return data_loader
if __name__ == '__main__':
    # Pick the GPU when available. NOTE: `device`, `model` and `optimizer`
    # are read as globals by the helper functions defined above.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # Load weights trained elsewhere (per the thread, on Google Colab).
    model.load_state_dict(torch.load("/home/keigo/デスクトップ/train/model.pth"))
    test_loader=load_test_dataset()
    test(test_loader)
So the change is that you call the CPU version of radius, I guess. What happens when you bump up the number of max neighbors, e.g., max_num_neighbors=128? What happens if you change that call to use knn instead of radius?
Setting max_num_neighbors to 128 causes the Jetson Nano to shut down. The same is true for 64. Using knn instead of radius does not improve the accuracy.
It's hard to identify the cause of this issue as I do not have access to a Jetson Nano. We need to first identify the cause of this error (which is likely in radius
but not yet confirmed). Can you confirm that the output of radius
is indeed different on Colab and Jetson?
I used googlecolab to generate a learning model for a program that improved pointnet2_classification.py. If you load the generated model on googlecolab and infer it, you can infer with high accuracy, but if you load the generated model on a different device (I work with jetsonnano) and infer it, the accuracy will be low. Do you know anything about this cause?