SZUshenyan closed this issue 4 months ago.
Hello, I want to use the test set of the SemanticKITTI dataset for inference visualization. I have successfully used mayavi to visualize inference on the validation set, but I ran into a problem when visualizing the test set: an error is reported saying the preprocessed .npy file is missing. Since the .npy file is produced by processing the .label and .invalid files, I want to ask: are the .label and .invalid files strictly required for inference visualization?
The SemanticKITTI data are labeled only for the first 11 sequences, not for the later ones. Inference only requires images, not labels.
Yes, there are no labels for the test sequences. You can ignore these files during inference.
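For reference, a minimal sketch of the kind of guard that skips missing targets during inference, assuming the dataset's __getitem__ reads one preprocessed .npy target per frame (the function name and path handling are illustrative, not the repo's exact code):

```python
import os
import numpy as np

def load_voxel_target(target_path):
    """Return the preprocessed voxel target if it exists, else None.

    SemanticKITTI test sequences ship without .label/.invalid files, so the
    preprocessed .npy targets cannot be generated for them; inference only
    needs the RGB image, so the target is simply skipped.
    """
    if not os.path.exists(target_path):  # test sequences: no ground truth
        return None
    return np.load(target_path)
```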
Hello, I have run the inference code, but nothing appears in Gradio. I now want to run inference on my own image data, save the result as a .pkl file, and then use mayavi for visualization. Can you help me?
Hi, I don't understand. Can you use my visualization script to visualize it?
Hello, I used two methods to run inference on my own image data. The first is the inference code you provided on Hugging Face, which already runs successfully in Gradio. The second is to first use my own mydataset_output.py to run inference on the images and save the result as a .pkl file, and then use visualize.py to visualize that .pkl file. However, the second method produces a different 3D semantic occupancy prediction than the first. I have posted the code of mydataset_output.py and visualize.py below; please help me find where the problem is. Looking forward to your reply, thank you! In addition, I also want to use the 3D semantic occupancy predictions from MonoScene for path planning (using an improved APF method); do you think that is feasible?
mydataset_output.py
```python
import os
import cv2
import numpy as np
import torch
from torchvision import transforms
import pickle

# from helpers import *
from helpers import get_projections, majority_pooling, draw
from monoscene.monoscene import MonoScene

torch.set_grad_enabled(False)

model = MonoScene.load_from_checkpoint(
    "/home/zhx/Data/sy/MonoScene/trained_models/monoscene_kitti.ckpt",
    dataset="kitti",
    n_classes=20,
    feature=64,
    project_scale=2,
    full_scene_size=(256, 256, 32),
)

img_W, img_H = 1220, 370

input_folder = "/home/zhx/Data/sy/MonoScene2/images/mydataset/"
output_folder = "/home/zhx/Data/sy/MonoScene2/images/myoutput_pkls"
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

for filename in os.listdir(input_folder):
    if filename.endswith(".jpg") or filename.endswith(".png"):
        img_path = os.path.join(input_folder, filename)
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img = np.array(img, dtype=np.float32) / 255.0
        img = np.array(img, dtype=np.float32, copy=False) / 255.0

        normalize_rgb = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )
        img = normalize_rgb(img)

        batch = get_projections(img_W, img_H)
        batch["img"] = img
        for k in batch:
            batch[k] = batch[k].unsqueeze(0)  # .cuda()

        pred = model(batch).squeeze()
        pred = majority_pooling(pred, k_size=2)
        # pred = np.argmax(pred, axis=1)

        # Save prediction result as a .pkl file
        output_dict = {
            "pred": pred.astype(np.uint16),
            "fov_mask": batch["fov_mask_2"].detach().cpu().numpy(),
        }
        output_filename = os.path.splitext(filename)[0] + ".pkl"
        output_path = os.path.join(output_folder, output_filename)
        with open(output_path, "wb") as f:
            pickle.dump(output_dict, f)
        print(f"Prediction saved as {output_path}")
```
visualize.py
```python
# from operator import gt
import pickle

import numpy as np
from omegaconf import DictConfig
import hydra
from mayavi import mlab


def get_grid_coords(dims, resolution):
    """
    :param dims: the dimensions of the grid [x, y, z] (i.e. [256, 256, 32])
    :return coords_grid: is the center coords of voxels in the grid
    """
    g_xx = np.arange(0, dims[0] + 1)
    g_yy = np.arange(0, dims[1] + 1)
    sensor_pose = 10
    g_zz = np.arange(0, dims[2] + 1)

    # Obtaining the grid with coords...
    xx, yy, zz = np.meshgrid(g_xx[:-1], g_yy[:-1], g_zz[:-1])
    coords_grid = np.array([xx.flatten(), yy.flatten(), zz.flatten()]).T
    coords_grid = coords_grid.astype(float)  # np.float was removed in NumPy 1.24
    coords_grid = (coords_grid * resolution) + resolution / 2

    temp = np.copy(coords_grid)
    temp[:, 0] = coords_grid[:, 1]
    temp[:, 1] = coords_grid[:, 0]
    coords_grid = np.copy(temp)

    return coords_grid


def draw(
    voxels,
    # T_velo_2_cam,
    # vox_origin,
    fov_mask,
    # img_size,
    # f,
    voxel_size=0.4,
    d=7,  # 7m - determine the size of the mesh representing the camera
):
    # # Compute the coordinates of the mesh representing camera
    # x = d * img_size[0] / (2 * f)
    # y = d * img_size[1] / (2 * f)
    # tri_points = np.array(
    #     [
    #         [0, 0, 0],
    #         [x, y, d],
    #         [-x, y, d],
    #         [-x, -y, d],
    #         [x, -y, d],
    #     ]
    # )
    # tri_points = np.hstack([tri_points, np.ones((5, 1))])
    # tri_points = (np.linalg.inv(T_velo_2_cam) @ tri_points.T).T
    # x = tri_points[:, 0] - vox_origin[0]
    # y = tri_points[:, 1] - vox_origin[1]
    # z = tri_points[:, 2] - vox_origin[2]
    # triangles = [
    #     (0, 1, 2),
    #     (0, 1, 4),
    #     (0, 3, 4),
    #     (0, 2, 3),
    # ]

    fov_mask = fov_mask.reshape(-1)

    # Compute the voxels coordinates
    grid_coords = get_grid_coords(
        [voxels.shape[0], voxels.shape[1], voxels.shape[2]], voxel_size
    )

    # Attach the predicted class to every voxel
    grid_coords = np.vstack([grid_coords.T, voxels.reshape(-1)]).T

    # Get the voxels inside FOV
    fov_grid_coords = grid_coords[fov_mask, :]

    # Get the voxels outside FOV
    outfov_grid_coords = grid_coords[~fov_mask, :]

    # Remove empty and unknown voxels
    fov_voxels = fov_grid_coords[
        (fov_grid_coords[:, 3] > 0) & (fov_grid_coords[:, 3] < 255)
    ]
    outfov_voxels = outfov_grid_coords[
        (outfov_grid_coords[:, 3] > 0) & (outfov_grid_coords[:, 3] < 255)
    ]

    figure = mlab.figure(size=(1400, 1400), bgcolor=(1, 1, 1))

    # # Draw the camera
    # mlab.triangular_mesh(
    #     x, y, z, triangles, representation="wireframe", color=(0, 0, 0), line_width=5
    # )

    # Draw occupied inside FOV voxels
    plt_plot_fov = mlab.points3d(
        fov_voxels[:, 0],
        fov_voxels[:, 1],
        fov_voxels[:, 2],
        fov_voxels[:, 3],
        colormap="viridis",
        scale_factor=voxel_size - 0.05 * voxel_size,
        mode="cube",
        opacity=1.0,
        vmin=1,
        vmax=19,
    )

    # Draw occupied outside FOV voxels
    plt_plot_outfov = mlab.points3d(
        outfov_voxels[:, 0],
        outfov_voxels[:, 1],
        outfov_voxels[:, 2],
        outfov_voxels[:, 3],
        colormap="viridis",
        scale_factor=voxel_size - 0.05 * voxel_size,
        mode="cube",
        opacity=1.0,
        vmin=1,
        vmax=19,
    )

    colors = np.array(
        [
            [100, 150, 245, 255],
            [100, 230, 245, 255],
            [30, 60, 150, 255],
            [80, 30, 180, 255],
            [100, 80, 250, 255],
            [255, 30, 30, 255],
            [255, 40, 200, 255],
            [150, 30, 90, 255],
            [255, 0, 255, 255],
            [255, 150, 255, 255],
            [75, 0, 75, 255],
            [175, 0, 75, 255],
            [255, 200, 0, 255],
            [255, 120, 50, 255],
            [0, 175, 0, 255],
            [135, 60, 0, 255],
            [150, 240, 80, 255],
            [255, 240, 150, 255],
            [255, 0, 0, 255],
        ]
    ).astype(np.uint8)

    plt_plot_fov.glyph.scale_mode = "scale_by_vector"
    plt_plot_outfov.glyph.scale_mode = "scale_by_vector"

    plt_plot_fov.module_manager.scalar_lut_manager.lut.table = colors

    outfov_colors = colors
    outfov_colors[:, :3] = outfov_colors[:, :3] // 3 * 2
    plt_plot_outfov.module_manager.scalar_lut_manager.lut.table = outfov_colors

    mlab.show()


@hydra.main(config_path=None)
def main(config: DictConfig):
    scan = config.file
    with open(scan, "rb") as handle:
        b = pickle.load(handle)

    fov_mask = b["fov_mask"]
    # T_velo_2_cam = b["T_velo_2_cam"]
    # vox_origin = np.array([0, -25.6, -2])
    pred = b["pred"]

    draw(
        pred,
        # T_velo_2_cam,
        # vox_origin,
        fov_mask,
        # img_size=(1220, 370),
        # f=707.0912,
        voxel_size=0.4,
        # d=7,
    )


if __name__ == "__main__":
    main()
```
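Since the script uses hydra with an empty config, it would typically be launched with a `+file` override pointing at one of the saved pickles, e.g. `python visualize.py +file=/path/to/output.pkl` (the path here is a placeholder).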
Regarding inference on the test set: you should modify generate_output.py according to the note in that script, i.e. the line `data_loader = data_module.val_dataloader()`.
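If I read that note correctly, the change would look roughly like this in monoscene/scripts/generate_output.py (a sketch under that assumption; only the dataloader line changes):

```python
# After the KITTI data module is set up, switch the split used for inference.
# The test sequences have images but no labels, so no preprocessed .npy
# targets are needed for them.
data_module.setup()

# data_loader = data_module.val_dataloader()   # default: validation split (labeled)
data_loader = data_module.test_dataloader()    # test split (images only)
```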