Open shb9793 opened 1 year ago
And this is my config.
# Base configs this file builds on (dataset, schedule, runtime).
_base_ = [
    '../../../../configs/_base_/datasets/kitti-3d-3class.py',
    '../../../../configs/_base_/schedules/cyclic_40e.py',
    '../../../../configs/_base_/default_runtime.py',
]

# Plugin switch and the directory holding the UVTR plugin code.
plugin = True
plugin_dir = 'projects_uvtr/mmdet3d_plugin/'

# Point cloud range shared by the model settings below. When this value is
# changed, every other point-cloud-range entry in the config has to be
# changed to match.
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
voxel_size = [0.05, 0.05, 0.1]
fp16_enabled = True
bev_stride = 4
sample_num = 5

# Three-class detection; the same class order is repeated in the pipelines.
class_names = ['Pedestrian', 'Cyclist', 'Car']

# Only the LiDAR modality is switched on.
input_modality = dict(
    use_lidar=True,
    use_camera=False,
    use_radar=False,
    use_map=False,
    use_external=False,
)
# UVTR detector definition; only the point-cloud (pts_*) components are
# configured here.
model = dict(
    type='UVTR',
    # Voxelisation of the input points.
    pts_voxel_layer=dict(
        max_num_points=5,
        voxel_size=voxel_size,
        max_voxels=(16000, 40000),
        point_cloud_range=point_cloud_range,
    ),
    pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=4),
    # Sparse 3D encoder.
    # NOTE(review): sparse_shape looks like (z, y, x) voxel counts with one
    # extra z slice compared with grid_size in train_cfg -- confirm against
    # the encoder implementation before changing range or voxel size.
    pts_middle_encoder=dict(
        type='SparseEncoderHD',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        output_channels=256,
        order=('conv', 'norm', 'act'),
        encoder_channels=(
            (16, 16, 32),
            (32, 32, 64),
            (64, 64, 128),
            (128, 128),
        ),
        encoder_paddings=(
            (0, 0, 1),
            (0, 0, 1),
            (0, 0, [0, 1, 1]),
            (0, 0),
        ),
        block_type='basicblock',
        fp16_enabled=False,  # not enable FP16 here
    ),
    pts_backbone=dict(
        type='SECOND3D',
        in_channels=[256, 256, 256],
        out_channels=[128, 256, 512],
        layer_nums=[5, 5, 5],
        layer_strides=[1, 2, 4],
        is_cascade=False,
        norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type='Conv3d', kernel=(1, 3, 3), bias=False),
    ),
    pts_neck=dict(
        type='SECOND3DFPN',
        in_channels=[128, 256, 512],
        out_channels=[256, 256, 256],
        upsample_strides=[1, 2, 4],
        norm_cfg=dict(type='BN3d', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv3d', bias=False),
        extra_conv=dict(type='Conv3d', num_conv=3, bias=False),
        use_conv_for_no_stride=True,
    ),
    pts_bbox_head=dict(
        type='UVTRHead',
        # transformer_cfg
        num_query=300,
        num_classes=3,
        in_channels=256,
        sync_cls_avg_factor=True,
        with_box_refine=True,
        as_two_stage=False,
        transformer=dict(
            type='Uni3DDETR',
            fp16_enabled=fp16_enabled,
            decoder=dict(
                type='UniTransformerDecoder',
                num_layers=3,
                return_intermediate=True,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    # Entry 0 is the self-attention module, entry 1 the
                    # cross-attention module (see operation_order below).
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1,
                        ),
                        dict(
                            type='UniCrossAtten',
                            num_points=1,
                            embed_dims=256,
                            num_sweeps=1,
                            fp16_enabled=fp16_enabled,
                        ),
                    ],
                    ffn_cfgs=dict(
                        type='FFN',
                        embed_dims=256,
                        feedforward_channels=512,
                        num_fcs=2,
                        ffn_drop=0.1,
                        act_cfg=dict(type='ReLU', inplace=True),
                    ),
                    norm_cfg=dict(type='LN'),
                    operation_order=(
                        'self_attn', 'norm',
                        'cross_attn', 'norm',
                        'ffn', 'norm',
                    ),
                ),
            ),
        ),
        bbox_coder=dict(
            type='NMSFreeCoder',
            post_center_range=[0, -40, -3, 70.4, 40, 1],
            pc_range=point_cloud_range,
            max_num=100,
            voxel_size=voxel_size,
            num_classes=3,
        ),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            normalize=True,
            offset=-0.5,
        ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0,
        ),
        loss_bbox=dict(type='L1Loss', loss_weight=0.25),
        loss_iou=dict(type='GIoULoss', loss_weight=0.0),
        code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
    ),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[1408, 1600, 40],
            voxel_size=voxel_size,
            point_cloud_range=point_cloud_range,
            out_size_factor=bev_stride,
            assigner=dict(
                type='HungarianAssigner3D',
                cls_cost=dict(type='FocalLossCost', weight=2.0),
                reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
                # Fake cost. This is just to make it compatible with DETR
                # head.
                iou_cost=dict(type='IoUCost', weight=0.0),
                pc_range=point_cloud_range,
            ),
        ),
    ),
)
# Dataset class name and the root directory of the KITTI-format data.
dataset_type = 'KittiDataset'
data_root = '/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/'
file_client_args = dict(backend='disk')

# Ground-truth database sampler settings.
# NOTE(review): the ObjectSample step of train_pipeline in this file carries
# its own inline sampler dict and does not reference this variable -- confirm
# which of the two is meant to be used.
db_sampler = dict(
    type='UnifiedDataBaseSampler',
    data_root=data_root,
    # please change to your own database file
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10),
    ),
    classes=class_names,
    sample_groups=dict(Car=12, Pedestrian=10, Cyclist=10),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=[0, 1, 2, 3],
        file_client_args=file_client_args,
    ),
)
# Training pipeline: load points and 3D annotations, apply the augmentation
# steps, filter by range, shuffle, then format and collect the training keys.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        file_client_args=dict(backend='disk'),
    ),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        file_client_args=dict(backend='disk'),
    ),
    # Object sampling from the database info file under data_root.
    dict(
        type='ObjectSample',
        db_sampler=dict(
            data_root=data_root,
            info_path=data_root + 'kitti_dbinfos_train.pkl',
            rate=1.0,
            prepare=dict(
                filter_by_difficulty=[-1],
                filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10),
            ),
            classes=['Pedestrian', 'Cyclist', 'Car'],
            sample_groups=dict(Car=12, Pedestrian=10, Cyclist=10),
        ),
    ),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_range=[-0.78539816, 0.78539816],
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05],
    ),
    # Both range filters use the same range as point_cloud_range above.
    dict(
        type='PointsRangeFilter',
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
    ),
    dict(
        type='ObjectRangeFilter',
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
    ),
    dict(type='PointShuffle'),
    dict(
        type='DefaultFormatBundle3D',
        class_names=['Pedestrian', 'Cyclist', 'Car'],
    ),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
# Test pipeline: load points, then run the wrapped transforms with flipping
# disabled and a single point scale ratio.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        file_client_args=dict(backend='disk'),
    ),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            # Rotation, scale and translation ranges are all fixed to the
            # neutral value here.
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1.0, 1.0],
                translation_std=[0, 0, 0],
            ),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter',
                point_cloud_range=[0, -40, -3, 70.4, 40, 1],
            ),
            dict(
                type='DefaultFormatBundle3D',
                class_names=['Pedestrian', 'Cyclist', 'Car'],
                with_label=False,
            ),
            dict(type='Collect3D', keys=['points']),
        ],
    ),
]
# Pipeline used for data and ground-truth loading in the show function.
# Its loading step should stay consistent with the one in test_pipeline
# (e.g. the file client).
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        file_client_args=dict(backend='disk'),
    ),
    dict(
        type='DefaultFormatBundle3D',
        class_names=['Pedestrian', 'Cyclist', 'Car'],
        with_label=False,
    ),
    dict(type='Collect3D', keys=['points']),
]
# Data loading settings. The train split is wrapped in a RepeatDataset
# (times=2); val and test both read kitti_infos_val.pkl and use the same
# inference pipeline.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=8,
    train=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type='KittiDataset',
            data_root='/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/',
            ann_file=data_root + 'kitti_infos_train.pkl',
            split='training',
            pts_prefix='velodyne_reduced',
            pipeline=[
                dict(
                    type='LoadPointsFromFile',
                    coord_type='LIDAR',
                    load_dim=4,
                    use_dim=4,
                    file_client_args=dict(backend='disk'),
                ),
                dict(
                    type='LoadAnnotations3D',
                    with_bbox_3d=True,
                    with_label_3d=True,
                    file_client_args=dict(backend='disk'),
                ),
                dict(
                    type='ObjectSample',
                    db_sampler=dict(
                        data_root='/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/',
                        info_path=data_root + 'kitti_dbinfos_train.pkl',
                        rate=1.0,
                        prepare=dict(
                            filter_by_difficulty=[-1],
                            filter_by_min_points=dict(
                                Car=5,
                                Pedestrian=10,
                                Cyclist=10,
                            ),
                        ),
                        classes=['Pedestrian', 'Cyclist', 'Car'],
                        sample_groups=dict(
                            Car=12,
                            Pedestrian=10,
                            Cyclist=10,
                        ),
                    ),
                ),
                dict(
                    type='ObjectNoise',
                    num_try=100,
                    translation_std=[1.0, 1.0, 0.5],
                    global_rot_range=[0.0, 0.0],
                    rot_range=[-0.78539816, 0.78539816],
                ),
                dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
                dict(
                    type='GlobalRotScaleTrans',
                    rot_range=[-0.78539816, 0.78539816],
                    scale_ratio_range=[0.95, 1.05],
                ),
                dict(
                    type='PointsRangeFilter',
                    point_cloud_range=[0, -40, -3, 70.4, 40, 1],
                ),
                dict(
                    type='ObjectRangeFilter',
                    point_cloud_range=[0, -40, -3, 70.4, 40, 1],
                ),
                dict(type='PointShuffle'),
                dict(
                    type='DefaultFormatBundle3D',
                    class_names=['Pedestrian', 'Cyclist', 'Car'],
                ),
                dict(
                    type='Collect3D',
                    keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'],
                ),
            ],
            modality=dict(use_lidar=True, use_camera=False),
            classes=['Pedestrian', 'Cyclist', 'Car'],
            test_mode=False,
            box_type_3d='LiDAR',
        ),
    ),
    val=dict(
        type='KittiDataset',
        data_root='/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/',
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=[
            dict(
                type='LoadPointsFromFile',
                coord_type='LIDAR',
                load_dim=4,
                use_dim=4,
                file_client_args=dict(backend='disk'),
            ),
            dict(
                type='MultiScaleFlipAug3D',
                img_scale=(1333, 800),
                pts_scale_ratio=1,
                flip=False,
                transforms=[
                    dict(
                        type='GlobalRotScaleTrans',
                        rot_range=[0, 0],
                        scale_ratio_range=[1.0, 1.0],
                        translation_std=[0, 0, 0],
                    ),
                    dict(type='RandomFlip3D'),
                    dict(
                        type='PointsRangeFilter',
                        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
                    ),
                    dict(
                        type='DefaultFormatBundle3D',
                        class_names=['Pedestrian', 'Cyclist', 'Car'],
                        with_label=False,
                    ),
                    dict(type='Collect3D', keys=['points']),
                ],
            ),
        ],
        modality=dict(use_lidar=True, use_camera=False),
        classes=['Pedestrian', 'Cyclist', 'Car'],
        test_mode=True,
        box_type_3d='LiDAR',
    ),
    test=dict(
        type='KittiDataset',
        data_root='/share/home/scz6240/openmmlab0171/DAIR-V2X-Dataset/single-infrastructure-side/',
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=[
            dict(
                type='LoadPointsFromFile',
                coord_type='LIDAR',
                load_dim=4,
                use_dim=4,
                file_client_args=dict(backend='disk'),
            ),
            dict(
                type='MultiScaleFlipAug3D',
                img_scale=(1333, 800),
                pts_scale_ratio=1,
                flip=False,
                transforms=[
                    dict(
                        type='GlobalRotScaleTrans',
                        rot_range=[0, 0],
                        scale_ratio_range=[1.0, 1.0],
                        translation_std=[0, 0, 0],
                    ),
                    dict(type='RandomFlip3D'),
                    dict(
                        type='PointsRangeFilter',
                        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
                    ),
                    dict(
                        type='DefaultFormatBundle3D',
                        class_names=['Pedestrian', 'Cyclist', 'Car'],
                        with_label=False,
                    ),
                    dict(type='Collect3D', keys=['points']),
                ],
            ),
        ],
        modality=dict(use_lidar=True, use_camera=False),
        classes=['Pedestrian', 'Cyclist', 'Car'],
        test_mode=True,
        box_type_3d='LiDAR',
    ),
)
# Evaluation settings: interval of 1, with a pipeline that loads the points
# and formats them without labels.
evaluation = dict(
    interval=1,
    pipeline=[
        dict(
            type='LoadPointsFromFile',
            coord_type='LIDAR',
            load_dim=4,
            use_dim=4,
            file_client_args=dict(backend='disk'),
        ),
        dict(
            type='DefaultFormatBundle3D',
            class_names=['Pedestrian', 'Cyclist', 'Car'],
            with_label=False,
        ),
        dict(type='Collect3D', keys=['points']),
    ],
)
# Checkpointing and runner.
checkpoint_config = dict(interval=1)
runner = dict(type='EpochBasedRunner', max_epochs=40)

# Optimizer, with gradient clipping on the L2 norm.
optimizer = dict(type='AdamW', lr=2e-5, weight_decay=0.01)
optimizer_config = dict(
    grad_clip=dict(max_norm=10, norm_type=2),
)

# Output directory and process/launch settings.
work_dir = '/share/home/scz6240/openmmlab0171/mmdetection3d/work_dir/uvtr_dair'
find_unused_parameters = True
workflow = [('train', 1)]
gpu_ids = range(0, 1)
dist_params = dict(backend='nccl')
log_level = 'INFO'

# fp16 setting
fp16 = dict(loss_scale=32.0)
It seems like an fp16 error, but I don't know how to resolve this issue. Looking forward to your suggestions. Many thanks!
Hi, I guess you should check the data format of `points` in this call: `results = self.simple_test(img_metas[0], points, img[0], **kwargs)`. It seems that `points` is a list here. It's better to modify it according to your data loader.
Excuse me, I don't know how to modify the data format of the input points. Could you give me some suggestions, please?
Sorry to bother you again. I have tried to reimplement your LiDAR-based model on a KITTI-like dataset, but after the first epoch the following error occurs: