Debugging the data processing of RCBEVDet (Radar-camera Fusion in Bird's Eye View for 3D Object Detection)
paper: RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection
github: github.com/VDIGPKU/RCB….
arXiv: [2403.16440] RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection
I. The mmdetection-style data processing flow
In the mmdetection family of frameworks, a dataset is generally handled as follows: first, the dataset-related fields are set in the config file ./configs/project_name/xxx.py (dataset_type, data_root, the train/val/test pipelines, and data); then train.py builds the dataset object with build_dataset; next, train_model calls the interface in mmdet3d/apis/train.py to run build_dataloader; finally, the resulting dataloader object is iterated: sample --> pipeline --> return a batch of data.
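A condensed sketch of this chain, using the mmdet3d 0.x-style APIs that this repo follows (illustrative only, not a verbatim quote of tools/train.py):
from mmcv import Config
from mmdet3d.datasets import build_dataset
from mmdet.datasets import build_dataloader

cfg = Config.fromfile('configs/project_name/xxx.py')  # step 1: dataset config
dataset = build_dataset(cfg.data.train)               # step 2: build the dataset object
loader = build_dataloader(                            # step 3: sampler + collate_fn
    dataset, cfg.data.samples_per_gpu, cfg.data.workers_per_gpu,
    num_gpus=1, dist=False)
for batch in loader:                                  # step 4: sample --> pipeline --> batch
    break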
1. Dataset configuration
address: configs/rcbevdet/rcbevdet-256x704-r50-BEV128-9kf-depth-cbgs12e-circlelarger.py
effect: configures the dataset type and location, the data augmentation, the shared dataset settings, and the data processing pipelines
where it takes effect: build_dataset writes this configuration into the dataset class that actually performs the processing
- Configuration example:
# Dataset part configs start
dataset_type = 'NuScenesDatasetRC'
data_root = 'data/nuscenes/'
file_client_args = dict(backend='disk')
bda_aug_conf = dict(
    rot_lim=(-22.5, 22.5),
    scale_lim=(0.95, 1.05),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5)
train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=data_config,
        sequential=True),
    dict(  # load radar
        type='LoadRadarPointsMultiSweeps',
        load_dim=18,
        sweeps_num=8,
        use_dim=radar_use_dims,
        max_num=1200,),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names),
    dict(type='GlobalRotScaleTrans_radar'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D',
         keys=['img_inputs', 'gt_bboxes_3d', 'gt_labels_3d',
               'gt_depth', 'radar'])
]
test_pipeline = [
    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
    dict(
        type='LoadRadarPointsMultiSweeps',
        load_dim=18,
        sweeps_num=8,
        use_dim=radar_use_dims,
        max_num=1200,),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=False),
    dict(type='GlobalRotScaleTrans_radar'),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points', 'img_inputs', 'radar'])
        ])
]
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=True,
    use_map=False,
    use_external=False)
share_data_config = dict(
    type=dataset_type,
    classes=class_names,
    modality=input_modality,
    img_info_prototype='bevdet4d',
    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
)
test_data_config = dict(
    pipeline=test_pipeline,
    ann_file=data_root + 'nuscenes_RC_infos_val.pkl')
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(
        type='CBGSDataset',
        dataset=dict(
            data_root=data_root,
            ann_file=data_root + 'nuscenes_RC_infos_train.pkl',
            pipeline=train_pipeline,
            classes=class_names,
            test_mode=False,
            use_valid_flag=True,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='LiDAR')),
    val=test_data_config,
    test=test_data_config)
for key in ['val', 'test']:
    data[key].update(share_data_config)
data['train']['dataset'].update(share_data_config)
# Dataset part configs end
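A quick sanity check of the shared-config merge above (a sketch; the printed values come straight from this config):
from mmcv import Config

cfg = Config.fromfile(
    'configs/rcbevdet/rcbevdet-256x704-r50-BEV128-9kf-depth-cbgs12e-circlelarger.py')
# after the update loop, every split carries the share_data_config keys
print(cfg.data.val.type)                          # 'NuScenesDatasetRC'
print(cfg.data.train.dataset.img_info_prototype)  # 'bevdet4d'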
2. build_dataset
Builds the actual dataset object from the configuration information pulled out of the config file.
- Usage example:
address: tools/train.py
datasets = [build_dataset(cfg.data.train)]
- Concrete implementation of build_dataset:
address: mmdet3d/datasets/builder.py
def build_dataset(cfg, default_args=None):
    from mmdet3d.datasets.dataset_wrappers import CBGSDataset
    from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
                                                 ConcatDataset, RepeatDataset)
    if isinstance(cfg, (list, tuple)):
        dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
    elif cfg['type'] == 'ConcatDataset':
        dataset = ConcatDataset(
            [build_dataset(c, default_args) for c in cfg['datasets']],
            cfg.get('separate_eval', True))
    elif cfg['type'] == 'RepeatDataset':
        dataset = RepeatDataset(
            build_dataset(cfg['dataset'], default_args), cfg['times'])
    elif cfg['type'] == 'ClassBalancedDataset':
        dataset = ClassBalancedDataset(
            build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])
    elif cfg['type'] == 'CBGSDataset':
        dataset = CBGSDataset(build_dataset(cfg['dataset'], default_args))
    elif isinstance(cfg.get('ann_file'), (list, tuple)):
        dataset = _concat_dataset(cfg, default_args)
    elif cfg['type'] in DATASETS._module_dict.keys():
        dataset = build_from_cfg(cfg, DATASETS, default_args)
    else:
        dataset = build_from_cfg(cfg, MMDET_DATASETS, default_args)
    return dataset
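For the RCBEVDet train config above, cfg.data.train has type='CBGSDataset', so build_dataset recurses: the inner NuScenesDatasetRC is built first and then wrapped for class-balanced group sampling. A quick check (a sketch, assuming the repo's mmdet3d fork is installed and the info pkl files exist):
from mmcv import Config
from mmdet3d.datasets import build_dataset

cfg = Config.fromfile(
    'configs/rcbevdet/rcbevdet-256x704-r50-BEV128-9kf-depth-cbgs12e-circlelarger.py')
train_set = build_dataset(cfg.data.train)
print(type(train_set).__name__)          # CBGSDataset
print(type(train_set.dataset).__name__)  # NuScenesDatasetRC (the wrapped dataset)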
3. build_dataloader
The dataset object obtained in step 2 via build_dataset becomes one ingredient of the dataloader. The build_mmdet_dataloader interface then produces the dataloaders for the training, validation, and test phases.
- Usage example:
address: mmdet3d/apis/train.py
data_loaders = [
    build_mmdet_dataloader(
        ds,
        cfg.data.samples_per_gpu,
        cfg.data.workers_per_gpu,
        # `num_gpus` will be ignored if distributed
        num_gpus=len(cfg.gpu_ids),
        dist=distributed,
        seed=cfg.seed,
        runner_type=runner_type,
        persistent_workers=cfg.data.get('persistent_workers', False))
    for ds in dataset
]
- Concrete implementation:
reference: from mmdet.datasets import build_dataloader as build_mmdet_dataloader
address: ~/anaconda3/envs/your_env_name/lib/python3.8/site-packages/mmdet/datasets
def build_dataloader(dataset,
                     samples_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
                     seed=None,
                     runner_type='EpochBasedRunner',
                     persistent_workers=False,
                     class_aware_sampler=None,
                     **kwargs):
    """Build PyTorch DataLoader.

    In distributed training, each GPU/process has a dataloader.
    In non-distributed training, there is only one dataloader for all GPUs.

    Args:
        dataset (Dataset): A PyTorch dataset.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Default: True.
        shuffle (bool): Whether to shuffle the data at every epoch.
            Default: True.
        seed (int, Optional): Seed to be used. Default: None.
        runner_type (str): Type of runner. Default: `EpochBasedRunner`
        persistent_workers (bool): If True, the data loader will not shutdown
            the worker processes after a dataset has been consumed once.
            This allows to maintain the workers `Dataset` instances alive.
            This argument is only valid when PyTorch>=1.7.0. Default: False.
        class_aware_sampler (dict): Whether to use `ClassAwareSampler`
            during training. Default: None.
        kwargs: any keyword argument to be used to initialize DataLoader

    Returns:
        DataLoader: A PyTorch dataloader.
    """
    rank, world_size = get_dist_info()
    if dist:
        # When model is :obj:`DistributedDataParallel`,
        # `batch_size` of :obj:`dataloader` is the
        # number of training samples on each GPU.
        batch_size = samples_per_gpu
        num_workers = workers_per_gpu
    else:
        # When model is obj:`DataParallel`
        # the batch size is samples on all the GPUS
        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu

    if runner_type == 'IterBasedRunner':
        # this is a batch sampler, which can yield
        # a mini-batch indices each time.
        # it can be used in both `DataParallel` and
        # `DistributedDataParallel`
        if shuffle:
            batch_sampler = InfiniteGroupBatchSampler(
                dataset, batch_size, world_size, rank, seed=seed)
        else:
            batch_sampler = InfiniteBatchSampler(
                dataset,
                batch_size,
                world_size,
                rank,
                seed=seed,
                shuffle=False)
        batch_size = 1
        sampler = None
    else:
        if class_aware_sampler is not None:
            # ClassAwareSampler can be used in both distributed and
            # non-distributed training.
            num_sample_class = class_aware_sampler.get('num_sample_class', 1)
            sampler = ClassAwareSampler(
                dataset,
                samples_per_gpu,
                world_size,
                rank,
                seed=seed,
                num_sample_class=num_sample_class)
        elif dist:
            # DistributedGroupSampler will definitely shuffle the data to
            # satisfy that images on each GPU are in the same group
            if shuffle:
                sampler = DistributedGroupSampler(
                    dataset, samples_per_gpu, world_size, rank, seed=seed)
            else:
                sampler = DistributedSampler(
                    dataset, world_size, rank, shuffle=False, seed=seed)
        else:
            sampler = GroupSampler(dataset,
                                   samples_per_gpu) if shuffle else None
        batch_sampler = None

    init_fn = partial(
        worker_init_fn, num_workers=num_workers, rank=rank,
        seed=seed) if seed is not None else None

    if (TORCH_VERSION != 'parrots'
            and digit_version(TORCH_VERSION) >= digit_version('1.7.0')):
        kwargs['persistent_workers'] = persistent_workers
    elif persistent_workers is True:
        warnings.warn('persistent_workers is invalid because your pytorch '
                      'version is lower than 1.7.0')

    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
        pin_memory=kwargs.pop('pin_memory', False),
        worker_init_fn=init_fn,
        **kwargs)

    return data_loader
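A minimal inspection sketch (assumes the dataset object built in step 2): with dist=False and the default EpochBasedRunner branch, the loader groups samples_per_gpu samples with mmcv's collate, so each batch is a dict of DataContainers keyed by the Collect3D keys of the pipeline.
from mmdet.datasets import build_dataloader

loader = build_dataloader(
    dataset,
    samples_per_gpu=2,
    workers_per_gpu=0,  # 0 keeps loading in the main process, handy for debugging
    num_gpus=1,
    dist=False,
    shuffle=False)
batch = next(iter(loader))
print(batch.keys())  # e.g. img_inputs, gt_bboxes_3d, gt_labels_3d, gt_depth, radar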
4. sampler & pipeline
address: mmdet3d/datasets
The mmdet3d/datasets folder implements the concrete operations on the datasets, covering both sampling and the post-sampling processing pipeline.
4.1 sampler
address: mmdet3d/datasets/samplers
This folder holds the sampler-related files: the sampler builder plus the implementations of the distributed sampler, the distributed group sampler, and InfiniteGroupEachSampleInBatchSampler.
- Sampler builder example:
from mmcv.utils.registry import Registry, build_from_cfg

SAMPLER = Registry('sampler')

def build_sampler(cfg, default_args):
    return build_from_cfg(cfg, SAMPLER, default_args)
- Distributed group sampler example:
import math
import itertools
import copy
import torch.distributed as dist
import numpy as np
import torch
from mmcv.runner import get_dist_info
from torch.utils.data import Sampler
from .sampler import SAMPLER
import random
from IPython import embed

@SAMPLER.register_module()
class DistributedGroupSampler(Sampler):
    """Sampler that restricts data loading to a subset of the dataset.

    It is especially useful in conjunction with
    :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
    process can pass a DistributedSampler instance as a DataLoader sampler,
    and load a subset of the original dataset that is exclusive to it.

    .. note::
        Dataset is assumed to be of constant size.

    Arguments:
        dataset: Dataset used for sampling.
        num_replicas (optional): Number of processes participating in
            distributed training.
        rank (optional): Rank of the current process within num_replicas.
        seed (int, optional): random seed used to shuffle the sampler if
            ``shuffle=True``. This number should be identical across all
            processes in the distributed group. Default: 0.
    """
4.2 pipeline
address: mmdet3d/datasets/pipelines
This folder holds the transform classes that the data pipelines are assembled from.
- rcbevdet's pipeline configuration:
address: configs/rcbevdet/rcbevdet-256x704-r50-BEV128-9kf-depth-cbgs12e-circlelarger.py
train_pipeline and test_pipeline are exactly the configs already listed in Section 1 above, so they are not repeated here; val_pipeline is the same as test_pipeline.
- Concrete implementation of the rcbevdet data processing pipelines:
address: mmdet3d/datasets/pipelines/loading.py
Take PrepareImageInputs from the training pipeline as an example (method bodies elided):
@PIPELINES.register_module()
class PrepareImageInputs(object):
    """Load multi channel images from a list of separate channel files.

    Expects results['img_filename'] to be a list of filenames.

    Args:
        to_float32 (bool): Whether to convert the img to float32.
            Defaults to False.
        color_type (str): Color type of the file. Defaults to 'unchanged'.
    """

    def __init__(
        self,
        data_config,
        is_train=False,
        sequential=False,
        ego_cam='CAM_FRONT',
        add_adj_bbox=False,
        with_stereo=False,
        with_future_pred=False,
        img_norm_cfg=None,
        ignore=[],
    ):
        self.is_train = is_train
        self.data_config = data_config
        self.normalize_img = mmlabNormalize
        self.sequential = sequential
        self.ego_cam = ego_cam
        self.with_future_pred = with_future_pred
        self.add_adj_bbox = add_adj_bbox
        self.img_norm_cfg = img_norm_cfg
        self.with_stereo = with_stereo
        self.ignore = ignore
        if len(ignore) > 0:
            print(self.ignore)

    def get_rot(self, h):
        # build the 2x2 image rotation matrix for angle h (body omitted)
        return rot

    def img_transform(self, img, post_rot, post_tran, resize, resize_dims,
                      crop, flip, rotate):
        # adjust image (body omitted)
        return img, post_rot, post_tran

    def img_transform_core(self, img, resize_dims, crop, flip, rotate):
        # adjust image (body omitted)
        return img

    def choose_cams(self):
        return cam_names

    def sample_augmentation(self, H, W, flip=None, scale=None):
        return resize, resize_dims, crop, flip, rotate

    def get_sweep2key_transformation(self,
                                     cam_info,
                                     key_info,
                                     cam_name,
                                     ego_cam=None):
        return sweepego2keyego

    def get_sensor_transforms(self, cam_info, cam_name):
        return sensor2ego, ego2global

    def get_inputs(self, results, flip=None, scale=None):
        return (imgs, sensor2egos, ego2globals, intrins, post_rots, post_trans)
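A minimal custom transform sketch that follows the same registry pattern as PrepareImageInputs above (PrintKeys is a made-up debugging helper, not part of the repo; the PIPELINES import path assumes a BEVDet-style mmdet3d fork):
from mmdet.datasets.builder import PIPELINES

@PIPELINES.register_module()
class PrintKeys(object):
    """Debug transform: print which keys are flowing through the pipeline."""

    def __call__(self, results):
        # every transform receives the `results` dict and must return it
        print(sorted(results.keys()))
        return results
Dropping dict(type='PrintKeys') between any two steps of train_pipeline shows what the preceding transforms have added to results.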
4.3 Returning batched data
epoch_runner(data_loaders[i], **kwargs) runs one epoch of training, validation, or testing; inside that loop the dataloader yields the batched data.
Example code (a simplified illustration of the loop, not the actual mmcv runner):
def epoch_runner(data_loader, model, optimizer, criterion, is_training=True):
    if is_training:
        model.train()
    else:
        model.eval()
    for batch in data_loader:
        inputs, labels = batch['img_inputs'], batch['gt_labels_3d']
        # forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        if is_training:
            # backward pass and optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # logging
        print(f"Loss: {loss.item()}")

# called during training
epoch_runner(train_data_loader, model, optimizer, criterion, is_training=True)
# called during validation
epoch_runner(val_data_loader, model, optimizer, criterion, is_training=False)
The `for batch in data_loader` loop above is where the dataloader returns batched data.
Summary
That is the entire processing flow the data goes through before it is fed into the model. Note that for easier debugging you should set the dataloader worker count to 0 (i.e. workers_per_gpu=0), so that data loading runs in the main process and breakpoints inside the pipeline actually trigger.
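For example, the override can be a single line before the dataloaders are built (a sketch, assuming the cfg object from tools/train.py):
cfg.data.workers_per_gpu = 0  # single-process loading; breakpoints inside pipeline transforms now trigger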