Shortcuts

Source code for mmdet3d.models.roi_heads.bbox_heads.point_rcnn_bbox_head

# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmcv.cnn import ConvModule, normal_init
from mmcv.cnn.bricks import build_conv_layer
from mmcv.runner import BaseModule
from torch import nn as nn

from mmdet3d.core.bbox.structures import (LiDARInstance3DBoxes,
                                          rotation_3d_in_axis, xywhr2xyxyr)
from mmdet3d.core.post_processing import nms_bev, nms_normal_bev
from mmdet3d.models.builder import HEADS, build_loss
from mmdet3d.ops import build_sa_module
from mmdet.core import build_bbox_coder, multi_apply


@HEADS.register_module()
class PointRCNNBboxHead(BaseModule):
    """PointRCNN RoI Bbox head.

    Args:
        num_classes (int): The number of classes to predict.
        in_channels (int): Input channels of point features.
        mlp_channels (list[int]): The number of mlp channels.
        pred_layer_cfg (dict, optional): Config of classification and
            regression prediction layers. Defaults to None.
        num_points (tuple, optional): The number of points which each SA
            module samples. Defaults to (128, 32, -1).
        radius (tuple, optional): Sampling radius of each SA module.
            Defaults to (0.2, 0.4, 100).
        num_samples (tuple, optional): The number of samples for ball query
            in each SA module. Defaults to (64, 64, 64).
        sa_channels (tuple, optional): Out channels of each mlp in SA module.
            Defaults to ((128, 128, 128), (128, 128, 256), (256, 256, 512)).
        bbox_coder (dict, optional): Config dict of box coders.
            Defaults to dict(type='DeltaXYZWLHRBBoxCoder').
        sa_cfg (dict, optional): Config of set abstraction module, which may
            contain the following keys and values:

            - pool_mod (str): Pool method ('max' or 'avg') for SA modules.
            - use_xyz (bool): Whether to use xyz as a part of features.
            - normalize_xyz (bool): Whether to normalize xyz with radii in
              each SA module.

            Defaults to dict(type='PointSAModule', pool_mod='max',
            use_xyz=True).
        conv_cfg (dict, optional): Config dict of convolutional layers.
            Defaults to dict(type='Conv1d').
        norm_cfg (dict, optional): Config dict of normalization layers.
            Defaults to dict(type='BN1d').
        act_cfg (dict, optional): Config dict of activation layers.
            Defaults to dict(type='ReLU').
        bias (str, optional): Type of bias. Defaults to 'auto'.
        loss_bbox (dict, optional): Config of regression loss function.
            Defaults to dict(type='SmoothL1Loss', beta=1.0 / 9.0,
            reduction='sum', loss_weight=1.0).
        loss_cls (dict, optional): Config of classification loss function.
            Defaults to dict(type='CrossEntropyLoss', use_sigmoid=True,
            reduction='sum', loss_weight=1.0).
        with_corner_loss (bool, optional): Whether using corner loss.
            Defaults to True.
        init_cfg (dict, optional): Config of initialization. Defaults to None.
    """

    def __init__(
            self,
            num_classes,
            in_channels,
            mlp_channels,
            pred_layer_cfg=None,
            num_points=(128, 32, -1),
            radius=(0.2, 0.4, 100),
            num_samples=(64, 64, 64),
            sa_channels=((128, 128, 128), (128, 128, 256), (256, 256, 512)),
            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
            sa_cfg=dict(type='PointSAModule', pool_mod='max', use_xyz=True),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            act_cfg=dict(type='ReLU'),
            bias='auto',
            loss_bbox=dict(
                type='SmoothL1Loss',
                beta=1.0 / 9.0,
                reduction='sum',
                loss_weight=1.0),
            loss_cls=dict(
                type='CrossEntropyLoss',
                use_sigmoid=True,
                reduction='sum',
                loss_weight=1.0),
            with_corner_loss=True,
            init_cfg=None):
        super(PointRCNNBboxHead, self).__init__(init_cfg=init_cfg)
        self.num_classes = num_classes
        self.num_sa = len(sa_channels)
        self.with_corner_loss = with_corner_loss
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.bias = bias
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_cls = build_loss(loss_cls)
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)

        self.in_channels = in_channels
        # MLP that lifts the raw per-point xyz (+ extra) channels before they
        # are merged with the RPN features.
        mlp_channels = [self.in_channels] + mlp_channels
        shared_mlps = nn.Sequential()
        for i in range(len(mlp_channels) - 1):
            shared_mlps.add_module(
                f'layer{i}',
                ConvModule(
                    mlp_channels[i],
                    mlp_channels[i + 1],
                    kernel_size=(1, 1),
                    stride=(1, 1),
                    inplace=False,
                    conv_cfg=dict(type='Conv2d')))
        self.xyz_up_layer = nn.Sequential(*shared_mlps)

        # Fuses the concatenated (xyz features, rpn features) back down to
        # a single c_out-channel representation.
        c_out = mlp_channels[-1]
        self.merge_down_layer = ConvModule(
            c_out * 2,
            c_out,
            kernel_size=(1, 1),
            stride=(1, 1),
            inplace=False,
            conv_cfg=dict(type='Conv2d'))

        # Stack of set-abstraction modules; each consumes the previous
        # module's output channel count.
        pre_channels = c_out
        self.SA_modules = nn.ModuleList()
        sa_in_channel = pre_channels
        for sa_index in range(self.num_sa):
            cur_sa_mlps = list(sa_channels[sa_index])
            cur_sa_mlps = [sa_in_channel] + cur_sa_mlps
            sa_out_channel = cur_sa_mlps[-1]
            cur_num_points = num_points[sa_index]
            # A non-positive sample count means "use all points" for the
            # underlying SA module (signalled by None).
            if cur_num_points <= 0:
                cur_num_points = None
            self.SA_modules.append(
                build_sa_module(
                    num_point=cur_num_points,
                    radius=radius[sa_index],
                    num_sample=num_samples[sa_index],
                    mlp_channels=cur_sa_mlps,
                    cfg=sa_cfg))
            sa_in_channel = sa_out_channel

        # Separate conv branches for classification and regression heads.
        self.cls_convs = self._add_conv_branch(
            pred_layer_cfg.in_channels, pred_layer_cfg.cls_conv_channels)
        self.reg_convs = self._add_conv_branch(
            pred_layer_cfg.in_channels, pred_layer_cfg.reg_conv_channels)

        prev_channel = pred_layer_cfg.cls_conv_channels[-1]
        self.conv_cls = build_conv_layer(
            self.conv_cfg,
            in_channels=prev_channel,
            out_channels=self.num_classes,
            kernel_size=1)
        prev_channel = pred_layer_cfg.reg_conv_channels[-1]
        self.conv_reg = build_conv_layer(
            self.conv_cfg,
            in_channels=prev_channel,
            out_channels=self.bbox_coder.code_size * self.num_classes,
            kernel_size=1)

        # Fall back to Xavier init for all conv layers when the caller did
        # not provide an explicit init_cfg.
        if init_cfg is None:
            self.init_cfg = dict(type='Xavier', layer=['Conv2d', 'Conv1d'])

    def _add_conv_branch(self, in_channels, conv_channels):
        """Add shared or separable branch.

        Args:
            in_channels (int): Input feature channel.
            conv_channels (tuple): Middle feature channels.

        Returns:
            nn.Sequential: Stacked 1x1 ConvModules following the head's
                conv/norm/act configs.
        """
        conv_spec = [in_channels] + list(conv_channels)
        # add branch specific conv layers
        conv_layers = nn.Sequential()
        for i in range(len(conv_spec) - 1):
            conv_layers.add_module(
                f'layer{i}',
                ConvModule(
                    conv_spec[i],
                    conv_spec[i + 1],
                    kernel_size=1,
                    padding=0,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg,
                    bias=self.bias,
                    inplace=True))
        return conv_layers
[docs] def init_weights(self): """Initialize weights of the head.""" super().init_weights() for m in self.modules(): if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): if m.bias is not None: nn.init.constant_(m.bias, 0) normal_init(self.conv_reg.weight, mean=0, std=0.001)
[docs] def forward(self, feats): """Forward pass. Args: feats (torch.Torch): Features from RCNN modules. Returns: tuple[torch.Tensor]: Score of class and bbox predictions. """ input_data = feats.clone().detach() xyz_input = input_data[..., 0:self.in_channels].transpose( 1, 2).unsqueeze(dim=3).contiguous().clone().detach() xyz_features = self.xyz_up_layer(xyz_input) rpn_features = input_data[..., self.in_channels:].transpose( 1, 2).unsqueeze(dim=3) merged_features = torch.cat((xyz_features, rpn_features), dim=1) merged_features = self.merge_down_layer(merged_features) l_xyz, l_features = [input_data[..., 0:3].contiguous()], \ [merged_features.squeeze(dim=3)] for i in range(len(self.SA_modules)): li_xyz, li_features, cur_indices = \ self.SA_modules[i](l_xyz[i], l_features[i]) l_xyz.append(li_xyz) l_features.append(li_features) shared_features = l_features[-1] x_cls = shared_features x_reg = shared_features x_cls = self.cls_convs(x_cls) rcnn_cls = self.conv_cls(x_cls) x_reg = self.reg_convs(x_reg) rcnn_reg = self.conv_reg(x_reg) rcnn_cls = rcnn_cls.transpose(1, 2).contiguous().squeeze(dim=1) rcnn_reg = rcnn_reg.transpose(1, 2).contiguous().squeeze(dim=1) return rcnn_cls, rcnn_reg
    def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
             pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
        """Computing losses.

        Args:
            cls_score (torch.Tensor): Scores of each RoI.
            bbox_pred (torch.Tensor): Predictions of bboxes.
            rois (torch.Tensor): RoI bboxes.
            labels (torch.Tensor): Labels of class.
            bbox_targets (torch.Tensor): Target of positive bboxes.
            pos_gt_bboxes (torch.Tensor): Ground truths of positive bboxes.
            reg_mask (torch.Tensor): Mask for positive bboxes.
            label_weights (torch.Tensor): Weights of class loss.
            bbox_weights (torch.Tensor): Weights of bbox loss.

        Returns:
            dict: Computed losses.

                - loss_cls (torch.Tensor): Loss of classes.
                - loss_bbox (torch.Tensor): Loss of bboxes.
                - loss_corner (torch.Tensor): Loss of corners.
        """
        losses = dict()
        rcnn_batch_size = cls_score.shape[0]

        # calculate class loss
        cls_flat = cls_score.view(-1)
        loss_cls = self.loss_cls(cls_flat, labels, label_weights)
        losses['loss_cls'] = loss_cls

        # calculate regression loss on positive RoIs only
        code_size = self.bbox_coder.code_size
        pos_inds = (reg_mask > 0)
        pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds].clone()
        # Broadcast the per-RoI weight across every regression channel.
        bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
            1, pos_bbox_pred.shape[-1])
        loss_bbox = self.loss_bbox(
            pos_bbox_pred.unsqueeze(dim=0),
            bbox_targets.unsqueeze(dim=0).detach(),
            bbox_weights_flat.unsqueeze(dim=0))
        losses['loss_bbox'] = loss_bbox

        # Corner loss only makes sense when at least one positive RoI exists.
        if pos_inds.any() != 0 and self.with_corner_loss:
            rois = rois.detach()
            # rois[..., 0] is the batch index; drop it before decoding.
            pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
            pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
            batch_anchors = pos_roi_boxes3d.clone().detach()
            pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
            roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
            # Anchors are decoded in the RoI-canonical frame (centered at the
            # origin), then rotated/translated back below.
            batch_anchors[..., 0:3] = 0
            # decode boxes
            pred_boxes3d = self.bbox_coder.decode(
                batch_anchors,
                pos_bbox_pred.view(-1, code_size)).view(-1, code_size)
            # Rotate the canonical-frame centers back by each RoI's yaw ...
            pred_boxes3d[..., 0:3] = rotation_3d_in_axis(
                pred_boxes3d[..., 0:3].unsqueeze(1), (pos_rois_rotation),
                axis=2).squeeze(1)
            # ... and translate back to the global frame.
            pred_boxes3d[:, 0:3] += roi_xyz

            # calculate corner loss
            loss_corner = self.get_corner_loss_lidar(pred_boxes3d,
                                                     pos_gt_bboxes)
            losses['loss_corner'] = loss_corner
        else:
            losses['loss_corner'] = loss_cls.new_tensor(0)
        return losses
[docs] def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0): """Calculate corner loss of given boxes. Args: pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7). gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7). delta (float, optional): huber loss threshold. Defaults to 1.0 Returns: torch.FloatTensor: Calculated corner loss in shape (N). """ assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0] # This is a little bit hack here because we assume the box for # PointRCNN is in LiDAR coordinates gt_boxes_structure = LiDARInstance3DBoxes(gt_bbox3d) pred_box_corners = LiDARInstance3DBoxes(pred_bbox3d).corners gt_box_corners = gt_boxes_structure.corners # This flip only changes the heading direction of GT boxes gt_bbox3d_flip = gt_boxes_structure.clone() gt_bbox3d_flip.tensor[:, 6] += np.pi gt_box_corners_flip = gt_bbox3d_flip.corners corner_dist = torch.min( torch.norm(pred_box_corners - gt_box_corners, dim=2), torch.norm(pred_box_corners - gt_box_corners_flip, dim=2)) # huber loss abs_error = corner_dist.abs() quadratic = abs_error.clamp(max=delta) linear = (abs_error - quadratic) corner_loss = 0.5 * quadratic**2 + delta * linear return corner_loss.mean(dim=1)
[docs] def get_targets(self, sampling_results, rcnn_train_cfg, concat=True): """Generate targets. Args: sampling_results (list[:obj:`SamplingResult`]): Sampled results from rois. rcnn_train_cfg (:obj:`ConfigDict`): Training config of rcnn. concat (bool, optional): Whether to concatenate targets between batches. Defaults to True. Returns: tuple[torch.Tensor]: Targets of boxes and class prediction. """ pos_bboxes_list = [res.pos_bboxes for res in sampling_results] pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results] iou_list = [res.iou for res in sampling_results] targets = multi_apply( self._get_target_single, pos_bboxes_list, pos_gt_bboxes_list, iou_list, cfg=rcnn_train_cfg) (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, bbox_weights) = targets if concat: label = torch.cat(label, 0) bbox_targets = torch.cat(bbox_targets, 0) pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0) reg_mask = torch.cat(reg_mask, 0) label_weights = torch.cat(label_weights, 0) label_weights /= torch.clamp(label_weights.sum(), min=1.0) bbox_weights = torch.cat(bbox_weights, 0) bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0) return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, bbox_weights)
    def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
        """Generate training targets for a single sample.

        Args:
            pos_bboxes (torch.Tensor): Positive boxes with shape
                (N, 7).
            pos_gt_bboxes (torch.Tensor): Ground truth boxes with shape
                (M, 7).
            ious (torch.Tensor): IoU between `pos_bboxes` and `pos_gt_bboxes`
                in shape (N, M).
            cfg (dict): Training configs.

        Returns:
            tuple[torch.Tensor]: Target for positive boxes.
                (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
                bbox_weights)
        """
        cls_pos_mask = ious > cfg.cls_pos_thr
        cls_neg_mask = ious < cfg.cls_neg_thr
        # RoIs whose IoU falls between the two thresholds get a soft label.
        interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)

        # iou regression target: 1 for clear positives, 0 for clear
        # negatives, linearly interpolated in between.
        label = (cls_pos_mask > 0).float()
        label[interval_mask] = (ious[interval_mask] - cfg.cls_neg_thr) / \
            (cfg.cls_pos_thr - cfg.cls_neg_thr)
        # label weights
        # NOTE(review): `label >= 0` is always true here, so every RoI gets
        # weight 1 — presumably intentional, with normalization happening
        # later in `get_targets`.
        label_weights = (label >= 0).float()

        # box regression target: only the first pos_gt_bboxes.size(0) RoIs
        # have matched ground truths and are regressed.
        reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
        reg_mask[0:pos_gt_bboxes.size(0)] = 1
        bbox_weights = (reg_mask > 0).float()
        if reg_mask.bool().any():
            pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
            roi_center = pos_bboxes[..., 0:3]
            roi_ry = pos_bboxes[..., 6] % (2 * np.pi)

            # canonical transformation: express each GT box in its RoI's
            # local frame (translate to RoI center, undo RoI yaw).
            pos_gt_bboxes_ct[..., 0:3] -= roi_center
            pos_gt_bboxes_ct[..., 6] -= roi_ry
            pos_gt_bboxes_ct[..., 0:3] = rotation_3d_in_axis(
                pos_gt_bboxes_ct[..., 0:3].unsqueeze(1), -(roi_ry),
                axis=2).squeeze(1)

            # flip orientation if gt have opposite orientation
            ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi)  # 0 ~ 2pi
            is_opposite = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
            ry_label[is_opposite] = (ry_label[is_opposite] + np.pi) % (
                2 * np.pi)  # (0 ~ pi/2, 3pi/2 ~ 2pi)
            flag = ry_label > np.pi
            # Map into (-pi/2, pi/2) so the regression target is bounded.
            ry_label[flag] = ry_label[flag] - np.pi * 2  # (-pi/2, pi/2)
            ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
            pos_gt_bboxes_ct[..., 6] = ry_label

            # Encode against a zero-centered, zero-yaw anchor since the GTs
            # are already in the canonical RoI frame.
            rois_anchor = pos_bboxes.clone().detach()
            rois_anchor[:, 0:3] = 0
            rois_anchor[:, 6] = 0
            bbox_targets = self.bbox_coder.encode(rois_anchor,
                                                  pos_gt_bboxes_ct)
        else:
            # no fg bbox
            bbox_targets = pos_gt_bboxes.new_empty((0, 7))

        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
                bbox_weights)
    def get_bboxes(self,
                   rois,
                   cls_score,
                   bbox_pred,
                   class_labels,
                   img_metas,
                   cfg=None):
        """Generate bboxes from bbox head predictions.

        Args:
            rois (torch.Tensor): RoI bounding boxes.
            cls_score (torch.Tensor): Scores of bounding boxes.
            bbox_pred (torch.Tensor): Bounding boxes predictions
            class_labels (torch.Tensor): Label of classes
            img_metas (list[dict]): Point cloud and image's meta info.
            cfg (:obj:`ConfigDict`, optional): Testing config.
                Defaults to None.

        Returns:
            list[tuple]: Decoded bbox, scores and labels after nms.
        """
        roi_batch_id = rois[..., 0]
        roi_boxes = rois[..., 1:]  # boxes without batch id
        batch_size = int(roi_batch_id.max().item() + 1)

        # decode boxes: predictions are relative to a zero-centered RoI
        # anchor, so decode in the canonical frame first ...
        roi_ry = roi_boxes[..., 6].view(-1)
        roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
        local_roi_boxes = roi_boxes.clone().detach()
        local_roi_boxes[..., 0:3] = 0
        rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
        # ... then rotate by each RoI's yaw and translate back to the
        # global frame.
        rcnn_boxes3d[..., 0:3] = rotation_3d_in_axis(
            rcnn_boxes3d[..., 0:3].unsqueeze(1), roi_ry, axis=2).squeeze(1)
        rcnn_boxes3d[:, 0:3] += roi_xyz

        # post processing: run NMS independently for each sample in batch.
        result_list = []
        for batch_id in range(batch_size):
            cur_class_labels = class_labels[batch_id]
            cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)

            cur_box_prob = cur_cls_score.unsqueeze(1)
            cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
            keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
                                        cfg.score_thr, cfg.nms_thr,
                                        img_metas[batch_id],
                                        cfg.use_rotate_nms)
            selected_bboxes = cur_rcnn_boxes3d[keep]
            selected_label_preds = cur_class_labels[keep]
            selected_scores = cur_cls_score[keep]

            # Wrap the raw tensor in the sample's 3D box type before return.
            result_list.append(
                (img_metas[batch_id]['box_type_3d'](selected_bboxes,
                                                    self.bbox_coder.code_size),
                 selected_scores, selected_label_preds))
        return result_list
[docs] def multi_class_nms(self, box_probs, box_preds, score_thr, nms_thr, input_meta, use_rotate_nms=True): """Multi-class NMS for box head. Note: This function has large overlap with the `box3d_multiclass_nms` implemented in `mmdet3d.core.post_processing`. We are considering merging these two functions in the future. Args: box_probs (torch.Tensor): Predicted boxes probabilities in shape (N,). box_preds (torch.Tensor): Predicted boxes in shape (N, 7+C). score_thr (float): Threshold of scores. nms_thr (float): Threshold for NMS. input_meta (dict): Meta information of the current sample. use_rotate_nms (bool, optional): Whether to use rotated nms. Defaults to True. Returns: torch.Tensor: Selected indices. """ if use_rotate_nms: nms_func = nms_bev else: nms_func = nms_normal_bev assert box_probs.shape[ 1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}' selected_list = [] selected_labels = [] boxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d']( box_preds, self.bbox_coder.code_size).bev) score_thresh = score_thr if isinstance( score_thr, list) else [score_thr for x in range(self.num_classes)] nms_thresh = nms_thr if isinstance( nms_thr, list) else [nms_thr for x in range(self.num_classes)] for k in range(0, self.num_classes): class_scores_keep = box_probs[:, k] >= score_thresh[k] if class_scores_keep.int().sum() > 0: original_idxs = class_scores_keep.nonzero( as_tuple=False).view(-1) cur_boxes_for_nms = boxes_for_nms[class_scores_keep] cur_rank_scores = box_probs[class_scores_keep, k] cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores, nms_thresh[k]) if cur_selected.shape[0] == 0: continue selected_list.append(original_idxs[cur_selected]) selected_labels.append( torch.full([cur_selected.shape[0]], k + 1, dtype=torch.int64, device=box_preds.device)) keep = torch.cat( selected_list, dim=0) if len(selected_list) > 0 else [] return keep
Read the Docs v: dev
Versions
latest
stable
v1.0.0rc1
v1.0.0rc0
v0.18.1
v0.18.0
v0.17.3
v0.17.2
v0.17.1
v0.17.0
v0.16.0
v0.15.0
v0.14.0
v0.13.0
v0.12.0
v0.11.0
v0.10.0
v0.9.0
dev
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.