Source code for mmdet3d.models.detectors.imvoxelnet

# Copyright (c) OpenMMLab. All rights reserved.
import torch

from mmdet3d.core import bbox3d2result, build_prior_generator
from mmdet3d.core.bbox.structures.utils import get_proj_mat_by_coord_type
from mmdet3d.models.fusion_layers.point_fusion import point_sample
from mmdet.models.detectors import BaseDetector
from ..builder import DETECTORS, build_backbone, build_head, build_neck


@DETECTORS.register_module()
class ImVoxelNet(BaseDetector):
    r"""`ImVoxelNet <https://arxiv.org/abs/2106.01178>`_.

    Args:
        backbone (dict): Config of the backbone.
        neck (dict): Config of the 2d neck.
        neck_3d (dict): Config of the 3d neck.
        bbox_head (dict): Config of the head.
        prior_generator (dict): Config of the prior generator.
        n_voxels (tuple[int]): Number of voxels for the x, y, and z axes.
        coord_type (str): The type of coordinates of the point cloud:
            'DEPTH', 'LIDAR', or 'CAMERA'.
        train_cfg (dict, optional): Config for the train stage.
            Defaults to None.
        test_cfg (dict, optional): Config for the test stage.
            Defaults to None.
        init_cfg (dict, optional): Config for weight initialization.
            Defaults to None.
        pretrained (str, optional): Deprecated initialization parameter.
            Defaults to None.
    """

    def __init__(self,
                 backbone,
                 neck,
                 neck_3d,
                 bbox_head,
                 prior_generator,
                 n_voxels,
                 coord_type,
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=None,
                 pretrained=None):
        super().__init__(init_cfg=init_cfg)
        self.backbone = build_backbone(backbone)
        self.neck = build_neck(neck)
        self.neck_3d = build_neck(neck_3d)
        # The head needs the train/test configs at build time.
        bbox_head.update(train_cfg=train_cfg)
        bbox_head.update(test_cfg=test_cfg)
        self.bbox_head = build_head(bbox_head)
        self.n_voxels = n_voxels
        self.coord_type = coord_type
        self.prior_generator = build_prior_generator(prior_generator)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
    def extract_feat(self, img, img_metas):
        """Extract 3d features: backbone -> fpn -> 3d projection ->
        3d neck -> bbox_head.

        Args:
            img (torch.Tensor): Input images of shape (N, C_in, H, W).
            img_metas (list): Image metas.

        Returns:
            tuple:
             - Outputs of the bbox head, computed from a 3d feature volume
               of shape (N, C_out, N_x, N_y, N_z).
             - torch.Tensor: Valid mask of shape (N, 1, N_x, N_y, N_z).
        """
        x = self.backbone(img)
        x = self.neck(x)[0]
        # Voxel centers of the 3d grid; note the reversed (z, y, x) order.
        points = self.prior_generator.grid_anchors(
            [self.n_voxels[::-1]], device=img.device)[0][:, :3]
        volumes, valid_preds = [], []
        for feature, img_meta in zip(x, img_metas):
            img_scale_factor = (
                points.new_tensor(img_meta['scale_factor'][:2])
                if 'scale_factor' in img_meta.keys() else 1)
            img_flip = img_meta['flip'] if 'flip' in img_meta.keys() else False
            img_crop_offset = (
                points.new_tensor(img_meta['img_crop_offset'])
                if 'img_crop_offset' in img_meta.keys() else 0)
            proj_mat = points.new_tensor(
                get_proj_mat_by_coord_type(img_meta, self.coord_type))
            # Project voxel centers into the image and bilinearly sample
            # the 2d features at the projected locations.
            volume = point_sample(
                img_meta,
                img_features=feature[None, ...],
                points=points,
                proj_mat=proj_mat,
                coord_type=self.coord_type,
                img_scale_factor=img_scale_factor,
                img_crop_offset=img_crop_offset,
                img_flip=img_flip,
                img_pad_shape=img.shape[-2:],
                img_shape=img_meta['img_shape'][:2],
                aligned=False)
            volumes.append(
                volume.reshape(self.n_voxels[::-1] + [-1]).permute(3, 2, 1, 0))
            # Voxels that project outside the image receive all-zero
            # features and are marked invalid.
            valid_preds.append(
                ~torch.all(volumes[-1] == 0, dim=0, keepdim=True))
        x = torch.stack(volumes)
        x = self.neck_3d(x)
        x = self.bbox_head(x)
        return x, torch.stack(valid_preds).float()
    def forward_train(self, img, img_metas, gt_bboxes_3d, gt_labels_3d,
                      **kwargs):
        """Forward of training.

        Args:
            img (torch.Tensor): Input images of shape (N, C_in, H, W).
            img_metas (list): Image metas.
            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth bboxes
                of each batch.
            gt_labels_3d (list[torch.Tensor]): Ground truth class labels
                of each batch.

        Returns:
            dict[str, torch.Tensor]: A dictionary of loss components.
        """
        x, valid_preds = self.extract_feat(img, img_metas)
        # For indoor datasets ImVoxelNet uses ImVoxelHead, which handles
        # the mask of visible voxels.
        if self.coord_type == 'DEPTH':
            x += (valid_preds, )
        losses = self.bbox_head.loss(*x, gt_bboxes_3d, gt_labels_3d,
                                     img_metas)
        return losses
    def forward_test(self, img, img_metas, **kwargs):
        """Forward of testing.

        Args:
            img (torch.Tensor): Input images of shape (N, C_in, H, W).
            img_metas (list): Image metas.

        Returns:
            list[dict]: Predicted 3d boxes.
        """
        # not supporting aug_test for now
        return self.simple_test(img, img_metas)
    def simple_test(self, img, img_metas):
        """Test without augmentations.

        Args:
            img (torch.Tensor): Input images of shape (N, C_in, H, W).
            img_metas (list): Image metas.

        Returns:
            list[dict]: Predicted 3d boxes.
        """
        x, valid_preds = self.extract_feat(img, img_metas)
        # For indoor datasets ImVoxelNet uses ImVoxelHead, which handles
        # the mask of visible voxels.
        if self.coord_type == 'DEPTH':
            x += (valid_preds, )
        bbox_list = self.bbox_head.get_bboxes(*x, img_metas)
        bbox_results = [
            bbox3d2result(det_bboxes, det_scores, det_labels)
            for det_bboxes, det_scores, det_labels in bbox_list
        ]
        return bbox_results
    def aug_test(self, imgs, img_metas, **kwargs):
        """Test with augmentations.

        Args:
            imgs (list[torch.Tensor]): Input images of shape (N, C_in, H, W).
            img_metas (list): Image metas.

        Returns:
            list[dict]: Predicted 3d boxes.
        """
        raise NotImplementedError
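
The core of extract_feat is the 2d-to-3d lifting: every voxel center produced
by prior_generator.grid_anchors is projected into the image plane with the
camera projection matrix, and the 2d feature map is bilinearly sampled at the
projected pixels; point_sample additionally compensates for the test-time
flips, crops, and rescaling recorded in img_metas. The following is a minimal,
self-contained sketch of that idea in plain PyTorch, not the library's
implementation: the function name, the pinhole projection model, and the
simplification that the feature map spans the whole (unpadded, unscaled) image
are assumptions made for illustration.

import torch
import torch.nn.functional as F


def lift_features_to_voxels(feature_map, proj_mat, voxel_centers, img_hw):
    """Sample a 2d feature map at the image projections of 3d points.

    feature_map: (C, H, W) features from the 2d neck (illustrative).
    proj_mat: (3, 4) camera projection matrix (assumed pinhole model).
    voxel_centers: (M, 3) voxel centers in the camera/world frame.
    img_hw: (H_img, W_img) size of the image in pixels.
    Returns a (C, M) tensor; points projecting outside the image get
    all-zero features, which is what the valid_preds mask in
    extract_feat detects.
    """
    # Homogeneous coordinates: (M, 4).
    ones = voxel_centers.new_ones(voxel_centers.shape[0], 1)
    pts_hom = torch.cat([voxel_centers, ones], dim=1)
    # Project to the image plane and divide by depth.
    uvd = pts_hom @ proj_mat.t()                      # (M, 3)
    depth = uvd[:, 2:3].clamp(min=1e-5)
    uv = uvd[:, :2] / depth                           # pixel coordinates
    # Normalize to [-1, 1] for grid_sample (x is width, y is height).
    h, w = img_hw
    grid = torch.empty_like(uv)
    grid[:, 0] = uv[:, 0] / (w - 1) * 2 - 1
    grid[:, 1] = uv[:, 1] / (h - 1) * 2 - 1
    # Points at or behind the camera are invalid as well.
    grid[depth.squeeze(1) <= 1e-5] = -2               # push out of range
    # grid_sample expects (N, H_out, W_out, 2); use a 1 x M "image".
    sampled = F.grid_sample(
        feature_map[None],                # (1, C, H, W)
        grid[None, None],                 # (1, 1, M, 2)
        mode='bilinear',
        padding_mode='zeros',             # out-of-view points -> zeros
        align_corners=True)
    return sampled[0, :, 0]               # (C, M)

In extract_feat the sampled features are then reshaped to an
(N_z, N_y, N_x, C) grid, permuted to (C, N_x, N_y, N_z), and voxels whose
features are all zero (i.e. fell outside the image) are recorded in
valid_preds.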
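
Because the class is registered in DETECTORS, it is normally built from a
config file rather than constructed directly. A short sketch of that flow
follows; the config filename is taken from the configs/imvoxelnet directory
of the repository but may change between releases, so treat it as an
assumption.

from mmcv import Config

from mmdet3d.models import build_detector

# Any ImVoxelNet config under configs/imvoxelnet/ should work here; this
# particular filename is assumed and may differ between releases.
cfg = Config.fromfile('configs/imvoxelnet/imvoxelnet_4x8_kitti-3d-car.py')
model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
model.eval()  # simple_test expects a batched image tensor and img_metas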