# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.ops import nms
from mmengine.structures import InstanceData
from torch import Tensor

from mmdet.registry import MODELS
from mmdet.utils import ConfigType, InstanceList, MultiConfig, OptInstanceList
from .guided_anchor_head import GuidedAnchorHead


@MODELS.register_module()
class GARPNHead(GuidedAnchorHead):
    """Guided-Anchor-based RPN head.

    RPN head variant of `Guided Anchoring
    <https://arxiv.org/abs/1901.03278>`_, which predicts anchor locations
    and shapes instead of relying on a dense, predefined anchor set.
    """

    def __init__(self,
                 in_channels: int,
                 num_classes: int = 1,
                 init_cfg: MultiConfig = dict(
                     type='Normal',
                     layer='Conv2d',
                     std=0.01,
                     override=dict(
                         type='Normal',
                         name='conv_loc',
                         std=0.01,
                         bias_prob=0.01)),
                 **kwargs) -> None:
        super().__init__(
            num_classes=num_classes,
            in_channels=in_channels,
            init_cfg=init_cfg,
            **kwargs)

    def _init_layers(self) -> None:
        """Initialize layers of the head."""
        self.rpn_conv = nn.Conv2d(
            self.in_channels, self.feat_channels, 3, padding=1)
        super(GARPNHead, self)._init_layers()

    def forward_single(
            self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        """Forward feature of a single scale level."""
        x = self.rpn_conv(x)
        x = F.relu(x, inplace=True)
        (cls_score, bbox_pred, shape_pred,
         loc_pred) = super().forward_single(x)
        return cls_score, bbox_pred, shape_pred, loc_pred
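
    # Per-level output shapes, for reference (a sketch based on the parent
    # GuidedAnchorHead, where conv_shape predicts (dw, dh) deltas per square
    # anchor and conv_loc predicts a single objectness channel):
    #   cls_score:  (N, num_anchors * num_classes, H, W)
    #   bbox_pred:  (N, num_anchors * 4, H, W)
    #   shape_pred: (N, num_anchors * 2, H, W)
    #   loc_pred:   (N, 1, H, W)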

    def loss_by_feat(
            self,
            cls_scores: List[Tensor],
            bbox_preds: List[Tensor],
            shape_preds: List[Tensor],
            loc_preds: List[Tensor],
            batch_gt_instances: InstanceList,
            batch_img_metas: List[dict],
            batch_gt_instances_ignore: OptInstanceList = None) -> dict:
        """Calculate the loss based on the features extracted by the
        detection head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                each with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            shape_preds (list[Tensor]): Shape predictions for each scale
                level with shape (N, num_anchors * 2, H, W).
            loc_preds (list[Tensor]): Location predictions for each scale
                level with shape (N, 1, H, W).
            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
                gt_instance. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_img_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
                data that is ignored during training and testing.
                Defaults to None.

        Returns:
            dict: A dictionary of loss components.
        """
        losses = super().loss_by_feat(
            cls_scores,
            bbox_preds,
            shape_preds,
            loc_preds,
            batch_gt_instances,
            batch_img_metas,
            batch_gt_instances_ignore=batch_gt_instances_ignore)
        # Rename the generic GuidedAnchorHead loss terms with an rpn/anchor
        # prefix so they stay distinguishable from R-CNN losses in logs.
        return dict(
            loss_rpn_cls=losses['loss_cls'],
            loss_rpn_bbox=losses['loss_bbox'],
            loss_anchor_shape=losses['loss_shape'],
            loss_anchor_loc=losses['loss_loc'])
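
    # How the dict is typically consumed downstream (a sketch, not part of
    # this head): mmengine's default loss parsing sums every entry whose key
    # contains 'loss', flattening per-level lists, roughly:
    #   total = sum(sum(v) if isinstance(v, list) else v
    #               for v in losses.values())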

    def _predict_by_feat_single(self,
                                cls_scores: List[Tensor],
                                bbox_preds: List[Tensor],
                                mlvl_anchors: List[Tensor],
                                mlvl_masks: List[Tensor],
                                img_meta: dict,
                                cfg: ConfigType,
                                rescale: bool = False) -> InstanceData:
        """Transform a single image's features extracted from the head into
        bbox results.

        Args:
            cls_scores (list[Tensor]): Box scores from all scale
                levels of a single image, each item with shape
                (num_priors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas from
                all scale levels of a single image, each item with shape
                (num_priors * 4, H, W).
            mlvl_anchors (list[Tensor]): Each element in the list is
                the anchors of a single level in the feature pyramid. It has
                shape (num_priors, 4).
            mlvl_masks (list[Tensor]): Each element in the list is the
                location masks of a single level.
            img_meta (dict): Image meta info.
            cfg (:obj:`ConfigDict` or dict): Test / postprocessing
                configuration. If None, ``self.test_cfg`` is used.
            rescale (bool): If True, return boxes in original image space.
                Defaults to False.

        Returns:
            :obj:`InstanceData`: Detection results of the image after
            post-processing. It usually contains the following keys:

            - scores (Tensor): Classification scores with shape
              (num_instances, ).
            - labels (Tensor): Labels of bboxes with shape (num_instances, ).
            - bboxes (Tensor): Bboxes with shape (num_instances, 4), where
              the last dimension is arranged as (x1, y1, x2, y2).
        """
        cfg = self.test_cfg if cfg is None else cfg
        cfg = copy.deepcopy(cfg)
        assert cfg.nms.get('type', 'nms') == 'nms', \
            'GARPNHead only supports naive nms.'
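        # Fields read from `cfg` below, with the values used by the stock
        # GA-RPN config as a reference (a sketch; adjust to your setup):
        #   nms_pre=1000, nms_post=1000, max_per_img=300, min_bbox_size=0,
        #   nms=dict(type='nms', iou_threshold=0.7)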

        mlvl_proposals = []
        for idx in range(len(cls_scores)):
            rpn_cls_score = cls_scores[idx]
            rpn_bbox_pred = bbox_preds[idx]
            anchors = mlvl_anchors[idx]
            mask = mlvl_masks[idx]
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            # if no location is kept, skip this level.
            if mask.sum() == 0:
                continue
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                # Note that FG labels are set to [0, num_class - 1] and the
                # BG cat_id to num_class since mmdet v2.0, so the last
                # softmax column is the background score and is dropped.
                scores = rpn_cls_score.softmax(dim=1)[:, :-1]
            # filter scores, bbox_pred w.r.t. mask.
            # anchors are filtered in get_anchors() beforehand.
            scores = scores[mask]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1,
                                                                   4)[mask, :]
            # defensively restore a leading dim if indexing yielded
            # 0-dim tensors
            if scores.dim() == 0:
                rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)
                anchors = anchors.unsqueeze(0)
                scores = scores.unsqueeze(0)
            # filter anchors, bbox_pred, scores w.r.t. scores
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                _, topk_inds = scores.topk(cfg.nms_pre)
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
                scores = scores[topk_inds]
            # get proposals w.r.t. anchors and rpn_bbox_pred
            proposals = self.bbox_coder.decode(
                anchors, rpn_bbox_pred, max_shape=img_meta['img_shape'])
            # filter out too small bboxes
            if cfg.min_bbox_size >= 0:
                w = proposals[:, 2] - proposals[:, 0]
                h = proposals[:, 3] - proposals[:, 1]
                valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)
                if not valid_mask.all():
                    proposals = proposals[valid_mask]
                    scores = scores[valid_mask]

            # NMS in the current level
            proposals, _ = nms(proposals, scores, cfg.nms.iou_threshold)
            proposals = proposals[:cfg.nms_post, :]
            mlvl_proposals.append(proposals)
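
        # `mmcv.ops.nms` returns dets of shape (n, 5): the input boxes with
        # their score appended as the last column, hence the [:, :4] and
        # [:, -1] indexing in the cross-level step below.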
        proposals = torch.cat(mlvl_proposals, 0)
        if cfg.get('nms_across_levels', False):
            # NMS across multi levels
            proposals, _ = nms(proposals[:, :4], proposals[:, -1],
                               cfg.nms.iou_threshold)
            proposals = proposals[:cfg.max_per_img, :]
        else:
            scores = proposals[:, 4]
            num = min(cfg.max_per_img, proposals.shape[0])
            _, topk_inds = scores.topk(num)
            proposals = proposals[topk_inds, :]

        bboxes = proposals[:, :-1]
        scores = proposals[:, -1]
        if rescale:
            assert img_meta.get('scale_factor') is not None
            bboxes /= bboxes.new_tensor(img_meta['scale_factor']).repeat(
                (1, 2))

        results = InstanceData()
        results.bboxes = bboxes
        results.scores = scores
        # RPN proposals are class-agnostic, so every label is 0.
        results.labels = scores.new_zeros(scores.size(0), dtype=torch.long)
        return results
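

# Usage sketch (hypothetical, not executed on import): build the head from a
# config and run a forward pass on dummy FPN features. Assumes mmdet's
# registries have been populated, e.g. via
# `from mmdet.utils import register_all_modules; register_all_modules()`.
#
#   head = MODELS.build(dict(type='GARPNHead', in_channels=256))
#   feats = [torch.rand(2, 256, 200 // 2**i, 200 // 2**i) for i in range(5)]
#   cls_scores, bbox_preds, shape_preds, loc_preds = head(feats)
#   assert loc_preds[0].shape[1] == 1  # one objectness channel per location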