123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- # Copyright (c) OpenMMLab. All rights reserved.
- from typing import Tuple
- import torch.nn as nn
- from mmcv.cnn import ConvModule
- from mmcv.ops import MaskedConv2d
- from torch import Tensor
- from mmdet.registry import MODELS
- from mmdet.utils import OptConfigType, OptMultiConfig
- from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead
@MODELS.register_module()
class GARetinaHead(GuidedAnchorHead):
    """Guided-Anchor-based RetinaNet head.

    The head runs the input through two parallel stacks of ``ConvModule``
    layers (one classification tower, one regression tower). 1x1 convs on
    top of the towers produce ``loc_pred`` and ``shape_pred``; the tower
    outputs are then passed through ``FeatureAdaption`` (conditioned on
    ``shape_pred``) and finally through ``MaskedConv2d`` layers that emit
    the classification scores and box predictions.

    Args:
        num_classes (int): Forwarded unchanged to ``GuidedAnchorHead``.
        in_channels (int): Input channels of the first conv in each tower;
            also forwarded to ``GuidedAnchorHead``.
        stacked_convs (int): Number of ``ConvModule`` layers in each tower.
            Defaults to 4.
        conv_cfg (OptConfigType): Passed as ``conv_cfg`` to every tower
            ``ConvModule``. Defaults to None.
        norm_cfg (OptConfigType): Passed as ``norm_cfg`` to every tower
            ``ConvModule``. Defaults to None.
        init_cfg (OptMultiConfig): Initialization config. When None, a
            ``Normal`` (std=0.01) config for ``Conv2d`` layers is used with
            ``bias_prob=0.01`` overrides for ``conv_loc`` and
            ``retina_cls``.
        **kwargs: Extra keyword arguments forwarded to
            ``GuidedAnchorHead``.
    """

    def __init__(self,
                 num_classes: int,
                 in_channels: int,
                 stacked_convs: int = 4,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: OptConfigType = None,
                 init_cfg: OptMultiConfig = None,
                 **kwargs) -> None:
        # Assigned before the parent constructor is invoked, as in the
        # original ordering. NOTE(review): presumably the parent builds the
        # layers via ``_init_layers`` during construction and therefore
        # needs these attributes already set -- confirm against
        # GuidedAnchorHead.
        self.stacked_convs = stacked_convs
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        if init_cfg is None:
            # Both layers receive the same override apart from their name.
            overridden_layers = ('conv_loc', 'retina_cls')
            init_cfg = dict(
                type='Normal',
                layer='Conv2d',
                std=0.01,
                override=[
                    dict(
                        type='Normal',
                        name=layer_name,
                        std=0.01,
                        bias_prob=0.01) for layer_name in overridden_layers
                ])

        super().__init__(
            num_classes=num_classes,
            in_channels=in_channels,
            init_cfg=init_cfg,
            **kwargs)

    def _init_layers(self) -> None:
        """Initialize layers of the head."""
        self.relu = nn.ReLU(inplace=True)
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()

        # Settings shared by every 3x3 conv in both towers.
        tower_kwargs = dict(
            stride=1,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg)
        for layer_idx in range(self.stacked_convs):
            # Only the first layer of a tower consumes the raw input width.
            in_ch = self.feat_channels if layer_idx > 0 else self.in_channels
            # Layers are created in cls-then-reg order at every depth so the
            # module construction sequence stays interleaved.
            for tower in (self.cls_convs, self.reg_convs):
                tower.append(
                    ConvModule(in_ch, self.feat_channels, 3, **tower_kwargs))

        # 1x1 prediction convs: a single-channel output for ``conv_loc`` and
        # two channels per base prior for ``conv_shape``.
        self.conv_loc = nn.Conv2d(self.feat_channels, 1, 1)
        num_anchors = self.square_anchor_generator.num_base_priors[0]
        self.conv_shape = nn.Conv2d(self.feat_channels, num_anchors * 2, 1)

        # One feature-adaption module per branch, identically configured.
        adaption_kwargs = dict(
            kernel_size=3, deform_groups=self.deform_groups)
        self.feature_adaption_cls = FeatureAdaption(
            self.feat_channels, self.feat_channels, **adaption_kwargs)
        self.feature_adaption_reg = FeatureAdaption(
            self.feat_channels, self.feat_channels, **adaption_kwargs)

        # Final masked 3x3 convs producing the per-prior outputs.
        cls_out = self.num_base_priors * self.cls_out_channels
        self.retina_cls = MaskedConv2d(
            self.feat_channels, cls_out, 3, padding=1)
        reg_out = self.num_base_priors * 4
        self.retina_reg = MaskedConv2d(
            self.feat_channels, reg_out, 3, padding=1)

    def forward_single(self, x: Tensor) -> Tuple[Tensor]:
        """Forward feature map of a single scale level.

        Args:
            x (Tensor): Feature map of one scale level.

        Returns:
            tuple: ``(cls_score, bbox_pred, shape_pred, loc_pred)`` for this
            level.
        """
        cls_feat, reg_feat = x, x
        for tower_conv in self.cls_convs:
            cls_feat = tower_conv(cls_feat)
        for tower_conv in self.reg_convs:
            reg_feat = tower_conv(reg_feat)

        # ``loc_pred`` comes from the classification tower and
        # ``shape_pred`` from the regression tower.
        loc_pred = self.conv_loc(cls_feat)
        shape_pred = self.conv_shape(reg_feat)

        # Both branches are adapted using the same predicted shapes.
        cls_feat = self.feature_adaption_cls(cls_feat, shape_pred)
        reg_feat = self.feature_adaption_reg(reg_feat, shape_pred)

        # No mask while training. Otherwise the mask is the thresholded
        # sigmoid of the first entry of ``loc_pred`` along dim 0.
        # NOTE(review): presumably dim 0 is the batch axis, i.e. only the
        # first sample's mask is used -- confirm.
        mask = None if self.training else (
            loc_pred.sigmoid()[0] >= self.loc_filter_thr)

        cls_score = self.retina_cls(cls_feat, mask)
        bbox_pred = self.retina_reg(reg_feat, mask)
        return cls_score, bbox_pred, shape_pred, loc_pred
|