# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule
from mmengine.model import BaseModule
from torch import Tensor

from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig

from .se_layer import ChannelAttention


class DarknetBottleneck(BaseModule):
    """The basic bottleneck block used in Darknet.

    Each ResBlock consists of two ConvModules and the input is added to the
    final output. Each ConvModule is composed of Conv, BN, and an activation
    layer (Swish by default). The first convolution has a 1x1 kernel and the
    second one has a 3x3 kernel.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        expansion (float): The expansion ratio of the hidden channels.
            Defaults to 0.5.
        add_identity (bool): Whether to add an identity shortcut to the
            output. Defaults to True.
        use_depthwise (bool): Whether to use depthwise separable convolution.
            Defaults to False.
        conv_cfg (dict): Config dict for convolution layer. Defaults to None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='Swish').
        init_cfg (:obj:`ConfigDict` or dict or list[dict] or
            list[:obj:`ConfigDict`], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expansion: float = 0.5,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='Swish'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        hidden_channels = int(out_channels * expansion)
        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule
        self.conv1 = ConvModule(
            in_channels,
            hidden_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.conv2 = conv(
            hidden_channels,
            out_channels,
            3,
            stride=1,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.add_identity = \
            add_identity and in_channels == out_channels

    def forward(self, x: Tensor) -> Tensor:
        """Forward function."""
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)

        if self.add_identity:
            return out + identity
        else:
            return out
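

# Illustrative usage sketch, not part of the upstream file: builds a single
# DarknetBottleneck and checks that the residual block preserves the input
# shape. Assumes mmcv/mmengine are installed so the default 'Swish' activation
# and 'BN' norm can be resolved from the registry.
def _example_darknet_bottleneck() -> None:
    block = DarknetBottleneck(in_channels=64, out_channels=64, expansion=0.5)
    x = torch.randn(2, 64, 32, 32)
    out = block(x)
    # in_channels == out_channels, so the identity shortcut is active and the
    # output keeps the (N, C, H, W) shape of the input.
    assert out.shape == x.shape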


class CSPNeXtBlock(BaseModule):
    """The basic bottleneck block used in CSPNeXt.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        expansion (float): Expand ratio of the hidden channels.
            Defaults to 0.5.
        add_identity (bool): Whether to add an identity shortcut to the
            output. Only effective when in_channels == out_channels.
            Defaults to True.
        use_depthwise (bool): Whether to use depthwise separable convolution.
            Defaults to False.
        kernel_size (int): The kernel size of the second convolution layer.
            Defaults to 5.
        conv_cfg (dict): Config dict for convolution layer. Defaults to None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='SiLU').
        init_cfg (:obj:`ConfigDict` or dict or list[dict] or
            list[:obj:`ConfigDict`], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expansion: float = 0.5,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 kernel_size: int = 5,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='SiLU'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        hidden_channels = int(out_channels * expansion)
        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule
        self.conv1 = conv(
            in_channels,
            hidden_channels,
            3,
            stride=1,
            padding=1,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.conv2 = DepthwiseSeparableConvModule(
            hidden_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding=kernel_size // 2,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.add_identity = \
            add_identity and in_channels == out_channels

    def forward(self, x: Tensor) -> Tensor:
        """Forward function."""
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)

        if self.add_identity:
            return out + identity
        else:
            return out
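

# Illustrative usage sketch, not part of the upstream file: a CSPNeXtBlock
# with the default 5x5 depthwise-separable second conv. Padding is set to
# kernel_size // 2 and all strides are 1, so the spatial size is preserved.
def _example_cspnext_block() -> None:
    block = CSPNeXtBlock(in_channels=128, out_channels=128, kernel_size=5)
    x = torch.randn(2, 128, 20, 20)
    assert block(x).shape == x.shape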


class CSPLayer(BaseModule):
    """Cross Stage Partial Layer.

    Args:
        in_channels (int): The input channels of the CSP layer.
        out_channels (int): The output channels of the CSP layer.
        expand_ratio (float): Ratio to adjust the number of channels of the
            hidden layer. Defaults to 0.5.
        num_blocks (int): Number of blocks. Defaults to 1.
        add_identity (bool): Whether to add an identity shortcut in the
            blocks. Defaults to True.
        use_cspnext_block (bool): Whether to use CSPNeXt blocks instead of
            Darknet bottlenecks. Defaults to False.
        use_depthwise (bool): Whether to use depthwise separable convolution
            in the blocks. Defaults to False.
        channel_attention (bool): Whether to add channel attention in each
            stage. Defaults to False.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Defaults to None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='Swish').
        init_cfg (:obj:`ConfigDict` or dict or list[dict] or
            list[:obj:`ConfigDict`], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expand_ratio: float = 0.5,
                 num_blocks: int = 1,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 use_cspnext_block: bool = False,
                 channel_attention: bool = False,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='Swish'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        block = CSPNeXtBlock if use_cspnext_block else DarknetBottleneck
        mid_channels = int(out_channels * expand_ratio)
        self.channel_attention = channel_attention
        self.main_conv = ConvModule(
            in_channels,
            mid_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.short_conv = ConvModule(
            in_channels,
            mid_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.final_conv = ConvModule(
            2 * mid_channels,
            out_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        self.blocks = nn.Sequential(*[
            block(
                mid_channels,
                mid_channels,
                1.0,
                add_identity,
                use_depthwise,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg) for _ in range(num_blocks)
        ])
        if channel_attention:
            self.attention = ChannelAttention(2 * mid_channels)

    def forward(self, x: Tensor) -> Tensor:
        """Forward function."""
        x_short = self.short_conv(x)

        x_main = self.main_conv(x)
        x_main = self.blocks(x_main)

        x_final = torch.cat((x_main, x_short), dim=1)

        if self.channel_attention:
            x_final = self.attention(x_final)
        return self.final_conv(x_final)
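

# Illustrative usage sketch, not part of the upstream file: a CSPLayer built
# with CSPNeXt blocks and channel attention. The layer splits the input into a
# main branch (1x1 conv + stacked blocks) and a shortcut branch (1x1 conv),
# concatenates the two, optionally applies channel attention, and fuses the
# result with a final 1x1 conv. Every conv uses stride 1, so only the channel
# count changes.
if __name__ == '__main__':
    layer = CSPLayer(
        in_channels=256,
        out_channels=512,
        expand_ratio=0.5,
        num_blocks=2,
        use_cspnext_block=True,
        channel_attention=True)
    x = torch.randn(2, 256, 40, 40)
    print(layer(x).shape)  # expected: torch.Size([2, 512, 40, 40])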