# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) 2019 Western Digital Corporation or its affiliates.
import warnings

import torch.nn as nn
from mmcv.cnn import ConvModule
from mmengine.model import BaseModule
from torch.nn.modules.batchnorm import _BatchNorm

from mmdet.registry import MODELS


class ResBlock(BaseModule):
    """The basic residual block used in Darknet. Each ResBlock consists of
    two ConvModules, and the input is added to the final output. Each
    ConvModule is composed of Conv, BN, and LeakyReLU. Following the YOLOv3
    paper, the first conv layer has half as many filters as the second one:
    the first conv layer has a 1x1 kernel and the second a 3x3 kernel.

    Args:
        in_channels (int): The input channels. Must be even.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None
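
    Example:
        A minimal usage sketch. Since conv1 halves the channels and conv2
        restores them, the output shape matches the input:

        >>> import torch
        >>> block = ResBlock(64)
        >>> block.eval()
        >>> out = block(torch.rand(1, 64, 56, 56))
        >>> print(tuple(out.shape))
        (1, 64, 56, 56)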
- """
- def __init__(self,
- in_channels,
- conv_cfg=None,
- norm_cfg=dict(type='BN', requires_grad=True),
- act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
- init_cfg=None):
- super(ResBlock, self).__init__(init_cfg)
- assert in_channels % 2 == 0 # ensure the in_channels is even
- half_in_channels = in_channels // 2
- # shortcut
- cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)
- self.conv1 = ConvModule(in_channels, half_in_channels, 1, **cfg)
- self.conv2 = ConvModule(
- half_in_channels, in_channels, 3, padding=1, **cfg)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        out = out + residual

        return out


@MODELS.register_module()
class Darknet(BaseModule):
    """Darknet backbone.

    Args:
        depth (int): Depth of Darknet. Currently only 53 is supported.
        out_indices (Sequence[int]): Output from which stages.
            Default: (3, 4, 5).
        frozen_stages (int): Stages to be frozen (stop grad and set eval
            mode). -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: True.
        pretrained (str, optional): Model pretrained path. Default: None
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None

    Example:
        >>> from mmdet.models import Darknet
        >>> import torch
        >>> self = Darknet(depth=53)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 416, 416)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        ...
        (1, 256, 52, 52)
        (1, 512, 26, 26)
        (1, 1024, 13, 13)
    """

    # Dict(depth: (layers, channels)): layers[i] is the number of ResBlocks
    # in stage i + 1 and channels[i] is its (in, out) channel pair
    arch_settings = {
        53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),
                               (512, 1024)))
    }

    def __init__(self,
                 depth=53,
                 out_indices=(3, 4, 5),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 norm_eval=True,
                 pretrained=None,
                 init_cfg=None):
        super(Darknet, self).__init__(init_cfg)
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for darknet')

        self.depth = depth
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.layers, self.channels = self.arch_settings[depth]

        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # the stem conv, followed by one conv + residual stage per entry
        # in self.layers
        self.conv1 = ConvModule(3, 32, 3, padding=1, **cfg)

        self.cr_blocks = ['conv1']
        for i, n_layers in enumerate(self.layers):
            layer_name = f'conv_res_block{i + 1}'
            in_c, out_c = self.channels[i]
            self.add_module(
                layer_name,
                self.make_conv_res_block(in_c, out_c, n_layers, **cfg))
            self.cr_blocks.append(layer_name)

        self.norm_eval = norm_eval

        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be specified at the same time'
        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
        elif pretrained is None:
            if init_cfg is None:
                self.init_cfg = [
                    dict(type='Kaiming', layer='Conv2d'),
                    dict(
                        type='Constant',
                        val=1,
                        layer=['_BatchNorm', 'GroupNorm'])
                ]
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        outs = []
        for i, layer_name in enumerate(self.cr_blocks):
            cr_block = getattr(self, layer_name)
            x = cr_block(x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        if self.frozen_stages >= 0:
            for i in range(self.frozen_stages):
                m = getattr(self, self.cr_blocks[i])
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        super(Darknet, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()

    @staticmethod
    def make_conv_res_block(in_channels,
                            out_channels,
                            res_repeat,
                            conv_cfg=None,
                            norm_cfg=dict(type='BN', requires_grad=True),
                            act_cfg=dict(type='LeakyReLU',
                                         negative_slope=0.1)):
- """In Darknet backbone, ConvLayer is usually followed by ResBlock. This
- function will make that. The Conv layers always have 3x3 filters with
- stride=2. The number of the filters in Conv layer is the same as the
- out channels of the ResBlock.
- Args:
- in_channels (int): The number of input channels.
- out_channels (int): The number of output channels.
- res_repeat (int): The number of ResBlocks.
- conv_cfg (dict): Config dict for convolution layer. Default: None.
- norm_cfg (dict): Dictionary to construct and config norm layer.
- Default: dict(type='BN', requires_grad=True)
- act_cfg (dict): Config dict for activation layer.
- Default: dict(type='LeakyReLU', negative_slope=0.1).
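
        Example:
            A minimal usage sketch: a stride-2 downsampling conv followed by
            two ResBlocks halves the spatial size and changes the channels:

            >>> import torch
            >>> block = Darknet.make_conv_res_block(64, 128, res_repeat=2)
            >>> out = block(torch.rand(1, 64, 56, 56))
            >>> print(tuple(out.shape))
            (1, 128, 28, 28)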
- """
- cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)
- model = nn.Sequential()
- model.add_module(
- 'conv',
- ConvModule(
- in_channels, out_channels, 3, stride=2, padding=1, **cfg))
- for idx in range(res_repeat):
- model.add_module('res{}'.format(idx),
- ResBlock(out_channels, **cfg))
- return model
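

# A minimal smoke-test sketch (assumes torch, mmcv and mmengine are
# installed): build the backbone, freeze the stem, and check the three
# default output levels.
if __name__ == '__main__':
    import torch

    model = Darknet(depth=53, frozen_stages=1)
    model.train()  # the frozen stem stays in eval mode with grads disabled
    assert not any(p.requires_grad for p in model.conv1.parameters())

    model.eval()
    with torch.no_grad():
        feats = model(torch.rand(1, 3, 416, 416))
    for feat in feats:
        print(tuple(feat.shape))
    # prints (1, 256, 52, 52), (1, 512, 26, 26) and (1, 1024, 13, 13)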