cspnext_pafpn.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. # Copyright (c) OpenMMLab. All rights reserved.
import math
from typing import Optional, Sequence, Tuple

import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule
from mmengine.model import BaseModule
from torch import Tensor

from mmdet.registry import MODELS
from mmdet.utils import ConfigType, OptMultiConfig
from ..layers import CSPLayer
  12. @MODELS.register_module()
  13. class CSPNeXtPAFPN(BaseModule):
  14. """Path Aggregation Network with CSPNeXt blocks.
  15. Args:
  16. in_channels (Sequence[int]): Number of input channels per scale.
  17. out_channels (int): Number of output channels (used at each scale)
  18. num_csp_blocks (int): Number of bottlenecks in CSPLayer.
  19. Defaults to 3.
  20. use_depthwise (bool): Whether to use depthwise separable convolution in
  21. blocks. Defaults to False.
  22. expand_ratio (float): Ratio to adjust the number of channels of the
  23. hidden layer. Default: 0.5
  24. upsample_cfg (dict): Config dict for interpolate layer.
  25. Default: `dict(scale_factor=2, mode='nearest')`
  26. conv_cfg (dict, optional): Config dict for convolution layer.
  27. Default: None, which means using conv2d.
  28. norm_cfg (dict): Config dict for normalization layer.
  29. Default: dict(type='BN')
  30. act_cfg (dict): Config dict for activation layer.
  31. Default: dict(type='Swish')
  32. init_cfg (dict or list[dict], optional): Initialization config dict.
  33. Default: None.
  34. """
  35. def __init__(
  36. self,
  37. in_channels: Sequence[int],
  38. out_channels: int,
  39. num_csp_blocks: int = 3,
  40. use_depthwise: bool = False,
  41. expand_ratio: float = 0.5,
  42. upsample_cfg: ConfigType = dict(scale_factor=2, mode='nearest'),
  43. conv_cfg: bool = None,
  44. norm_cfg: ConfigType = dict(type='BN', momentum=0.03, eps=0.001),
  45. act_cfg: ConfigType = dict(type='Swish'),
  46. init_cfg: OptMultiConfig = dict(
  47. type='Kaiming',
  48. layer='Conv2d',
  49. a=math.sqrt(5),
  50. distribution='uniform',
  51. mode='fan_in',
  52. nonlinearity='leaky_relu')
  53. ) -> None:
  54. super().__init__(init_cfg)
  55. self.in_channels = in_channels
  56. self.out_channels = out_channels
  57. conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule
  58. # build top-down blocks
  59. self.upsample = nn.Upsample(**upsample_cfg)
  60. self.reduce_layers = nn.ModuleList()
  61. self.top_down_blocks = nn.ModuleList()
  62. for idx in range(len(in_channels) - 1, 0, -1):
  63. self.reduce_layers.append(
  64. ConvModule(
  65. in_channels[idx],
  66. in_channels[idx - 1],
  67. 1,
  68. conv_cfg=conv_cfg,
  69. norm_cfg=norm_cfg,
  70. act_cfg=act_cfg))
  71. self.top_down_blocks.append(
  72. CSPLayer(
  73. in_channels[idx - 1] * 2,
  74. in_channels[idx - 1],
  75. num_blocks=num_csp_blocks,
  76. add_identity=False,
  77. use_depthwise=use_depthwise,
  78. use_cspnext_block=True,
  79. expand_ratio=expand_ratio,
  80. conv_cfg=conv_cfg,
  81. norm_cfg=norm_cfg,
  82. act_cfg=act_cfg))
  83. # build bottom-up blocks
  84. self.downsamples = nn.ModuleList()
  85. self.bottom_up_blocks = nn.ModuleList()
  86. for idx in range(len(in_channels) - 1):
  87. self.downsamples.append(
  88. conv(
  89. in_channels[idx],
  90. in_channels[idx],
  91. 3,
  92. stride=2,
  93. padding=1,
  94. conv_cfg=conv_cfg,
  95. norm_cfg=norm_cfg,
  96. act_cfg=act_cfg))
  97. self.bottom_up_blocks.append(
  98. CSPLayer(
  99. in_channels[idx] * 2,
  100. in_channels[idx + 1],
  101. num_blocks=num_csp_blocks,
  102. add_identity=False,
  103. use_depthwise=use_depthwise,
  104. use_cspnext_block=True,
  105. expand_ratio=expand_ratio,
  106. conv_cfg=conv_cfg,
  107. norm_cfg=norm_cfg,
  108. act_cfg=act_cfg))
  109. self.out_convs = nn.ModuleList()
  110. for i in range(len(in_channels)):
  111. self.out_convs.append(
  112. conv(
  113. in_channels[i],
  114. out_channels,
  115. 3,
  116. padding=1,
  117. conv_cfg=conv_cfg,
  118. norm_cfg=norm_cfg,
  119. act_cfg=act_cfg))
  120. def forward(self, inputs: Tuple[Tensor, ...]) -> Tuple[Tensor, ...]:
  121. """
  122. Args:
  123. inputs (tuple[Tensor]): input features.
  124. Returns:
  125. tuple[Tensor]: YOLOXPAFPN features.
  126. """
  127. assert len(inputs) == len(self.in_channels)
  128. # top-down path
  129. inner_outs = [inputs[-1]]
  130. for idx in range(len(self.in_channels) - 1, 0, -1):
  131. feat_heigh = inner_outs[0]
  132. feat_low = inputs[idx - 1]
  133. feat_heigh = self.reduce_layers[len(self.in_channels) - 1 - idx](
  134. feat_heigh)
  135. inner_outs[0] = feat_heigh
  136. upsample_feat = self.upsample(feat_heigh)
  137. inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
  138. torch.cat([upsample_feat, feat_low], 1))
  139. inner_outs.insert(0, inner_out)
  140. # bottom-up path
  141. outs = [inner_outs[0]]
  142. for idx in range(len(self.in_channels) - 1):
  143. feat_low = outs[-1]
  144. feat_height = inner_outs[idx + 1]
  145. downsample_feat = self.downsamples[idx](feat_low)
  146. out = self.bottom_up_blocks[idx](
  147. torch.cat([downsample_feat, feat_height], 1))
  148. outs.append(out)
  149. # out convs
  150. for idx, conv in enumerate(self.out_convs):
  151. outs[idx] = conv(outs[idx])
  152. return tuple(outs)