  1. # Copyright (c) OpenMMLab. All rights reserved.
  2. import unittest
  3. import pytest
  4. import torch
  5. from mmengine.config import ConfigDict
  6. from mmdet.models.layers import DropBlock
  7. from mmdet.registry import MODELS
  8. from mmdet.utils import register_all_modules
  9. register_all_modules()
  10. def test_dropblock():
  11. feat = torch.rand(1, 1, 11, 11)
  12. drop_prob = 1.0
  13. dropblock = DropBlock(drop_prob, block_size=11, warmup_iters=0)
  14. out_feat = dropblock(feat)
  15. assert (out_feat == 0).all() and out_feat.shape == feat.shape
  16. drop_prob = 0.5
  17. dropblock = DropBlock(drop_prob, block_size=5, warmup_iters=0)
  18. out_feat = dropblock(feat)
  19. assert out_feat.shape == feat.shape
  20. # drop_prob must be (0,1]
  21. with pytest.raises(AssertionError):
  22. DropBlock(1.5, 3)
  23. # block_size cannot be an even number
  24. with pytest.raises(AssertionError):
  25. DropBlock(0.5, 2)
  26. # warmup_iters cannot be less than 0
  27. with pytest.raises(AssertionError):
  28. DropBlock(0.5, 3, -1)
  29. class TestPixelDecoder(unittest.TestCase):
  30. def test_forward(self):
  31. base_channels = 64
  32. pixel_decoder_cfg = ConfigDict(
  33. dict(
  34. type='PixelDecoder',
  35. in_channels=[base_channels * 2**i for i in range(4)],
  36. feat_channels=base_channels,
  37. out_channels=base_channels,
  38. norm_cfg=dict(type='GN', num_groups=32),
  39. act_cfg=dict(type='ReLU')))
  40. self = MODELS.build(pixel_decoder_cfg)
  41. self.init_weights()
  42. img_metas = [{}, {}]
  43. feats = [
  44. torch.rand(
  45. (2, base_channels * 2**i, 4 * 2**(3 - i), 5 * 2**(3 - i)))
  46. for i in range(4)
  47. ]
  48. mask_feature, memory = self(feats, img_metas)
  49. assert (memory == feats[-1]).all()
  50. assert mask_feature.shape == feats[0].shape
  51. class TestTransformerEncoderPixelDecoder(unittest.TestCase):
  52. def test_forward(self):
  53. base_channels = 64
  54. pixel_decoder_cfg = ConfigDict(
  55. dict(
  56. type='TransformerEncoderPixelDecoder',
  57. in_channels=[base_channels * 2**i for i in range(4)],
  58. feat_channels=base_channels,
  59. out_channels=base_channels,
  60. norm_cfg=dict(type='GN', num_groups=32),
  61. act_cfg=dict(type='ReLU'),
  62. encoder=dict( # DetrTransformerEncoder
  63. num_layers=6,
  64. layer_cfg=dict( # DetrTransformerEncoderLayer
  65. self_attn_cfg=dict( # MultiheadAttention
  66. embed_dims=base_channels,
  67. num_heads=8,
  68. attn_drop=0.1,
  69. proj_drop=0.1,
  70. dropout_layer=None,
  71. batch_first=True),
  72. ffn_cfg=dict(
  73. embed_dims=base_channels,
  74. feedforward_channels=base_channels * 8,
  75. num_fcs=2,
  76. act_cfg=dict(type='ReLU', inplace=True),
  77. ffn_drop=0.1,
  78. dropout_layer=None,
  79. add_identity=True),
  80. norm_cfg=dict(type='LN'),
  81. init_cfg=None),
  82. init_cfg=None),
  83. positional_encoding=dict(
  84. num_feats=base_channels // 2, normalize=True)))
  85. self = MODELS.build(pixel_decoder_cfg)
  86. self.init_weights()
  87. img_metas = [{
  88. 'batch_input_shape': (128, 160),
  89. 'img_shape': (120, 160),
  90. }, {
  91. 'batch_input_shape': (128, 160),
  92. 'img_shape': (125, 160),
  93. }]
  94. feats = [
  95. torch.rand(
  96. (2, base_channels * 2**i, 4 * 2**(3 - i), 5 * 2**(3 - i)))
  97. for i in range(4)
  98. ]
  99. mask_feature, memory = self(feats, img_metas)
  100. assert memory.shape[-2:] == feats[-1].shape[-2:]
  101. assert mask_feature.shape == feats[0].shape
  102. class TestMSDeformAttnPixelDecoder(unittest.TestCase):
  103. def test_forward(self):
  104. base_channels = 64
  105. pixel_decoder_cfg = ConfigDict(
  106. dict(
  107. type='MSDeformAttnPixelDecoder',
  108. in_channels=[base_channels * 2**i for i in range(4)],
  109. strides=[4, 8, 16, 32],
  110. feat_channels=base_channels,
  111. out_channels=base_channels,
  112. num_outs=3,
  113. norm_cfg=dict(type='GN', num_groups=32),
  114. act_cfg=dict(type='ReLU'),
  115. encoder=dict( # DeformableDetrTransformerEncoder
  116. num_layers=6,
  117. layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
  118. self_attn_cfg=dict( # MultiScaleDeformableAttention
  119. embed_dims=base_channels,
  120. num_heads=8,
  121. num_levels=3,
  122. num_points=4,
  123. im2col_step=64,
  124. dropout=0.0,
  125. batch_first=True,
  126. norm_cfg=None,
  127. init_cfg=None),
  128. ffn_cfg=dict(
  129. embed_dims=base_channels,
  130. feedforward_channels=base_channels * 4,
  131. num_fcs=2,
  132. ffn_drop=0.0,
  133. act_cfg=dict(type='ReLU', inplace=True))),
  134. init_cfg=None),
  135. positional_encoding=dict(
  136. num_feats=base_channels // 2, normalize=True),
  137. init_cfg=None))
  138. self = MODELS.build(pixel_decoder_cfg)
  139. self.init_weights()
  140. feats = [
  141. torch.rand(
  142. (2, base_channels * 2**i, 4 * 2**(3 - i), 5 * 2**(3 - i)))
  143. for i in range(4)
  144. ]
  145. mask_feature, multi_scale_features = self(feats)
  146. assert mask_feature.shape == feats[0].shape
  147. assert len(multi_scale_features) == 3
  148. multi_scale_features = multi_scale_features[::-1]
  149. for i in range(3):
  150. assert multi_scale_features[i].shape[-2:] == feats[i +
  151. 1].shape[-2:]