123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200 |
- # Copyright (c) OpenMMLab. All rights reserved.
- from unittest import TestCase
- import numpy as np
- import torch
- from mmengine.config import ConfigDict
- from mmengine.structures import InstanceData
- from mmdet.models.dense_heads import CondInstBboxHead, CondInstMaskHead
- from mmdet.structures.mask import BitmapMasks
- def _rand_masks(num_items, bboxes, img_w, img_h):
- rng = np.random.RandomState(0)
- masks = np.zeros((num_items, img_h, img_w), dtype=np.float32)
- for i, bbox in enumerate(bboxes):
- bbox = bbox.astype(np.int32)
- mask = (rng.rand(1, bbox[3] - bbox[1], bbox[2] - bbox[0]) >
- 0.3).astype(np.int64)
- masks[i:i + 1, bbox[1]:bbox[3], bbox[0]:bbox[2]] = mask
- return BitmapMasks(masks, height=img_h, width=img_w)
- def _fake_mask_feature_head():
- mask_feature_head = ConfigDict(
- in_channels=1,
- feat_channels=1,
- start_level=0,
- end_level=2,
- out_channels=8,
- mask_stride=8,
- num_stacked_convs=4,
- norm_cfg=dict(type='BN', requires_grad=True))
- return mask_feature_head
- class TestCondInstHead(TestCase):
- def test_condinst_bboxhead_loss(self):
- """Tests condinst bboxhead loss when truth is empty and non-empty."""
- s = 256
- img_metas = [{
- 'img_shape': (s, s, 3),
- 'pad_shape': (s, s, 3),
- 'scale_factor': 1,
- }]
- condinst_bboxhead = CondInstBboxHead(
- num_classes=4,
- in_channels=1,
- feat_channels=1,
- stacked_convs=1,
- norm_cfg=None)
- # Fcos head expects a multiple levels of features per image
- feats = (
- torch.rand(1, 1, s // stride[1], s // stride[0])
- for stride in condinst_bboxhead.prior_generator.strides)
- cls_scores, bbox_preds, centernesses, param_preds =\
- condinst_bboxhead.forward(feats)
- # Test that empty ground truth encourages the network to
- # predict background
- gt_instances = InstanceData()
- gt_instances.bboxes = torch.empty((0, 4))
- gt_instances.labels = torch.LongTensor([])
- gt_instances.masks = _rand_masks(0, gt_instances.bboxes.numpy(), s, s)
- empty_gt_losses = condinst_bboxhead.loss_by_feat(
- cls_scores, bbox_preds, centernesses, param_preds, [gt_instances],
- img_metas)
- # When there is no truth, the cls loss should be nonzero but
- # box loss and centerness loss should be zero
- empty_cls_loss = empty_gt_losses['loss_cls'].item()
- empty_box_loss = empty_gt_losses['loss_bbox'].item()
- empty_ctr_loss = empty_gt_losses['loss_centerness'].item()
- self.assertGreater(empty_cls_loss, 0, 'cls loss should be non-zero')
- self.assertEqual(
- empty_box_loss, 0,
- 'there should be no box loss when there are no true boxes')
- self.assertEqual(
- empty_ctr_loss, 0,
- 'there should be no centerness loss when there are no true boxes')
- # When truth is non-empty then all cls, box loss and centerness loss
- # should be nonzero for random inputs
- gt_instances = InstanceData()
- gt_instances.bboxes = torch.Tensor(
- [[23.6667, 23.8757, 238.6326, 151.8874]])
- gt_instances.labels = torch.LongTensor([2])
- gt_instances.masks = _rand_masks(1, gt_instances.bboxes.numpy(), s, s)
- one_gt_losses = condinst_bboxhead.loss_by_feat(cls_scores, bbox_preds,
- centernesses,
- param_preds,
- [gt_instances],
- img_metas)
- onegt_cls_loss = one_gt_losses['loss_cls'].item()
- onegt_box_loss = one_gt_losses['loss_bbox'].item()
- onegt_ctr_loss = one_gt_losses['loss_centerness'].item()
- self.assertGreater(onegt_cls_loss, 0, 'cls loss should be non-zero')
- self.assertGreater(onegt_box_loss, 0, 'box loss should be non-zero')
- self.assertGreater(onegt_ctr_loss, 0,
- 'centerness loss should be non-zero')
- # Test the `center_sampling` works fine.
- condinst_bboxhead.center_sampling = True
- ctrsamp_losses = condinst_bboxhead.loss_by_feat(
- cls_scores, bbox_preds, centernesses, param_preds, [gt_instances],
- img_metas)
- ctrsamp_cls_loss = ctrsamp_losses['loss_cls'].item()
- ctrsamp_box_loss = ctrsamp_losses['loss_bbox'].item()
- ctrsamp_ctr_loss = ctrsamp_losses['loss_centerness'].item()
- self.assertGreater(ctrsamp_cls_loss, 0, 'cls loss should be non-zero')
- self.assertGreater(ctrsamp_box_loss, 0, 'box loss should be non-zero')
- self.assertGreater(ctrsamp_ctr_loss, 0,
- 'centerness loss should be non-zero')
- # Test the `norm_on_bbox` works fine.
- condinst_bboxhead.norm_on_bbox = True
- normbox_losses = condinst_bboxhead.loss_by_feat(
- cls_scores, bbox_preds, centernesses, param_preds, [gt_instances],
- img_metas)
- normbox_cls_loss = normbox_losses['loss_cls'].item()
- normbox_box_loss = normbox_losses['loss_bbox'].item()
- normbox_ctr_loss = normbox_losses['loss_centerness'].item()
- self.assertGreater(normbox_cls_loss, 0, 'cls loss should be non-zero')
- self.assertGreater(normbox_box_loss, 0, 'box loss should be non-zero')
- self.assertGreater(normbox_ctr_loss, 0,
- 'centerness loss should be non-zero')
- def test_condinst_maskhead_loss(self):
- """Tests condinst maskhead loss when truth is empty and non-empty."""
- s = 256
- img_metas = [{
- 'img_shape': (s, s, 3),
- 'pad_shape': (s, s, 3),
- 'scale_factor': 1,
- }]
- condinst_bboxhead = CondInstBboxHead(
- num_classes=4,
- in_channels=1,
- feat_channels=1,
- stacked_convs=1,
- norm_cfg=None)
- mask_feature_head = _fake_mask_feature_head()
- condinst_maskhead = CondInstMaskHead(
- mask_feature_head=mask_feature_head,
- loss_mask=dict(
- type='DiceLoss',
- use_sigmoid=True,
- activate=True,
- eps=5e-6,
- loss_weight=1.0))
- # Fcos head expects a multiple levels of features per image
- feats = []
- for i in range(len(condinst_bboxhead.strides)):
- feats.append(
- torch.rand(1, 1, s // (2**(i + 3)), s // (2**(i + 3))))
- feats = tuple(feats)
- cls_scores, bbox_preds, centernesses, param_preds =\
- condinst_bboxhead.forward(feats)
- # Test that empty ground truth encourages the network to
- # predict background
- gt_instances = InstanceData()
- gt_instances.bboxes = torch.empty((0, 4))
- gt_instances.labels = torch.LongTensor([])
- gt_instances.masks = _rand_masks(0, gt_instances.bboxes.numpy(), s, s)
- _ = condinst_bboxhead.loss_by_feat(cls_scores, bbox_preds,
- centernesses, param_preds,
- [gt_instances], img_metas)
- # When truth is empty then all mask loss
- # should be zero for random inputs
- positive_infos = condinst_bboxhead.get_positive_infos()
- mask_outs = condinst_maskhead.forward(feats, positive_infos)
- empty_gt_mask_losses = condinst_maskhead.loss_by_feat(
- *mask_outs, [gt_instances], img_metas, positive_infos)
- loss_mask = empty_gt_mask_losses['loss_mask']
- self.assertEqual(loss_mask, 0, 'mask loss should be zero')
- # When truth is non-empty then all cls, box loss and centerness loss
- # should be nonzero for random inputs
- gt_instances = InstanceData()
- gt_instances.bboxes = torch.Tensor(
- [[23.6667, 23.8757, 238.6326, 151.8874]])
- gt_instances.labels = torch.LongTensor([2])
- gt_instances.masks = _rand_masks(1, gt_instances.bboxes.numpy(), s, s)
- _ = condinst_bboxhead.loss_by_feat(cls_scores, bbox_preds,
- centernesses, param_preds,
- [gt_instances], img_metas)
- positive_infos = condinst_bboxhead.get_positive_infos()
- mask_outs = condinst_maskhead.forward(feats, positive_infos)
- one_gt_mask_losses = condinst_maskhead.loss_by_feat(
- *mask_outs, [gt_instances], img_metas, positive_infos)
- loss_mask = one_gt_mask_losses['loss_mask']
- self.assertGreater(loss_mask, 0, 'mask loss should be nonzero')
|