# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase

import torch

from mmpose.models.backbones.pvt import (PVTEncoderLayer,
                                         PyramidVisionTransformer,
                                         PyramidVisionTransformerV2)


class TestPVT(TestCase):

    def test_pvt_block(self):
        # test PVT encoder layer structure and forward
        block = PVTEncoderLayer(
            embed_dims=64, num_heads=4, feedforward_channels=256)
        self.assertEqual(block.ffn.embed_dims, 64)
        self.assertEqual(block.attn.num_heads, 4)
        self.assertEqual(block.ffn.feedforward_channels, 256)

        # the layer takes flattened tokens of shape (B, H*W, C) together
        # with the spatial shape (H, W)
        x = torch.randn(1, 56 * 56, 64)
        x_out = block(x, (56, 56))
        self.assertEqual(x_out.shape, torch.Size([1, 56 * 56, 64]))

    def test_pvt(self):
        """Test PVT backbone."""
        # pretrain_img_size must be an int or a (h, w) tuple
        with self.assertRaises(AssertionError):
            PyramidVisionTransformer(pretrain_img_size=(224, 224, 224))

        # test 'corner' padding in the patch embeddings
        model = PyramidVisionTransformer(
            paddings=['corner', 'corner', 'corner', 'corner'])
        temp = torch.randn((1, 3, 32, 32))
        outs = model(temp)
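        # The four stages downsample by strides 4, 2, 2 and 2, so the
        # outputs sit at 1/4, 1/8, 1/16 and 1/32 of the input resolution.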
        self.assertEqual(outs[0].shape, (1, 64, 8, 8))
        self.assertEqual(outs[1].shape, (1, 128, 4, 4))
        self.assertEqual(outs[2].shape, (1, 320, 2, 2))
        self.assertEqual(outs[3].shape, (1, 512, 1, 1))

        # Test absolute position embedding
        temp = torch.randn((1, 3, 224, 224))
        model = PyramidVisionTransformer(
            pretrain_img_size=224, use_abs_pos_embed=True)
        model.init_weights()
        model(temp)

        # Test normal inference
        temp = torch.randn((1, 3, 32, 32))
        model = PyramidVisionTransformer()
        outs = model(temp)
        self.assertEqual(outs[0].shape, (1, 64, 8, 8))
        self.assertEqual(outs[1].shape, (1, 128, 4, 4))
        self.assertEqual(outs[2].shape, (1, 320, 2, 2))
        self.assertEqual(outs[3].shape, (1, 512, 1, 1))

        # Test input size that is not divisible by the strides
        temp = torch.randn((1, 3, 33, 33))
        model = PyramidVisionTransformer()
        outs = model(temp)
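        # Spatial sizes are floored at each stage, so a 33x33 input
        # produces the same output shapes as a 32x32 one.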
        self.assertEqual(outs[0].shape, (1, 64, 8, 8))
        self.assertEqual(outs[1].shape, (1, 128, 4, 4))
        self.assertEqual(outs[2].shape, (1, 320, 2, 2))
        self.assertEqual(outs[3].shape, (1, 512, 1, 1))

        # Test non-square input size
        temp = torch.randn((1, 3, 112, 137))
        model = PyramidVisionTransformer()
        outs = model(temp)
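        # 112 x 137 -> (28, 34) at stride 4; odd sizes keep being floored
        # at the later stages (e.g. 17 -> 8, 7 -> 3).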
        self.assertEqual(outs[0].shape, (1, 64, 28, 34))
        self.assertEqual(outs[1].shape, (1, 128, 14, 17))
        self.assertEqual(outs[2].shape, (1, 320, 7, 8))
        self.assertEqual(outs[3].shape, (1, 512, 3, 4))

    def test_pvtv2(self):
        """Test PVTv2 backbone."""
        with self.assertRaises(TypeError):
            # the pretrained arg must be a str or None
            PyramidVisionTransformerV2(pretrained=123)

        # pretrain_img_size must be an int or a (h, w) tuple
        with self.assertRaises(AssertionError):
            PyramidVisionTransformerV2(pretrain_img_size=(224, 224, 224))

        # test init_cfg with pretrained model
        model = PyramidVisionTransformerV2(
            embed_dims=32,
            num_layers=[2, 2, 2, 2],
            init_cfg=dict(
                type='Pretrained',
                checkpoint='https://github.com/whai362/PVT/'
                'releases/download/v2/pvt_v2_b0.pth'))
        model.init_weights()

        # test init weights from scratch
        model = PyramidVisionTransformerV2(
            embed_dims=32, num_layers=[2, 2, 2, 2])
        model.init_weights()

        # Test normal inference
        temp = torch.randn((1, 3, 32, 32))
        model = PyramidVisionTransformerV2()
        outs = model(temp)
        self.assertEqual(outs[0].shape, (1, 64, 8, 8))
        self.assertEqual(outs[1].shape, (1, 128, 4, 4))
        self.assertEqual(outs[2].shape, (1, 320, 2, 2))
        self.assertEqual(outs[3].shape, (1, 512, 1, 1))

        # Test input size that is not divisible by the strides
        temp = torch.randn((1, 3, 31, 31))
        model = PyramidVisionTransformerV2()
        outs = model(temp)
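        # PVTv2 uses overlapping patch embeddings with padding, so spatial
        # sizes round up: a 31x31 input yields the same shapes as 32x32.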
        self.assertEqual(outs[0].shape, (1, 64, 8, 8))
        self.assertEqual(outs[1].shape, (1, 128, 4, 4))
        self.assertEqual(outs[2].shape, (1, 320, 2, 2))
        self.assertEqual(outs[3].shape, (1, 512, 1, 1))

        # Test non-square input size
        temp = torch.randn((1, 3, 112, 137))
        model = PyramidVisionTransformerV2()
        outs = model(temp)
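        # Unlike PVT above, odd sizes are rounded up at every stage
        # (137 -> 35 at stride 4, then 18, 9 and 5).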
        self.assertEqual(outs[0].shape, (1, 64, 28, 35))
        self.assertEqual(outs[1].shape, (1, 128, 14, 18))
        self.assertEqual(outs[2].shape, (1, 320, 7, 9))
        self.assertEqual(outs[3].shape, (1, 512, 4, 5))