dino-5scale_swin-l_8xb2-12e_coco.py

_base_ = './dino-4scale_r50_8xb2-12e_coco.py'
fp16 = dict(loss_scale=512.)
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'  # noqa
num_levels = 5
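# num_levels=5 gives this variant one extra feature scale over the 4-scale
# baseline: the four Swin-L stages supply four feature maps and the neck
# (num_outs=num_levels) generates the fifth.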
model = dict(
    num_feature_levels=num_levels,
    backbone=dict(
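        # _delete_=True discards the ResNet-50 backbone inherited from the
        # base config before applying the Swin settings below; embed_dims,
        # depths and num_heads are the Swin-L values matching the
        # swin_large_patch4_window12_384_22k checkpoint referenced above.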
        _delete_=True,
        type='SwinTransformer',
        pretrain_img_size=384,
        embed_dims=192,
        depths=[2, 2, 18, 2],
        num_heads=[6, 12, 24, 48],
        window_size=12,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        # Please only add indices that would be used
        # in FPN, otherwise some parameters will not be used
        with_cp=True,
        convert_weights=True,
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    neck=dict(in_channels=[192, 384, 768, 1536], num_outs=num_levels),
    encoder=dict(layer_cfg=dict(self_attn_cfg=dict(num_levels=num_levels))),
    decoder=dict(layer_cfg=dict(cross_attn_cfg=dict(num_levels=num_levels))))
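
A minimal usage sketch, assuming an MMDetection 3.x install and that this file sits at configs/dino/ in a standard repo checkout (the config path below is an assumption, adjust it to your layout):

# Sketch only: builds the 5-scale DINO detector from the merged config.
from mmdet.apis import init_detector

config_path = 'configs/dino/dino-5scale_swin-l_8xb2-12e_coco.py'  # assumed path

# Pass a trained checkpoint path instead of None to load weights for inference.
model = init_detector(config_path, checkpoint=None, device='cpu')

Training as encoded in the filename (8 GPUs x batch size 2, 12 epochs) is typically launched with MMDetection's tools/dist_train.sh, passing this config and the GPU count.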