efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py

_base_ = [
    'mmdet::_base_/datasets/coco_detection.py',
    'mmdet::_base_/schedules/schedule_1x.py',
    'mmdet::_base_/default_runtime.py'
]
custom_imports = dict(
    imports=['projects.EfficientDet.efficientdet'], allow_failed_imports=False)
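# `custom_imports` makes MMEngine import the EfficientDet project package at
# config-load time, so the locally registered modules used below
# (EfficientDet, BiFPN, EfficientDetSepBNHead, Conv2dSamePadding, ...) can be
# resolved from the registry.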
image_size = 896
batch_augments = [
    dict(type='BatchFixedSizePad', size=(image_size, image_size))
]
dataset_type = 'CocoDataset'
evaluator_type = 'CocoMetric'
norm_cfg = dict(type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01)
checkpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa-advprop_in1k_20220119-53b41118.pth'  # noqa
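# The ImageNet classification checkpoint stores weights under a `backbone.`
# key prefix; `init_cfg` in the backbone below therefore uses
# `prefix='backbone'` so that only the matching sub-weights are loaded
# (standard `Pretrained` init behaviour in MMEngine).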
model = dict(
    type='EfficientDet',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=image_size,
        batch_augments=batch_augments),
    backbone=dict(
        type='EfficientNet',
        arch='b3',
        drop_path_rate=0.3,
        out_indices=(3, 4, 5),
        frozen_stages=0,
        conv_cfg=dict(type='Conv2dSamePadding'),
        norm_cfg=norm_cfg,
        norm_eval=False,
        init_cfg=dict(
            type='Pretrained', prefix='backbone', checkpoint=checkpoint)),
    neck=dict(
        type='BiFPN',
        num_stages=6,
        in_channels=[48, 136, 384],
        out_channels=160,
        start_level=0,
        norm_cfg=norm_cfg),
    bbox_head=dict(
        type='EfficientDetSepBNHead',
        num_classes=80,
        num_ins=5,
        in_channels=160,
        feat_channels=160,
        stacked_convs=4,
        norm_cfg=norm_cfg,
        anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=4,
            scales_per_octave=3,
            ratios=[1.0, 0.5, 2.0],
            strides=[8, 16, 32, 64, 128],
            center_offset=0.5),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=1.5,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='HuberLoss', beta=0.1, loss_weight=50)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0,
            ignore_iof_thr=-1),
        sampler=dict(
            type='PseudoSampler'),  # Focal loss should use PseudoSampler
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(
            type='soft_nms',
            iou_threshold=0.3,
            sigma=0.5,
            min_score=1e-3,
            method='gaussian'),
        max_per_img=100))
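# Notes on the scaling above: EfficientNet-B3 stages 3/4/5 emit 48/136/384
# channels at strides 8/16/32; the BiFPN widens them to 160 channels across 6
# stacked stages, and the head strides [8, 16, 32, 64, 128] correspond to
# pyramid levels P3-P7, matching the EfficientDet-D3 compound configuration
# from the paper. `soft_nms` with method='gaussian' decays the scores of
# overlapping boxes rather than discarding them outright; `min_score` prunes
# boxes once their decayed score falls below 1e-3.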
# dataset settings
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='RandomResize',
        scale=(image_size, image_size),
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=(image_size, image_size)),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='Resize', scale=(image_size, image_size), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
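# RandomResize over ratio_range=(0.1, 2.0) followed by a fixed 896x896
# RandomCrop implements the large-scale jittering used to train EfficientDet
# (the "crop896" part of the config name); `BatchFixedSizePad` in the data
# preprocessor then pads every image in the batch to exactly 896x896.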
train_dataloader = dict(
    batch_size=16,
    num_workers=8,
    dataset=dict(type=dataset_type, pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(type=dataset_type, pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type=evaluator_type)
test_evaluator = val_evaluator
optim_wrapper = dict(
    optimizer=dict(lr=0.16, weight_decay=4e-5),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True),
    clip_grad=dict(max_norm=10, norm_type=2))
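# Only `lr` and `weight_decay` are overridden here; the optimizer type itself
# (SGD with momentum, in the inherited schedule_1x.py) comes from the base
# schedule. lr=0.16 is the reference rate for the full 8x16=128 batch; see
# `auto_scale_lr` at the bottom of this file for how it is rescaled.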
# learning policy
max_epochs = 300
param_scheduler = [
    dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=917),
    dict(
        type='CosineAnnealingLR',
        eta_min=0.0,
        begin=1,
        T_max=299,
        end=300,
        by_epoch=True,
        convert_to_iter_based=True)
]
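# A plausible reading of end=917 (not stated in the config): COCO train2017
# keeps ~117266 images once empty annotations are filtered, and
# ceil(117266 / 128) = 917, i.e. the linear warmup spans roughly the first
# epoch at base_batch_size=128. The cosine schedule then decays the LR to 0
# over the remaining 299 epochs.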
train_cfg = dict(max_epochs=max_epochs, val_interval=1)
vis_backends = [
    dict(type='LocalVisBackend'),
    dict(type='TensorboardVisBackend')
]
visualizer = dict(
    type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=15))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49)
]
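# `ExpMomentumEMA` keeps an exponential moving average of the model weights
# (and, with update_buffers=True, of BN statistics too); MMEngine's EMAHook
# swaps the averaged weights in for validation and checkpointing.
# priority=49 registers the hook just above the default NORMAL (50) priority.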
# cudnn_benchmark=True can accelerate fixed-size training
env_cfg = dict(cudnn_benchmark=True)
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (16 samples per GPU)
auto_scale_lr = dict(base_batch_size=128)
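# Usage sketch (assuming a standard MMDetection checkout, with this file at
# projects/EfficientDet/configs/):
#
#   # 8-GPU training, matching the 8xb16 layout the config was tuned for
#   bash tools/dist_train.sh \
#       projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py 8
#
#   # single-GPU run; pass --auto-scale-lr so lr=0.16 is rescaled to the
#   # smaller effective batch size
#   python tools/train.py \
#       projects/EfficientDet/configs/efficientdet_effb3_bifpn_8xb16-crop896-300e_coco.py \
#       --auto-scale-lr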