# centernet_r18-dcnv2_8xb16-crop512-140e_coco.py (4.2 KB)

  1. _base_ = [
  2. '../_base_/datasets/coco_detection.py',
  3. '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py',
  4. './centernet_tta.py'
  5. ]
  6. dataset_type = 'CocoDataset'
  7. data_root = 'data/coco/'
  8. # model settings
  9. model = dict(
  10. type='CenterNet',
  11. data_preprocessor=dict(
  12. type='DetDataPreprocessor',
  13. mean=[123.675, 116.28, 103.53],
  14. std=[58.395, 57.12, 57.375],
  15. bgr_to_rgb=True),
  16. backbone=dict(
  17. type='ResNet',
  18. depth=18,
  19. norm_eval=False,
  20. norm_cfg=dict(type='BN'),
  21. init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),
  22. neck=dict(
  23. type='CTResNetNeck',
  24. in_channels=512,
  25. num_deconv_filters=(256, 128, 64),
  26. num_deconv_kernels=(4, 4, 4),
  27. use_dcn=True),
  28. bbox_head=dict(
  29. type='CenterNetHead',
  30. num_classes=80,
  31. in_channels=64,
  32. feat_channels=64,
  33. loss_center_heatmap=dict(type='GaussianFocalLoss', loss_weight=1.0),
  34. loss_wh=dict(type='L1Loss', loss_weight=0.1),
  35. loss_offset=dict(type='L1Loss', loss_weight=1.0)),
  36. train_cfg=None,
  37. test_cfg=dict(topk=100, local_maximum_kernel=3, max_per_img=100))
  38. train_pipeline = [
  39. dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
  40. dict(type='LoadAnnotations', with_bbox=True),
  41. dict(
  42. type='PhotoMetricDistortion',
  43. brightness_delta=32,
  44. contrast_range=(0.5, 1.5),
  45. saturation_range=(0.5, 1.5),
  46. hue_delta=18),
  47. dict(
  48. type='RandomCenterCropPad',
  49. # The cropped images are padded into squares during training,
  50. # but may be less than crop_size.
  51. crop_size=(512, 512),
  52. ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),
  53. mean=[0, 0, 0],
  54. std=[1, 1, 1],
  55. to_rgb=True,
  56. test_pad_mode=None),
  57. # Make sure the output is always crop_size.
  58. dict(type='Resize', scale=(512, 512), keep_ratio=True),
  59. dict(type='RandomFlip', prob=0.5),
  60. dict(type='PackDetInputs')
  61. ]
  62. test_pipeline = [
  63. dict(
  64. type='LoadImageFromFile',
  65. backend_args={{_base_.backend_args}},
  66. to_float32=True),
  67. # don't need Resize
  68. dict(
  69. type='RandomCenterCropPad',
  70. ratios=None,
  71. border=None,
  72. mean=[0, 0, 0],
  73. std=[1, 1, 1],
  74. to_rgb=True,
  75. test_mode=True,
  76. test_pad_mode=['logical_or', 31],
  77. test_pad_add_pix=1),
  78. dict(type='LoadAnnotations', with_bbox=True),
  79. dict(
  80. type='PackDetInputs',
  81. meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'border'))
  82. ]
  83. # Use RepeatDataset to speed up training
  84. train_dataloader = dict(
  85. batch_size=16,
  86. num_workers=4,
  87. persistent_workers=True,
  88. sampler=dict(type='DefaultSampler', shuffle=True),
  89. dataset=dict(
  90. _delete_=True,
  91. type='RepeatDataset',
  92. times=5,
  93. dataset=dict(
  94. type=dataset_type,
  95. data_root=data_root,
  96. ann_file='annotations/instances_train2017.json',
  97. data_prefix=dict(img='train2017/'),
  98. filter_cfg=dict(filter_empty_gt=True, min_size=32),
  99. pipeline=train_pipeline,
  100. backend_args={{_base_.backend_args}},
  101. )))
  102. val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
  103. test_dataloader = val_dataloader
  104. # optimizer
  105. # Based on the default settings of modern detectors, the SGD effect is better
  106. # than the Adam in the source code, so we use SGD default settings and
  107. # if you use adam+lr5e-4, the map is 29.1.
  108. optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))
  109. max_epochs = 28
  110. # learning policy
  111. # Based on the default settings of modern detectors, we added warmup settings.
  112. param_scheduler = [
  113. dict(
  114. type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
  115. end=1000),
  116. dict(
  117. type='MultiStepLR',
  118. begin=0,
  119. end=max_epochs,
  120. by_epoch=True,
  121. milestones=[18, 24], # the real step is [18*5, 24*5]
  122. gamma=0.1)
  123. ]
  124. train_cfg = dict(max_epochs=max_epochs) # the real epoch is 28*5=140
  125. # NOTE: `auto_scale_lr` is for automatically scaling LR,
  126. # USER SHOULD NOT CHANGE ITS VALUES.
  127. # base_batch_size = (8 GPUs) x (16 samples per GPU)
  128. auto_scale_lr = dict(base_batch_size=128)