# yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py

_base_ = ['../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py']
# model settings
data_preprocessor = dict(
    type='DetDataPreprocessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_size_divisor=32)
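# Note: the mean/std above are the standard ImageNet statistics in RGB order
# (bgr_to_rgb=True converts the BGR images produced by the loader first), and
# pad_size_divisor=32 pads inputs to a multiple of the largest feature-map
# stride used by the YOLOv3 head.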
model = dict(
    type='YOLOV3',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='MobileNetV2',
        out_indices=(2, 4, 6),
        act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://mmdet/mobilenet_v2')),
    neck=dict(
        type='YOLOV3Neck',
        num_scales=3,
        in_channels=[320, 96, 32],
        out_channels=[96, 96, 96]),
    bbox_head=dict(
        type='YOLOV3Head',
        num_classes=80,
        in_channels=[96, 96, 96],
        out_channels=[96, 96, 96],
        anchor_generator=dict(
            type='YOLOAnchorGenerator',
            base_sizes=[[(116, 90), (156, 198), (373, 326)],
                        [(30, 61), (62, 45), (59, 119)],
                        [(10, 13), (16, 30), (33, 23)]],
            strides=[32, 16, 8]),
        bbox_coder=dict(type='YOLOBBoxCoder'),
        featmap_strides=[32, 16, 8],
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_conf=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_xy=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=2.0,
            reduction='sum'),
        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='GridAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0)),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        conf_thr=0.005,
        nms=dict(type='nms', iou_threshold=0.45),
        max_per_img=100))
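# Note: the neck's in_channels [320, 96, 32] are the channel widths of the
# MobileNetV2 feature maps selected by out_indices=(2, 4, 6), listed from the
# coarsest (stride-32) to the finest (stride-8) level so that they line up
# with strides=[32, 16, 8] in the head; the anchor base_sizes are the standard
# YOLOv3 COCO clusters, grouped per scale in the same order.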
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'

# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcached are not
# supported yet).

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: use `backend_args` (`file_client_args` in versions before 3.0.0rc6)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Expand',
        mean=data_preprocessor['mean'],
        to_rgb=data_preprocessor['bgr_to_rgb'],
        ratio_range=(1, 2)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
        min_crop_size=0.3),
    dict(type='RandomResize', scale=[(320, 320), (416, 416)], keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackDetInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(416, 416), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
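# Note: RandomResize sampling between (320, 320) and (416, 416) with
# keep_ratio=True is the multi-scale training ("ms-416") referred to in the
# config name; at test time images are simply resized to fit 416x416 while
# preserving the aspect ratio.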
train_dataloader = dict(
    batch_size=24,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type='RepeatDataset',  # use RepeatDataset to speed up training
        times=10,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='annotations/instances_train2017.json',
            data_prefix=dict(img='train2017/'),
            filter_cfg=dict(filter_empty_gt=True, min_size=32),
            pipeline=train_pipeline,
            backend_args=backend_args)))
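# Note: with RepeatDataset(times=10) each training epoch iterates the COCO
# train split ten times, so max_epochs=30 (set further down in train_cfg)
# corresponds to the 300 effective epochs ("300e") in the config name.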
val_dataloader = dict(
    batch_size=24,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator
train_cfg = dict(max_epochs=30)

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.003, momentum=0.9, weight_decay=0.0005),
    clip_grad=dict(max_norm=35, norm_type=2))

# learning policy
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=0.0001,
        by_epoch=False,
        begin=0,
        end=4000),
    dict(type='MultiStepLR', by_epoch=True, milestones=[24, 28], gamma=0.1)
]
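# Note: the schedule warms the LR up linearly from lr * 1e-4 over the first
# 4000 iterations (by_epoch=False), then drops it by 10x at epochs 24 and 28.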
find_unused_parameters = True

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (24 samples per GPU)
auto_scale_lr = dict(base_batch_size=192)
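# Usage sketch (a rough guide, assuming a standard MMDetection 3.x checkout
# where this file lives under configs/yolo/): training is normally launched
# through tools/train.py or tools/dist_train.sh with this config path as the
# argument. The resolved config can also be loaded programmatically with
# mmengine, e.g.:
#
#   from mmengine.config import Config
#   cfg = Config.fromfile(
#       'configs/yolo/yolov3_mobilenetv2_8xb24-ms-416-300e_coco.py')
#   print(cfg.model.backbone.type)          # 'MobileNetV2'
#   print(cfg.train_dataloader.batch_size)  # 24 per GPU; 8 GPUs -> "8xb24"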