# yolov3_d53_8xb8-ms-608-273e_coco.py
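# The file name follows MMDetection's config naming convention: YOLOv3
# with a Darknet-53 backbone ("d53"), trained on 8 GPUs with 8 images
# each ("8xb8"), multi-scale input up to 608 px ("ms-608"), for 273
# epochs ("273e") on COCO.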

_base_ = ['../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py']
# model settings
data_preprocessor = dict(
    type='DetDataPreprocessor',
    mean=[0, 0, 0],
    std=[255., 255., 255.],
    bgr_to_rgb=True,
    pad_size_divisor=32)
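# With zero mean and std of 255, the preprocessor simply rescales pixels
# to [0, 1] after the BGR->RGB swap; pad_size_divisor=32 pads each batch
# to a multiple of the coarsest feature stride.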
model = dict(
    type='YOLOV3',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='Darknet',
        depth=53,
        out_indices=(3, 4, 5),
        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://darknet53')),
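    # Stages 3, 4 and 5 of Darknet-53 yield the stride-8/16/32 feature
    # maps (256/512/1024 channels) consumed by the three YOLO scales.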
    neck=dict(
        type='YOLOV3Neck',
        num_scales=3,
        in_channels=[1024, 512, 256],
        out_channels=[512, 256, 128]),
    bbox_head=dict(
        type='YOLOV3Head',
        num_classes=80,
        in_channels=[512, 256, 128],
        out_channels=[1024, 512, 256],
        anchor_generator=dict(
            type='YOLOAnchorGenerator',
            base_sizes=[[(116, 90), (156, 198), (373, 326)],
                        [(30, 61), (62, 45), (59, 119)],
                        [(10, 13), (16, 30), (33, 23)]],
            strides=[32, 16, 8]),
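        # base_sizes are anchor (w, h) pairs in input-image pixels, one
        # triplet per level from coarse (stride 32, large objects) to
        # fine (stride 8, small objects): the original YOLOv3 anchors
        # clustered on COCO.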
        bbox_coder=dict(type='YOLOBBoxCoder'),
        featmap_strides=[32, 16, 8],
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_conf=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_xy=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=2.0,
            reduction='sum'),
        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),
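    # Classification, objectness and xy-offset terms all use binary
    # cross-entropy (use_sigmoid=True) with 'sum' reduction, while wh
    # regression uses MSE; the 2.0 weights up-weight localization.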
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='GridAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0)),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        conf_thr=0.005,
        nms=dict(type='nms', iou_threshold=0.45),
        max_per_img=100))
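# At inference, boxes are filtered by objectness (conf_thr) and class
# score (score_thr), the top nms_pre candidates per image are kept, and
# NMS at IoU 0.45 returns at most max_per_img=100 detections.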
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcached
# are not supported yet)
# data_root = 's3://openmmlab/datasets/detection/coco/'
# Method 2: use `backend_args` (named `file_client_args` in versions
# before 3.0.0rc6)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Expand',
        mean=data_preprocessor['mean'],
        to_rgb=data_preprocessor['bgr_to_rgb'],
        ratio_range=(1, 2)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
        min_crop_size=0.3),
    dict(type='RandomResize', scale=[(320, 320), (608, 608)], keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackDetInputs')
]
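# RandomResize above samples a target scale between (320, 320) and
# (608, 608) each iteration, giving the multi-scale ("ms") training the
# config name refers to.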
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(608, 608), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))
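# batch_size is per GPU; with the 8-GPU setup encoded in the config name
# ("8xb8") the effective batch size is 8 x 8 = 64, matching
# auto_scale_lr.base_batch_size below.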
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator
train_cfg = dict(max_epochs=273, val_interval=7)
# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005),
    clip_grad=dict(max_norm=35, norm_type=2))
# learning policy
param_scheduler = [
    dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=2000),
    dict(type='MultiStepLR', by_epoch=True, milestones=[218, 246], gamma=0.1)
]
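# Linear warmup raises the LR from 0.1 x lr to lr over the first 2000
# iterations; MultiStepLR then divides it by 10 at epochs 218 and 246
# of the 273-epoch schedule.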
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=7))
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)
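# A minimal usage sketch, commented out to keep the config inert. It
# assumes a standard MMDetection 3.x checkout with this file under
# configs/yolo/; `Config.fromfile` is mmengine's config loader, and
# nested fields support attribute access:
#
# from mmengine.config import Config
# cfg = Config.fromfile('configs/yolo/yolov3_d53_8xb8-ms-608-273e_coco.py')
# print(cfg.model.bbox_head.num_classes)  # -> 80
#
# Training is then typically launched via the repo's tools/train.py:
#   python tools/train.py configs/yolo/yolov3_d53_8xb8-ms-608-273e_coco.py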