yolov3_d53_320_273e_coco.py

# model settings
model = dict(
    type='YOLOV3',
    pretrained='open-mmlab://darknet53',
    backbone=dict(type='Darknet', depth=53, out_indices=(3, 4, 5)),
    neck=dict(
        type='YOLOV3Neck',
        num_scales=3,
        in_channels=[1024, 512, 256],
        out_channels=[512, 256, 128]),
    bbox_head=dict(
        type='YOLOV3Head',
        num_classes=80,
        in_channels=[512, 256, 128],
        out_channels=[1024, 512, 256],
        anchor_generator=dict(
            type='YOLOAnchorGenerator',
            # anchors are listed from the coarsest level (stride 32, largest
            # boxes) to the finest (stride 8), matching `strides` and
            # `featmap_strides` below
            base_sizes=[[(116, 90), (156, 198), (373, 326)],
                        [(30, 61), (62, 45), (59, 119)],
                        [(10, 13), (16, 30), (33, 23)]],
            strides=[32, 16, 8]),
        bbox_coder=dict(type='YOLOBBoxCoder'),
        featmap_strides=[32, 16, 8],
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_conf=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_xy=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=2.0,
            reduction='sum'),
        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='GridAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0)),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        conf_thr=0.005,
        nms=dict(type='nms', iou_threshold=0.45),
        max_per_img=100))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco'
# no mean subtraction: pixels are only scaled to [0, 1] and converted BGR -> RGB
img_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='PhotoMetricDistortion'),
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 2)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
        min_crop_size=0.3),
    dict(type='Resize', img_scale=(320, 320), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(320, 320),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            # no annotations at test time, so convert only the image to a
            # tensor instead of using DefaultFormatBundle
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=f'{data_root}/annotations/instances_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        pipeline=test_pipeline))
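# The data settings above can be overridden at launch time instead of editing
# this file, e.g. via the `--cfg-options` flag of MMDetection's tools/train.py
# (a sketch, assuming an MMDetection 2.x checkout; adjust keys and paths to
# your setup):
#
#   python tools/train.py configs/yolo/yolov3_d53_320_273e_coco.py \
#       --cfg-options data.samples_per_gpu=4 data.workers_per_gpu=2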
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=2000,  # same as burn-in in darknet
    warmup_ratio=0.1,
    step=[218, 246])
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=273)
evaluation = dict(interval=1, metric=['bbox'])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
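# Usage sketch, kept as comments so mmcv.Config can still load this file as a
# plain config. It assumes an MMDetection 2.x install; the checkpoint path is
# illustrative only and not part of this config.
#
#   from mmdet.apis import init_detector, inference_detector
#
#   config_file = 'configs/yolo/yolov3_d53_320_273e_coco.py'
#   checkpoint_file = 'checkpoints/yolov3_d53_320_273e_coco.pth'  # hypothetical path
#   model = init_detector(config_file, checkpoint_file, device='cuda:0')
#   result = inference_detector(model, 'demo/demo.jpg')  # per-class bbox arrays
#
# Training with this config instead:
#
#   python tools/train.py configs/yolo/yolov3_d53_320_273e_coco.py
#   ./tools/dist_train.sh configs/yolo/yolov3_d53_320_273e_coco.py 8  # 8 GPUs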