# yolof_r50-c5_8xb8-1x_coco.py
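# YOLOF with a ResNet-50-C5 backbone on COCO: 8 GPUs x 8 images per GPU (8xb8),
# 1x schedule (12 epochs).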

_base_ = [
    '../_base_/datasets/coco_detection.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
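# Model: YOLOF ("You Only Look One-level Feature") detects objects from a
# single C5 feature map instead of a feature pyramid.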
model = dict(
    type='YOLOF',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[103.530, 116.280, 123.675],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False,
        pad_size_divisor=32),
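    # Backbone: caffe-style ResNet-50 with frozen BN; only the last stage
    # output (C5, out_indices=(3, )) is used, matching the single-level design.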
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(3, ),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron/resnet50_caffe')),
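    # DilatedEncoder enlarges the receptive field of the single C5 feature
    # with residual blocks of increasing dilation rates.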
    neck=dict(
        type='DilatedEncoder',
        in_channels=2048,
        out_channels=512,
        block_mid_channels=128,
        num_residual_blocks=4,
        block_dilations=[2, 4, 6, 8]),
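    # Detection head: one feature level (stride 32) with five anchor scales;
    # predictions are decoded before computing the GIoU loss
    # (reg_decoded_bbox=True).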
    bbox_head=dict(
        type='YOLOFHead',
        num_classes=80,
        in_channels=512,
        reg_decoded_bbox=True,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            scales=[1, 2, 4, 8, 16],
            strides=[32]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1., 1., 1., 1.],
            add_ctr_clamp=True,
            ctr_clamp=32),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=1.0)),
    # training and testing settings
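    # UniformAssigner implements YOLOF's uniform matching: each ground-truth
    # box is matched to a fixed number of nearest anchors as positives.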
    train_cfg=dict(
        assigner=dict(
            type='UniformAssigner', pos_ignore_thr=0.15, neg_ignore_thr=0.7),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
# optimizer
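# Base LR 0.12 for the total batch size of 64 (8 GPUs x 8 images); the backbone
# is trained at 1/3 of the base LR and norm layers get no weight decay.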
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=0.0001),
    paramwise_cfg=dict(
        norm_decay_mult=0., custom_keys={'backbone': dict(lr_mult=1. / 3)}))
# learning rate
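# Linear warmup over the first 1500 iterations, then step decay by 10x at
# epochs 8 and 11 of the 12-epoch schedule.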
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=0.00066667,
        by_epoch=False,
        begin=0,
        end=1500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[8, 11],
        gamma=0.1)
]
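# Training pipeline: standard resize/flip plus RandomShift (up to 32 px), the
# extra augmentation used by YOLOF.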
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='RandomShift', prob=0.5, max_shift_px=32),
    dict(type='PackDetInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
    dict(type='Resize', scale=(1333, 800), keep_ratio=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
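# Dataloaders: override the base COCO loaders with a per-GPU batch size of 8
# and the pipelines defined above; val and test share the same settings.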
train_dataloader = dict(
    batch_size=8, num_workers=8, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)
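# Example launch, assuming this file sits at configs/yolof/ in a standard
# MMDetection checkout (tools/dist_train.sh is MMDetection's distributed
# training helper):
#   bash ./tools/dist_train.sh configs/yolof/yolof_r50-c5_8xb8-1x_coco.py 8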