# crowddet-rcnn_r50_fpn_8xb2-30e_crowdhuman.py

_base_ = ['../_base_/default_runtime.py']
model = dict(
    type='CrowdDet',
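    # Note: mean/std below are ImageNet pixel statistics in BGR channel
    # order; bgr_to_rgb=False keeps the input images in BGR to match them.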
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        mean=[103.53, 116.28, 123.675],
        std=[57.375, 57.12, 58.395],
        bgr_to_rgb=False,
        pad_size_divisor=64,
        # This option is set according to https://github.com/Purkialo/CrowdDet/
        # blob/master/lib/data/CrowdHuman.py: the images in the entire batch
        # are resized together.
        batch_augments=[
            dict(type='BatchResize', scale=(1400, 800), pad_size_divisor=64)
        ]),
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
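    # FPN is configured with bilinear upsampling here; MMDetection's FPN
    # defaults to nearest-neighbour interpolation, so this is a deliberate
    # override.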
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5,
        upsample_cfg=dict(mode='bilinear', align_corners=False)),
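    # In AnchorGenerator the ratios are height/width, so ratios=[1.0, 2.0,
    # 3.0] produce square and tall anchors, suited to the mostly upright
    # pedestrians in CrowdHuman.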
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[1.0, 2.0, 3.0],
            strides=[4, 8, 16, 32, 64],
            centers=[(8, 8), (8, 8), (8, 8), (8, 8), (8, 8)]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0],
            clip_border=False),
        loss_cls=dict(type='CrossEntropyLoss', loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
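    # CrowdDet's "one proposal, multiple predictions" scheme: each proposal
    # predicts a set of two instances so that heavily overlapping people can
    # share one proposal. MultiInstanceRoIHead/MultiInstanceBBoxHead implement
    # this; with_refine=False disables the optional refinement module from the
    # paper (a separate refine config enables it).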
    roi_head=dict(
        type='MultiInstanceRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(
                type='RoIAlign',
                output_size=7,
                sampling_ratio=-1,
                aligned=True,
                use_torchvision=True),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='MultiInstanceBBoxHead',
            with_refine=False,
            num_shared_fcs=2,
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=1,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss',
                loss_weight=1.0,
                use_sigmoid=False,
                reduction='none'),
            loss_bbox=dict(
                type='SmoothL1Loss', loss_weight=1.0, reduction='none'))),
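    # reduction='none' in the two losses above matters: the EMD-style set loss
    # computes per-prediction costs and keeps, for each proposal, the
    # (prediction, gt) matching with the minimum total loss.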
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=(0.3, 0.7),
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2400,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=2),
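        # The rcnn stage uses the multi-instance variants of the assigner and
        # sampler, which attach a set of ground-truth boxes to each proposal
        # instead of a single best match.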
        rcnn=dict(
            assigner=dict(
                type='MultiInstanceAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.3,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='MultiInsRandomSampler',
                num=512,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1200,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=2),
        rcnn=dict(
            nms=dict(type='nms', iou_threshold=0.5),
            score_thr=0.01,
            max_per_img=500)))
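# Crowded scenes contain many true instances, so test_cfg above keeps up to
# 500 detections per image (typical detector configs keep around 100) with a
# low score threshold of 0.01.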
dataset_type = 'CrowdHumanDataset'
data_root = 'data/CrowdHuman/'
# Example of using a different file client.
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the path prefix (LMDB and Memcached
# are not supported yet).
# data_root = 's3://openmmlab/datasets/tracking/CrowdHuman/'
# Method 2: use `backend_args` (called `file_client_args` in versions before
# 3.0.0rc6):
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/tracking/',
#         'data/': 's3://openmmlab/datasets/tracking/'
#     }))
backend_args = None
train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', prob=0.5),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                   'flip_direction'))
]
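# Note that train_pipeline contains no Resize step: training-time resizing is
# done on the whole batch by the BatchResize augment in the data_preprocessor
# above.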
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=(1400, 800), keep_ratio=True),
    # LoadAnnotations is placed after Resize so the gt bboxes are not
    # resized as well.
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]
train_dataloader = dict(
    batch_size=2,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=None,  # using a 'batch_sampler' may decrease precision
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotation_train.odgt',
        data_prefix=dict(img='Images/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotation_val.odgt',
        data_prefix=dict(img='Images/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader
val_evaluator = dict(
    type='CrowdHumanMetric',
    ann_file=data_root + 'annotation_val.odgt',
    metric=['AP', 'MR', 'JI'],
    backend_args=backend_args)
test_evaluator = val_evaluator
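# CrowdHuman reports three numbers: AP (higher is better), MR (log-average
# miss rate, lower is better) and JI (Jaccard Index, higher is better).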
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=30, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
param_scheduler = [
    dict(
        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=800),
    dict(
        type='MultiStepLR',
        begin=0,
        end=30,
        by_epoch=True,
        milestones=[24, 27],
        gamma=0.1)
]
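# Schedule: linear warmup over the first 800 iterations, then the lr drops by
# 10x at epochs 24 and 27 of the 30-epoch run.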
# optimizer
auto_scale_lr = dict(base_batch_size=16)
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001))
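# lr=0.002 corresponds to the 8 GPU x 2 img/GPU setup in the config name
# (total batch size 16 == base_batch_size). When training with a different
# total batch size, pass --auto-scale-lr to tools/train.py to rescale it
# linearly.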