# cascade-mask-rcnn_r101_fpn_seesaw-loss_sample1e-3-ms-2x_lvis-v1.py

  1. _base_ = [
  2. '../_base_/models/cascade-mask-rcnn_r50_fpn.py',
  3. '../_base_/datasets/lvis_v1_instance.py',
  4. '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'
  5. ]
  6. model = dict(
  7. backbone=dict(
  8. depth=101,
  9. init_cfg=dict(type='Pretrained',
  10. checkpoint='torchvision://resnet101')),
  11. roi_head=dict(
  12. bbox_head=[
  13. dict(
  14. type='Shared2FCBBoxHead',
  15. in_channels=256,
  16. fc_out_channels=1024,
  17. roi_feat_size=7,
  18. num_classes=1203,
  19. bbox_coder=dict(
  20. type='DeltaXYWHBBoxCoder',
  21. target_means=[0., 0., 0., 0.],
  22. target_stds=[0.1, 0.1, 0.2, 0.2]),
  23. reg_class_agnostic=True,
  24. cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),
  25. loss_cls=dict(
  26. type='SeesawLoss',
  27. p=0.8,
  28. q=2.0,
  29. num_classes=1203,
  30. loss_weight=1.0),
  31. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  32. loss_weight=1.0)),
  33. dict(
  34. type='Shared2FCBBoxHead',
  35. in_channels=256,
  36. fc_out_channels=1024,
  37. roi_feat_size=7,
  38. num_classes=1203,
  39. bbox_coder=dict(
  40. type='DeltaXYWHBBoxCoder',
  41. target_means=[0., 0., 0., 0.],
  42. target_stds=[0.05, 0.05, 0.1, 0.1]),
  43. reg_class_agnostic=True,
  44. cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),
  45. loss_cls=dict(
  46. type='SeesawLoss',
  47. p=0.8,
  48. q=2.0,
  49. num_classes=1203,
  50. loss_weight=1.0),
  51. loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
  52. loss_weight=1.0)),
  53. dict(
  54. type='Shared2FCBBoxHead',
  55. in_channels=256,
  56. fc_out_channels=1024,
  57. roi_feat_size=7,
  58. num_classes=1203,
  59. bbox_coder=dict(
  60. type='DeltaXYWHBBoxCoder',
  61. target_means=[0., 0., 0., 0.],
  62. target_stds=[0.033, 0.033, 0.067, 0.067]),
  63. reg_class_agnostic=True,
  64. cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),
  65. loss_cls=dict(
  66. type='SeesawLoss',
  67. p=0.8,
  68. q=2.0,
  69. num_classes=1203,
  70. loss_weight=1.0),
  71. loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
  72. ],
  73. mask_head=dict(num_classes=1203)),
  74. test_cfg=dict(
  75. rcnn=dict(
  76. score_thr=0.0001,
  77. # LVIS allows up to 300
  78. max_per_img=300)))
  79. # dataset settings
  80. train_pipeline = [
  81. dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
  82. dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
  83. dict(
  84. type='RandomChoiceResize',
  85. scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
  86. (1333, 768), (1333, 800)],
  87. keep_ratio=True),
  88. dict(type='RandomFlip', prob=0.5),
  89. dict(type='PackDetInputs')
  90. ]
  91. train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline)))
  92. train_cfg = dict(val_interval=24)