_base_ = './sparse-rcnn_r50_fpn_ms-480-800-3x_coco.py' num_proposals = 300 model = dict( rpn_head=dict(num_proposals=num_proposals), test_cfg=dict( _delete_=True, rpn=None, rcnn=dict(max_per_img=num_proposals))) # augmentation strategy originates from DETR. train_pipeline = [ dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), dict(type='LoadAnnotations', with_bbox=True), dict(type='RandomFlip', prob=0.5), dict( type='RandomChoice', transforms=[[ dict( type='RandomChoiceResize', scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), (608, 1333), (640, 1333), (672, 1333), (704, 1333), (736, 1333), (768, 1333), (800, 1333)], keep_ratio=True) ], [ dict( type='RandomChoiceResize', scales=[(400, 1333), (500, 1333), (600, 1333)], keep_ratio=True), dict( type='RandomCrop', crop_type='absolute_range', crop_size=(384, 600), allow_negative_crop=True), dict( type='RandomChoiceResize', scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), (608, 1333), (640, 1333), (672, 1333), (704, 1333), (736, 1333), (768, 1333), (800, 1333)], keep_ratio=True) ]]), dict(type='PackDetInputs') ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline))