# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
from mmengine.structures import InstanceData
from torch import Tensor

from mmdet.registry import MODELS, TASK_UTILS
from mmdet.structures.bbox import BaseBoxes, cat_boxes, get_box_tensor
from mmdet.utils import (ConfigType, InstanceList, OptConfigType,
                         OptInstanceList, OptMultiConfig)
from ..task_modules.prior_generators import (AnchorGenerator,
                                             anchor_inside_flags)
from ..task_modules.samplers import PseudoSampler
from ..utils import images_to_levels, multi_apply, unmap
from .base_dense_head import BaseDenseHead


@MODELS.register_module()
class AnchorHead(BaseDenseHead):
    """Anchor-based head (RPN, RetinaNet, SSD, etc.).

    Args:
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
        feat_channels (int): Number of hidden channels. Used in child classes.
        anchor_generator (dict): Config dict for anchor generator.
        bbox_coder (dict): Config of bounding box coder.
        reg_decoded_bbox (bool): If true, the regression loss would be
            applied directly on decoded bounding boxes, converting both
            the predicted boxes and regression targets to absolute
            coordinates format. Defaults to False. It should be `True` when
            using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        train_cfg (dict): Training config of anchor head.
        test_cfg (dict): Testing config of anchor head.
        init_cfg (dict or list[dict], optional): Initialization config dict.
    """  # noqa: W605

    def __init__(
        self,
        num_classes: int,
        in_channels: int,
        feat_channels: int = 256,
        anchor_generator: ConfigType = dict(
            type='AnchorGenerator',
            scales=[8, 16, 32],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder: ConfigType = dict(
            type='DeltaXYWHBBoxCoder',
            clip_border=True,
            target_means=(.0, .0, .0, .0),
            target_stds=(1.0, 1.0, 1.0, 1.0)),
        reg_decoded_bbox: bool = False,
        loss_cls: ConfigType = dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox: ConfigType = dict(
            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        train_cfg: OptConfigType = None,
        test_cfg: OptConfigType = None,
        init_cfg: OptMultiConfig = dict(
            type='Normal', layer='Conv2d', std=0.01)
    ) -> None:
        super().__init__(init_cfg=init_cfg)
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.feat_channels = feat_channels
        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
        if self.use_sigmoid_cls:
            self.cls_out_channels = num_classes
        else:
            self.cls_out_channels = num_classes + 1

        if self.cls_out_channels <= 0:
            raise ValueError(f'num_classes={num_classes} is too small')
        self.reg_decoded_bbox = reg_decoded_bbox

        self.bbox_coder = TASK_UTILS.build(bbox_coder)
        self.loss_cls = MODELS.build(loss_cls)
        self.loss_bbox = MODELS.build(loss_bbox)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        if self.train_cfg:
            self.assigner = TASK_UTILS.build(self.train_cfg['assigner'])
            if train_cfg.get('sampler', None) is not None:
                self.sampler = TASK_UTILS.build(
                    self.train_cfg['sampler'],
                    default_args=dict(context=self))
            else:
                self.sampler = PseudoSampler(context=self)

        self.fp16_enabled = False

        self.prior_generator = TASK_UTILS.build(anchor_generator)

        # Usually the numbers of anchors for each level are the same
        # except SSD detectors. So it is an int in most dense heads
        # but a list of int in SSDHead
        self.num_base_priors = self.prior_generator.num_base_priors[0]
        self._init_layers()
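
    # --- Illustrative usage sketch (not part of the upstream file) ---
    # Building the head directly with its defaults; assumes mmdet's
    # registries are populated (i.e. mmdet has been imported normally).
    # With the default anchor generator, 3 scales x 3 ratios yield 9 base
    # anchors per location:
    #
    #   >>> head = AnchorHead(num_classes=9, in_channels=1)
    #   >>> head.num_base_priors
    #   9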

    @property
    def num_anchors(self) -> int:
        warnings.warn('DeprecationWarning: `num_anchors` is deprecated, '
                      'please use `num_base_priors` instead')
        return self.prior_generator.num_base_priors[0]

    @property
    def anchor_generator(self) -> AnchorGenerator:
        warnings.warn('DeprecationWarning: anchor_generator is deprecated, '
                      'please use "prior_generator" instead')
        return self.prior_generator

    def _init_layers(self) -> None:
        """Initialize layers of the head."""
        self.conv_cls = nn.Conv2d(self.in_channels,
                                  self.num_base_priors * self.cls_out_channels,
                                  1)
        reg_dim = self.bbox_coder.encode_size
        self.conv_reg = nn.Conv2d(self.in_channels,
                                  self.num_base_priors * reg_dim, 1)
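
    # --- Illustrative note (not part of the upstream file) ---
    # With the defaults above (9 base anchors, sigmoid classification, a
    # 4-dim delta coder), the two 1x1 convs produce 9 * num_classes
    # classification channels and 9 * 4 regression channels per location:
    #
    #   >>> head = AnchorHead(num_classes=9, in_channels=1)
    #   >>> head.conv_cls.out_channels, head.conv_reg.out_channels
    #   (81, 36)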

    def forward_single(self, x: Tensor) -> Tuple[Tensor, Tensor]:
        """Forward feature of a single scale level.

        Args:
            x (Tensor): Features of a single scale level.

        Returns:
            tuple:
                cls_score (Tensor): Cls scores for a single scale level \
                    the channels number is num_base_priors * num_classes.
                bbox_pred (Tensor): Box energies / deltas for a single scale \
                    level, the channels number is num_base_priors * 4.
        """
        cls_score = self.conv_cls(x)
        bbox_pred = self.conv_reg(x)
        return cls_score, bbox_pred
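
    # --- Illustrative shape check (not part of the upstream file) ---
    # A single-level forward keeps the spatial size and only changes the
    # channel dimension:
    #
    #   >>> import torch
    #   >>> head = AnchorHead(num_classes=9, in_channels=1)
    #   >>> cls, reg = head.forward_single(torch.rand(1, 1, 32, 32))
    #   >>> cls.shape, reg.shape
    #   (torch.Size([1, 81, 32, 32]), torch.Size([1, 36, 32, 32]))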

    def forward(self, x: Tuple[Tensor]) -> Tuple[List[Tensor]]:
        """Forward features from the upstream network.

        Args:
            x (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.

        Returns:
            tuple: A tuple of classification scores and bbox prediction.

            - cls_scores (list[Tensor]): Classification scores for all \
                scale levels, each is a 4D-tensor, the channels number \
                is num_base_priors * num_classes.
            - bbox_preds (list[Tensor]): Box energies / deltas for all \
                scale levels, each is a 4D-tensor, the channels number \
                is num_base_priors * 4.
        """
        return multi_apply(self.forward_single, x)
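
    # --- Illustrative multi-level forward (not part of the upstream file) ---
    # `multi_apply` maps `forward_single` over the feature pyramid, so the
    # head returns one list of predictions per output branch:
    #
    #   >>> import torch
    #   >>> head = AnchorHead(num_classes=9, in_channels=1)
    #   >>> feats = [torch.rand(1, 1, s, s) for s in (32, 16, 8)]
    #   >>> cls_scores, bbox_preds = head.forward(feats)
    #   >>> [s.shape[-1] for s in cls_scores]
    #   [32, 16, 8]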

    def get_anchors(self,
                    featmap_sizes: List[tuple],
                    batch_img_metas: List[dict],
                    device: Union[torch.device, str] = 'cuda') \
            -> Tuple[List[List[Tensor]], List[List[Tensor]]]:
        """Get anchors according to feature map sizes.

        Args:
            featmap_sizes (list[tuple]): Multi-level feature map sizes.
            batch_img_metas (list[dict]): Image meta info.
            device (torch.device | str): Device for returned tensors.
                Defaults to 'cuda'.

        Returns:
            tuple:
                - anchor_list (list[list[Tensor]]): Anchors of each image.
                - valid_flag_list (list[list[Tensor]]): Valid flags of each
                  image.
        """
        num_imgs = len(batch_img_metas)

        # since feature map sizes of all images are the same, we only compute
        # anchors once
        multi_level_anchors = self.prior_generator.grid_priors(
            featmap_sizes, device=device)
        anchor_list = [multi_level_anchors for _ in range(num_imgs)]

        # for each image, we compute valid flags of multi level anchors
        valid_flag_list = []
        for img_id, img_meta in enumerate(batch_img_metas):
            multi_level_flags = self.prior_generator.valid_flags(
                featmap_sizes, img_meta['pad_shape'], device)
            valid_flag_list.append(multi_level_flags)
        return anchor_list, valid_flag_list
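
    # --- Illustrative anchor count (not part of the upstream file) ---
    # Each level contributes H * W * num_base_priors anchors. For a 256x256
    # image and the default 5 strides (4, 8, 16, 32, 64), the per-level
    # counts would be:
    #
    #   >>> head = AnchorHead(num_classes=9, in_channels=1)
    #   >>> metas = [dict(pad_shape=(256, 256))]
    #   >>> sizes = [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4)]
    #   >>> anchors, flags = head.get_anchors(sizes, metas, device='cpu')
    #   >>> [a.size(0) for a in anchors[0]]
    #   [36864, 9216, 2304, 576, 144]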

    def _get_targets_single(self,
                            flat_anchors: Union[Tensor, BaseBoxes],
                            valid_flags: Tensor,
                            gt_instances: InstanceData,
                            img_meta: dict,
                            gt_instances_ignore: Optional[InstanceData] = None,
                            unmap_outputs: bool = True) -> tuple:
        """Compute regression and classification targets for anchors in a
        single image.

        Args:
            flat_anchors (Tensor or :obj:`BaseBoxes`): Multi-level anchors
                of the image, which are concatenated into a single tensor
                or box type of shape (num_anchors, 4).
            valid_flags (Tensor): Multi level valid flags of the image,
                which are concatenated into a single tensor of
                shape (num_anchors, ).
            gt_instances (:obj:`InstanceData`): Ground truth of instance
                annotations. It should include ``bboxes`` and ``labels``
                attributes.
            img_meta (dict): Meta information for current image.
            gt_instances_ignore (:obj:`InstanceData`, optional): Instances
                to be ignored during training. It includes ``bboxes``
                attribute data that is ignored during training and testing.
                Defaults to None.
            unmap_outputs (bool): Whether to map outputs back to the original
                set of anchors. Defaults to True.

        Returns:
            tuple:
                - labels (Tensor): Labels of each level.
                - label_weights (Tensor): Label weights of each level.
                - bbox_targets (Tensor): BBox targets of each level.
                - bbox_weights (Tensor): BBox weights of each level.
                - pos_inds (Tensor): Indices of positive samples.
                - neg_inds (Tensor): Indices of negative samples.
                - sampling_result (:obj:`SamplingResult`): Sampling results.
        """
        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                           img_meta['img_shape'][:2],
                                           self.train_cfg['allowed_border'])
        if not inside_flags.any():
            raise ValueError(
                'There is no valid anchor inside the image boundary. Please '
                'check the image size and anchor sizes, or set '
                '``allowed_border`` to -1 to skip the condition.')
        # assign gt and sample anchors
        anchors = flat_anchors[inside_flags]

        pred_instances = InstanceData(priors=anchors)
        assign_result = self.assigner.assign(pred_instances, gt_instances,
                                             gt_instances_ignore)
        # No sampling is required except for RPN and
        # Guided Anchoring algorithms
        sampling_result = self.sampler.sample(assign_result, pred_instances,
                                              gt_instances)

        num_valid_anchors = anchors.shape[0]
        target_dim = gt_instances.bboxes.size(-1) if self.reg_decoded_bbox \
            else self.bbox_coder.encode_size
        bbox_targets = anchors.new_zeros(num_valid_anchors, target_dim)
        bbox_weights = anchors.new_zeros(num_valid_anchors, target_dim)

        # TODO: Considering saving memory, is it necessary to be long?
        labels = anchors.new_full((num_valid_anchors, ),
                                  self.num_classes,
                                  dtype=torch.long)
        label_weights = anchors.new_zeros(
            num_valid_anchors, dtype=torch.float)
        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        # `bbox_coder.encode` accepts tensor or box type inputs and generates
        # tensor targets. If regressing decoded boxes, the code will convert
        # box type `pos_bbox_targets` to tensor.
        if len(pos_inds) > 0:
            if not self.reg_decoded_bbox:
                pos_bbox_targets = self.bbox_coder.encode(
                    sampling_result.pos_priors, sampling_result.pos_gt_bboxes)
            else:
                pos_bbox_targets = sampling_result.pos_gt_bboxes
                pos_bbox_targets = get_box_tensor(pos_bbox_targets)
            bbox_targets[pos_inds, :] = pos_bbox_targets
            bbox_weights[pos_inds, :] = 1.0

            labels[pos_inds] = sampling_result.pos_gt_labels
            if self.train_cfg['pos_weight'] <= 0:
                label_weights[pos_inds] = 1.0
            else:
                label_weights[pos_inds] = self.train_cfg['pos_weight']
        if len(neg_inds) > 0:
            label_weights[neg_inds] = 1.0

        # map up to original set of anchors
        if unmap_outputs:
            num_total_anchors = flat_anchors.size(0)
            labels = unmap(
                labels, num_total_anchors, inside_flags,
                fill=self.num_classes)  # fill bg label
            label_weights = unmap(label_weights, num_total_anchors,
                                  inside_flags)
            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)

        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
                neg_inds, sampling_result)
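
    # --- Illustrative note (not part of the upstream file) ---
    # For one image the returned tensors are flat over the valid anchors
    # (or all anchors when `unmap_outputs=True`): `labels` holds the gt
    # class at positive rows and `num_classes` (background) elsewhere, and
    # `bbox_targets` holds encoded deltas (or decoded boxes when
    # `reg_decoded_bbox=True`) only at positive rows. A hedged train_cfg
    # sketch that would exercise this path (keys follow common mmdet 3.x
    # configs and are assumptions here):
    #
    #   >>> train_cfg = dict(
    #   ...     assigner=dict(type='MaxIoUAssigner', pos_iou_thr=0.5,
    #   ...                   neg_iou_thr=0.4, min_pos_iou=0,
    #   ...                   ignore_iof_thr=-1),
    #   ...     allowed_border=-1, pos_weight=-1, debug=False)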

    def get_targets(self,
                    anchor_list: List[List[Tensor]],
                    valid_flag_list: List[List[Tensor]],
                    batch_gt_instances: InstanceList,
                    batch_img_metas: List[dict],
                    batch_gt_instances_ignore: OptInstanceList = None,
                    unmap_outputs: bool = True,
                    return_sampling_results: bool = False) -> tuple:
        """Compute regression and classification targets for anchors in
        multiple images.

        Args:
            anchor_list (list[list[Tensor]]): Multi level anchors of each
                image. The outer list indicates images, and the inner list
                corresponds to feature levels of the image. Each element of
                the inner list is a tensor of shape (num_anchors, 4).
            valid_flag_list (list[list[Tensor]]): Multi level valid flags of
                each image. The outer list indicates images, and the inner
                list corresponds to feature levels of the image. Each element
                of the inner list is a tensor of shape (num_anchors, ).
            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
                gt_instance. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_img_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
                data that is ignored during training and testing.
                Defaults to None.
            unmap_outputs (bool): Whether to map outputs back to the original
                set of anchors. Defaults to True.
            return_sampling_results (bool): Whether to return the sampling
                results. Defaults to False.

        Returns:
            tuple: Usually returns a tuple containing learning targets.

            - labels_list (list[Tensor]): Labels of each level.
            - label_weights_list (list[Tensor]): Label weights of each
              level.
            - bbox_targets_list (list[Tensor]): BBox targets of each level.
            - bbox_weights_list (list[Tensor]): BBox weights of each level.
            - avg_factor (int): Average factor that is used to average
              the loss. When using sampling method, avg_factor is usually
              the sum of positive and negative priors. When using
              `PseudoSampler`, `avg_factor` is usually equal to the number
              of positive priors.

            additional_returns: This function enables user-defined returns
            from `self._get_targets_single`. These returns are currently
            refined to properties at each feature map (i.e. having HxW
            dimension). The results will be concatenated after the end of
            the function.
        """
        num_imgs = len(batch_img_metas)
        assert len(anchor_list) == len(valid_flag_list) == num_imgs

        if batch_gt_instances_ignore is None:
            batch_gt_instances_ignore = [None] * num_imgs

        # anchor number of multi levels
        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
        # concat all level anchors to a single tensor
        concat_anchor_list = []
        concat_valid_flag_list = []
        for i in range(num_imgs):
            assert len(anchor_list[i]) == len(valid_flag_list[i])
            concat_anchor_list.append(cat_boxes(anchor_list[i]))
            concat_valid_flag_list.append(torch.cat(valid_flag_list[i]))

        # compute targets for each image
        results = multi_apply(
            self._get_targets_single,
            concat_anchor_list,
            concat_valid_flag_list,
            batch_gt_instances,
            batch_img_metas,
            batch_gt_instances_ignore,
            unmap_outputs=unmap_outputs)
        (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
         pos_inds_list, neg_inds_list, sampling_results_list) = results[:7]
        rest_results = list(results[7:])  # user-added return values

        # Get `avg_factor` of all images, which is calculated in
        # `SamplingResult`. When using a sampling method, avg_factor is
        # usually the sum of positive and negative priors. When using
        # `PseudoSampler`, `avg_factor` is usually equal to the number of
        # positive priors.
        avg_factor = sum(
            [results.avg_factor for results in sampling_results_list])
        # update `_raw_positive_infos`, which will be used when calling
        # `get_positive_infos`.
        self._raw_positive_infos.update(sampling_results=sampling_results_list)
        # split targets to a list w.r.t. multiple levels
        labels_list = images_to_levels(all_labels, num_level_anchors)
        label_weights_list = images_to_levels(all_label_weights,
                                              num_level_anchors)
        bbox_targets_list = images_to_levels(all_bbox_targets,
                                             num_level_anchors)
        bbox_weights_list = images_to_levels(all_bbox_weights,
                                             num_level_anchors)
        res = (labels_list, label_weights_list, bbox_targets_list,
               bbox_weights_list, avg_factor)
        if return_sampling_results:
            res = res + (sampling_results_list, )
        for i, r in enumerate(rest_results):  # user-added return values
            rest_results[i] = images_to_levels(r, num_level_anchors)

        return res + tuple(rest_results)
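
    # --- Illustrative note (not part of the upstream file) ---
    # `get_targets` reshuffles per-image targets into per-level lists so
    # they line up with the per-level predictions from `forward`.
    # Schematically:
    #
    #   inputs:  anchor_list[img][level], gt per image
    #   step 1:  concat levels per image -> _get_targets_single per image
    #   step 2:  images_to_levels -> labels_list[level] of shape
    #            (num_imgs, H_level * W_level * num_base_priors)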

    def loss_by_feat_single(self, cls_score: Tensor, bbox_pred: Tensor,
                            anchors: Tensor, labels: Tensor,
                            label_weights: Tensor, bbox_targets: Tensor,
                            bbox_weights: Tensor, avg_factor: int) -> tuple:
        """Calculate the loss of a single scale level based on the features
        extracted by the detection head.

        Args:
            cls_score (Tensor): Box scores for each scale level
                Has shape (N, num_anchors * num_classes, H, W).
            bbox_pred (Tensor): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            anchors (Tensor): Box reference for each scale level with shape
                (N, num_total_anchors, 4).
            labels (Tensor): Labels of each anchor with shape
                (N, num_total_anchors).
            label_weights (Tensor): Label weights of each anchor with shape
                (N, num_total_anchors).
            bbox_targets (Tensor): BBox regression targets of each anchor
                with shape (N, num_total_anchors, 4).
            bbox_weights (Tensor): BBox regression loss weights of each anchor
                with shape (N, num_total_anchors, 4).
            avg_factor (int): Average factor that is used to average the loss.

        Returns:
            tuple: loss components.
        """
        # classification loss
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        loss_cls = self.loss_cls(
            cls_score, labels, label_weights, avg_factor=avg_factor)
        # regression loss
        target_dim = bbox_targets.size(-1)
        bbox_targets = bbox_targets.reshape(-1, target_dim)
        bbox_weights = bbox_weights.reshape(-1, target_dim)
        bbox_pred = bbox_pred.permute(0, 2, 3,
                                      1).reshape(-1,
                                                 self.bbox_coder.encode_size)
        if self.reg_decoded_bbox:
            # When the regression loss (e.g. `IoULoss`, `GIoULoss`)
            # is applied directly on the decoded bounding boxes, it
            # decodes the already encoded coordinates to absolute format.
            anchors = anchors.reshape(-1, anchors.size(-1))
            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)
            bbox_pred = get_box_tensor(bbox_pred)
        loss_bbox = self.loss_bbox(
            bbox_pred, bbox_targets, bbox_weights, avg_factor=avg_factor)
        return loss_cls, loss_bbox
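
    # --- Illustrative note (not part of the upstream file) ---
    # The permute/reshape flattens (N, C, H, W) predictions so that row i of
    # `cls_score` corresponds to row i of `labels` (one row per anchor):
    #
    #   >>> import torch
    #   >>> x = torch.arange(2 * 4 * 3 * 3).view(2, 4, 3, 3).float()
    #   >>> flat = x.permute(0, 2, 3, 1).reshape(-1, 4)
    #   >>> flat.shape  # 2 images * 3 * 3 locations, 4 channels each
    #   torch.Size([18, 4])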

    def loss_by_feat(
            self,
            cls_scores: List[Tensor],
            bbox_preds: List[Tensor],
            batch_gt_instances: InstanceList,
            batch_img_metas: List[dict],
            batch_gt_instances_ignore: OptInstanceList = None) -> dict:
        """Calculate the loss based on the features extracted by the detection
        head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                each has shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
                gt_instance. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_img_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
                data that is ignored during training and testing.
                Defaults to None.

        Returns:
            dict: A dictionary of loss components.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.prior_generator.num_levels

        device = cls_scores[0].device

        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, batch_img_metas, device=device)
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            batch_gt_instances,
            batch_img_metas,
            batch_gt_instances_ignore=batch_gt_instances_ignore)
        (labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, avg_factor) = cls_reg_targets

        # anchor number of multi levels
        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
        # concat all level anchors and flags to a single tensor
        concat_anchor_list = []
        for i in range(len(anchor_list)):
            concat_anchor_list.append(cat_boxes(anchor_list[i]))
        all_anchor_list = images_to_levels(concat_anchor_list,
                                           num_level_anchors)

        losses_cls, losses_bbox = multi_apply(
            self.loss_by_feat_single,
            cls_scores,
            bbox_preds,
            all_anchor_list,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            avg_factor=avg_factor)
        return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
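

# --- Illustrative end-to-end sketch (not part of the upstream file) ---
# A hedged sketch of a full training-loss pass through this head; the
# train_cfg keys follow common mmdet 3.x conventions and are assumptions
# here, and exact loss values depend on random initialization:
#
#   >>> import torch
#   >>> from mmengine.structures import InstanceData
#   >>> train_cfg = dict(
#   ...     assigner=dict(type='MaxIoUAssigner', pos_iou_thr=0.5,
#   ...                   neg_iou_thr=0.4, min_pos_iou=0, ignore_iof_thr=-1),
#   ...     allowed_border=-1, pos_weight=-1, debug=False)
#   >>> head = AnchorHead(num_classes=9, in_channels=1, train_cfg=train_cfg)
#   >>> feats = [torch.rand(1, 1, 256 // s, 256 // s)
#   ...          for s in (4, 8, 16, 32, 64)]
#   >>> cls_scores, bbox_preds = head.forward(feats)
#   >>> gt = InstanceData(bboxes=torch.tensor([[10., 10., 60., 60.]]),
#   ...                   labels=torch.tensor([2]))
#   >>> metas = [dict(img_shape=(256, 256), pad_shape=(256, 256))]
#   >>> losses = head.loss_by_feat(cls_scores, bbox_preds, [gt], metas)
#   >>> sorted(losses.keys())
#   ['loss_bbox', 'loss_cls']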