# Copyright (c) OpenMMLab. All rights reserved.
import copy
import warnings
from collections import OrderedDict
from typing import List, Optional, Sequence, Union

import numpy as np
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger

from mmdet.registry import METRICS
from ..functional import eval_map, eval_recalls


@METRICS.register_module()
class VOCMetric(BaseMetric):
    """Pascal VOC evaluation metric.

    Args:
        iou_thrs (float or List[float]): IoU threshold. Defaults to 0.5.
        scale_ranges (List[tuple], optional): Scale ranges for evaluating
            mAP. If not specified, all bounding boxes would be included in
            evaluation. Defaults to None.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'mAP' and 'recall'. If a list is given, it must contain exactly
            one of these options, and that entry is used.
        proposal_nums (Sequence[int]): Proposal numbers used for evaluating
            recalls, such as recall@100 and recall@1000.
            Defaults to (100, 300, 1000).
        eval_mode (str): 'area' or '11points'. 'area' means calculating the
            area under the precision-recall curve, while '11points' means
            calculating the average precision of recalls at
            [0, 0.1, ..., 1]. PASCAL VOC2007 uses '11points' by default,
            while PASCAL VOC2012 uses 'area'. Defaults to '11points'.
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
        prefix (str, optional): The prefix that will be added to the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
    """

    default_prefix: Optional[str] = 'pascal_voc'

    def __init__(self,
                 iou_thrs: Union[float, List[float]] = 0.5,
                 scale_ranges: Optional[List[tuple]] = None,
                 metric: Union[str, List[str]] = 'mAP',
                 proposal_nums: Sequence[int] = (100, 300, 1000),
                 eval_mode: str = '11points',
                 collect_device: str = 'cpu',
                 prefix: Optional[str] = None) -> None:
        super().__init__(collect_device=collect_device, prefix=prefix)
        self.iou_thrs = [iou_thrs] if isinstance(iou_thrs, float) \
            else iou_thrs
        self.scale_ranges = scale_ranges
        # voc evaluation metrics
        if not isinstance(metric, str):
            assert len(metric) == 1
            metric = metric[0]
        allowed_metrics = ['recall', 'mAP']
        if metric not in allowed_metrics:
            raise KeyError(
                f"metric should be one of 'recall', 'mAP', but got {metric}.")
        self.metric = metric
        self.proposal_nums = proposal_nums
        assert eval_mode in ['area', '11points'], \
            'Unrecognized mode, only "area" and "11points" are supported'
        self.eval_mode = eval_mode
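
    # Illustrative usage sketch (not a definitive recipe): in an MMEngine
    # config this metric is typically wired in as the validation evaluator,
    # e.g.
    #   val_evaluator = dict(
    #       type='VOCMetric', metric='mAP', eval_mode='11points')
    # The keyword names mirror the constructor arguments documented above.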

    # TODO: data_batch is no longer needed, consider adjusting the
    # parameter position
    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
        """Process one batch of data samples and predictions. The processed
        results should be stored in ``self.results``, which will be used to
        compute the metrics when all batches have been processed.

        Args:
            data_batch (dict): A batch of data from the dataloader.
            data_samples (Sequence[dict]): A batch of data samples that
                contain annotations and predictions.
        """
        for data_sample in data_samples:
            gt = copy.deepcopy(data_sample)
            # TODO: Need to refactor to support LoadAnnotations
            gt_instances = gt['gt_instances']
            gt_ignore_instances = gt['ignored_instances']
            ann = dict(
                labels=gt_instances['labels'].cpu().numpy(),
                bboxes=gt_instances['bboxes'].cpu().numpy(),
                bboxes_ignore=gt_ignore_instances['bboxes'].cpu().numpy(),
                labels_ignore=gt_ignore_instances['labels'].cpu().numpy())

            pred = data_sample['pred_instances']
            pred_bboxes = pred['bboxes'].cpu().numpy()
            pred_scores = pred['scores'].cpu().numpy()
            pred_labels = pred['labels'].cpu().numpy()

            dets = []
            for label in range(len(self.dataset_meta['classes'])):
                index = np.where(pred_labels == label)[0]
                pred_bbox_scores = np.hstack(
                    [pred_bboxes[index], pred_scores[index].reshape((-1, 1))])
                dets.append(pred_bbox_scores)
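
            # ``dets`` collects one (m, 5) array per class in
            # (x1, y1, x2, y2, score) format, the layout expected by
            # ``eval_map``; ``ann`` holds the matching GT boxes and labels
            # (including ignored ones) as plain numpy arrays.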
            self.results.append((ann, dets))

    def compute_metrics(self, results: list) -> dict:
        """Compute the metrics from processed results.

        Args:
            results (list): The processed results of each batch.

        Returns:
            dict: The computed metrics. The keys are the names of the
            metrics, and the values are the corresponding results.
        """
        logger: MMLogger = MMLogger.get_current_instance()
        gts, preds = zip(*results)
        eval_results = OrderedDict()
        if self.metric == 'mAP':
            assert isinstance(self.iou_thrs, list)
            dataset_type = self.dataset_meta.get('dataset_type')
            if dataset_type in ['VOC2007', 'VOC2012']:
                dataset_name = 'voc'
                if dataset_type == 'VOC2007' and self.eval_mode != '11points':
                    warnings.warn('Pascal VOC2007 uses `11points` as the '
                                  'default evaluation mode, but you are '
                                  f'using {self.eval_mode}.')
                elif dataset_type == 'VOC2012' and self.eval_mode != 'area':
                    warnings.warn('Pascal VOC2012 uses `area` as the '
                                  'default evaluation mode, but you are '
                                  f'using {self.eval_mode}.')
            else:
                dataset_name = self.dataset_meta['classes']
            mean_aps = []
            for iou_thr in self.iou_thrs:
                logger.info(f'\n{"-" * 15}iou_thr: {iou_thr}{"-" * 15}')
                # Following the official implementation,
                # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar
                # we use the legacy coordinate system of mmdet 1.x, in which
                # width and height are computed as `x2 - x1 + 1` and
                # `y2 - y1 + 1`.
                mean_ap, _ = eval_map(
                    preds,
                    gts,
                    scale_ranges=self.scale_ranges,
                    iou_thr=iou_thr,
                    dataset=dataset_name,
                    logger=logger,
                    eval_mode=self.eval_mode,
                    use_legacy_coordinate=True)
                mean_aps.append(mean_ap)
                eval_results[f'AP{int(iou_thr * 100):02d}'] = round(mean_ap, 3)
            eval_results['mAP'] = sum(mean_aps) / len(mean_aps)
            eval_results.move_to_end('mAP', last=False)
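            # At this point ``eval_results`` holds one key per IoU threshold
            # (e.g. 'AP50' for iou_thr=0.5) plus 'mAP', the average over all
            # thresholds; the evaluator prefix turns these into keys such as
            # 'pascal_voc/mAP' in the final report.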
        elif self.metric == 'recall':
            # TODO: Currently not checked.
            # Recall is computed from the GT boxes and the per-image
            # detections gathered in ``process``, with all classes stacked
            # together per image.
            gt_bboxes = [gt['bboxes'] for gt in gts]
            pred_bboxes = [np.vstack(pred) for pred in preds]
            recalls = eval_recalls(
                gt_bboxes,
                pred_bboxes,
                self.proposal_nums,
                self.iou_thrs,
                logger=logger,
                use_legacy_coordinate=True)
            for i, num in enumerate(self.proposal_nums):
                for j, iou_thr in enumerate(self.iou_thrs):
                    eval_results[f'recall@{num}@{iou_thr}'] = recalls[i, j]
            if recalls.shape[1] > 1:
                ar = recalls.mean(axis=1)
                for i, num in enumerate(self.proposal_nums):
                    eval_results[f'AR@{num}'] = ar[i]
        return eval_results
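

# Illustrative usage sketch, assuming a VOC-style dataset whose metainfo
# provides 'classes' and 'dataset_type'; the class tuple, the batch loop
# helper and ``num_val_images`` below are hypothetical placeholders:
#
#   metric = VOCMetric(metric='mAP', eval_mode='11points')
#   metric.dataset_meta = dict(
#       classes=('aeroplane', 'bicycle', 'bird'), dataset_type='VOC2007')
#   for data_batch, data_samples in loop_over_val_batches():
#       metric.process(data_batch, data_samples)
#   print(metric.evaluate(size=num_val_images))
#   # -> e.g. {'pascal_voc/mAP': ..., 'pascal_voc/AP50': ...}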