get_crowdhuman_id_hw.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. # Copyright (c) OpenMMLab. All rights reserved.
  2. """Get image shape on CrowdHuman dataset.
  3. Here is an example of how to run this script; ``${DATASET_TYPE}`` is ``train`` or ``val``.
  4. Example:
  5. python tools/misc/get_crowdhuman_id_hw.py ${CONFIG} \
  6. --dataset ${DATASET_TYPE}
  7. """
  8. import argparse
  9. import json
  10. import logging
  11. import os.path as osp
  12. from multiprocessing import Pool
  13. import mmcv
  14. from mmengine.config import Config
  15. from mmengine.fileio import dump, get, get_text
  16. from mmengine.logging import print_log
  17. def parse_args():
  18. parser = argparse.ArgumentParser(description='Collect image metas')
  19. parser.add_argument('config', help='Config file path')
  20. parser.add_argument(
  21. '--dataset',
  22. choices=['train', 'val'],
  23. help='Collect image metas from which dataset')
  24. parser.add_argument(
  25. '--nproc',
  26. default=10,
  27. type=int,
  28. help='Processes used for get image metas')
  29. args = parser.parse_args()
  30. return args
  31. def get_image_metas(anno_str, img_prefix):
  32. id_hw = {}
  33. anno_dict = json.loads(anno_str)
  34. img_path = osp.join(img_prefix, f"{anno_dict['ID']}.jpg")
  35. img_id = anno_dict['ID']
  36. img_bytes = get(img_path)
  37. img = mmcv.imfrombytes(img_bytes, backend='cv2')
  38. id_hw[img_id] = img.shape[:2]
  39. return id_hw
  40. def main():
  41. args = parse_args()
  42. # get ann_file and img_prefix from config files
  43. cfg = Config.fromfile(args.config)
  44. dataset = args.dataset
  45. dataloader_cfg = cfg.get(f'{dataset}_dataloader')
  46. ann_file = osp.join(dataloader_cfg.dataset.data_root,
  47. dataloader_cfg.dataset.ann_file)
  48. img_prefix = osp.join(dataloader_cfg.dataset.data_root,
  49. dataloader_cfg.dataset.data_prefix['img'])
  50. # load image metas
  51. print_log(
  52. f'loading CrowdHuman {dataset} annotation...', level=logging.INFO)
  53. anno_strs = get_text(ann_file).strip().split('\n')
  54. pool = Pool(args.nproc)
  55. # get image metas with multiple processes
  56. id_hw_temp = pool.starmap(
  57. get_image_metas,
  58. zip(anno_strs, [img_prefix for _ in range(len(anno_strs))]),
  59. )
  60. pool.close()
  61. # save image metas
  62. id_hw = {}
  63. for sub_dict in id_hw_temp:
  64. id_hw.update(sub_dict)
  65. data_root = osp.dirname(ann_file)
  66. save_path = osp.join(data_root, f'id_hw_{dataset}.json')
  67. print_log(
  68. f'\nsaving "id_hw_{dataset}.json" in "{data_root}"',
  69. level=logging.INFO)
  70. dump(id_hw, save_path, file_format='json')
  71. if __name__ == '__main__':
  72. main()