get_image_metas.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. # Copyright (c) OpenMMLab. All rights reserved.
"""Get image metas on a specific dataset.

Here is an example of how to run this script.

Example:
    python tools/misc/get_image_metas.py ${CONFIG} \
        --out ${OUTPUT FILE NAME}
"""
  8. import argparse
  9. import csv
  10. import os.path as osp
  11. from multiprocessing import Pool
  12. import mmcv
  13. from mmengine.config import Config
  14. from mmengine.fileio import dump, get
  15. def parse_args():
  16. parser = argparse.ArgumentParser(description='Collect image metas')
  17. parser.add_argument('config', help='Config file path')
  18. parser.add_argument(
  19. '--dataset',
  20. default='val',
  21. choices=['train', 'val', 'test'],
  22. help='Collect image metas from which dataset')
  23. parser.add_argument(
  24. '--out',
  25. default='validation-image-metas.pkl',
  26. help='The output image metas file name. The save dir is in the '
  27. 'same directory as `dataset.ann_file` path')
  28. parser.add_argument(
  29. '--nproc',
  30. default=4,
  31. type=int,
  32. help='Processes used for get image metas')
  33. args = parser.parse_args()
  34. return args
  35. def get_metas_from_csv_style_ann_file(ann_file):
  36. data_infos = []
  37. cp_filename = None
  38. with open(ann_file, 'r') as f:
  39. reader = csv.reader(f)
  40. for i, line in enumerate(reader):
  41. if i == 0:
  42. continue
  43. img_id = line[0]
  44. filename = f'{img_id}.jpg'
  45. if filename != cp_filename:
  46. data_infos.append(dict(filename=filename))
  47. cp_filename = filename
  48. return data_infos
  49. def get_metas_from_txt_style_ann_file(ann_file):
  50. with open(ann_file) as f:
  51. lines = f.readlines()
  52. i = 0
  53. data_infos = []
  54. while i < len(lines):
  55. filename = lines[i].rstrip()
  56. data_infos.append(dict(filename=filename))
  57. skip_lines = int(lines[i + 2]) + 3
  58. i += skip_lines
  59. return data_infos
  60. def get_image_metas(data_info, img_prefix):
  61. filename = data_info.get('filename', None)
  62. if filename is not None:
  63. if img_prefix is not None:
  64. filename = osp.join(img_prefix, filename)
  65. img_bytes = get(filename)
  66. img = mmcv.imfrombytes(img_bytes, flag='color')
  67. shape = img.shape
  68. meta = dict(filename=filename, ori_shape=shape)
  69. else:
  70. raise NotImplementedError('Missing `filename` in data_info')
  71. return meta
  72. def main():
  73. args = parse_args()
  74. assert args.out.endswith('pkl'), 'The output file name must be pkl suffix'
  75. # load config files
  76. cfg = Config.fromfile(args.config)
  77. dataloader_cfg = cfg.get(f'{args.dataset}_dataloader')
  78. ann_file = osp.join(dataloader_cfg.dataset.data_root,
  79. dataloader_cfg.dataset.ann_file)
  80. img_prefix = osp.join(dataloader_cfg.dataset.data_root,
  81. dataloader_cfg.dataset.data_prefix['img'])
  82. print(f'{"-" * 5} Start Processing {"-" * 5}')
  83. if ann_file.endswith('csv'):
  84. data_infos = get_metas_from_csv_style_ann_file(ann_file)
  85. elif ann_file.endswith('txt'):
  86. data_infos = get_metas_from_txt_style_ann_file(ann_file)
  87. else:
  88. shuffix = ann_file.split('.')[-1]
  89. raise NotImplementedError('File name must be csv or txt suffix but '
  90. f'get {shuffix}')
  91. print(f'Successfully load annotation file from {ann_file}')
  92. print(f'Processing {len(data_infos)} images...')
  93. pool = Pool(args.nproc)
  94. # get image metas with multiple processes
  95. image_metas = pool.starmap(
  96. get_image_metas,
  97. zip(data_infos, [img_prefix for _ in range(len(data_infos))]),
  98. )
  99. pool.close()
  100. # save image metas
  101. root_path = dataloader_cfg.dataset.ann_file.rsplit('/', 1)[0]
  102. save_path = osp.join(root_path, args.out)
  103. dump(image_metas, save_path, protocol=4)
  104. print(f'Image meta file save to: {save_path}')
  105. if __name__ == '__main__':
  106. main()