images2coco.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. # Copyright (c) OpenMMLab. All rights reserved.
  2. import argparse
  3. import os
  4. from mmengine.fileio import dump, list_from_file
  5. from mmengine.utils import mkdir_or_exist, scandir, track_iter_progress
  6. from PIL import Image
  7. def parse_args():
  8. parser = argparse.ArgumentParser(
  9. description='Convert images to coco format without annotations')
  10. parser.add_argument('img_path', help='The root path of images')
  11. parser.add_argument(
  12. 'classes', type=str, help='The text file name of storage class list')
  13. parser.add_argument(
  14. 'out',
  15. type=str,
  16. help='The output annotation json file name, The save dir is in the '
  17. 'same directory as img_path')
  18. parser.add_argument(
  19. '-e',
  20. '--exclude-extensions',
  21. type=str,
  22. nargs='+',
  23. help='The suffix of images to be excluded, such as "png" and "bmp"')
  24. args = parser.parse_args()
  25. return args
  26. def collect_image_infos(path, exclude_extensions=None):
  27. img_infos = []
  28. images_generator = scandir(path, recursive=True)
  29. for image_path in track_iter_progress(list(images_generator)):
  30. if exclude_extensions is None or (
  31. exclude_extensions is not None
  32. and not image_path.lower().endswith(exclude_extensions)):
  33. image_path = os.path.join(path, image_path)
  34. img_pillow = Image.open(image_path)
  35. img_info = {
  36. 'filename': image_path,
  37. 'width': img_pillow.width,
  38. 'height': img_pillow.height,
  39. }
  40. img_infos.append(img_info)
  41. return img_infos
  42. def cvt_to_coco_json(img_infos, classes):
  43. image_id = 0
  44. coco = dict()
  45. coco['images'] = []
  46. coco['type'] = 'instance'
  47. coco['categories'] = []
  48. coco['annotations'] = []
  49. image_set = set()
  50. for category_id, name in enumerate(classes):
  51. category_item = dict()
  52. category_item['supercategory'] = str('none')
  53. category_item['id'] = int(category_id)
  54. category_item['name'] = str(name)
  55. coco['categories'].append(category_item)
  56. for img_dict in img_infos:
  57. file_name = img_dict['filename']
  58. assert file_name not in image_set
  59. image_item = dict()
  60. image_item['id'] = int(image_id)
  61. image_item['file_name'] = str(file_name)
  62. image_item['height'] = int(img_dict['height'])
  63. image_item['width'] = int(img_dict['width'])
  64. coco['images'].append(image_item)
  65. image_set.add(file_name)
  66. image_id += 1
  67. return coco
  68. def main():
  69. args = parse_args()
  70. assert args.out.endswith(
  71. 'json'), 'The output file name must be json suffix'
  72. # 1 load image list info
  73. img_infos = collect_image_infos(args.img_path, args.exclude_extensions)
  74. # 2 convert to coco format data
  75. classes = list_from_file(args.classes)
  76. coco_info = cvt_to_coco_json(img_infos, classes)
  77. # 3 dump
  78. save_dir = os.path.join(args.img_path, '..', 'annotations')
  79. mkdir_or_exist(save_dir)
  80. save_path = os.path.join(save_dir, args.out)
  81. dump(coco_info, save_path)
  82. print(f'save json file: {save_path}')
  83. if __name__ == '__main__':
  84. main()