collect_modelzoo.py

#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
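"""Collect model zoo documents (summary inferred from ``main()`` below).

Gather the per-model markdown files under ``configs/`` and regroup them into
``model_zoo/`` (one file per task) and ``model_zoo_papers/`` (one file per
paper category).
"""
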
import os
import os.path as osp
import re
from collections import defaultdict
from glob import glob

from addict import Addict
from titlecase import titlecase


def _get_model_docs():
    """Get all model document files.

    Returns:
        list[str]: file paths
    """
    config_root = osp.join('..', '..', 'configs')
    pattern = osp.sep.join(['*'] * 4) + '.md'
    docs = glob(osp.join(config_root, pattern))
    docs = [doc for doc in docs if '_base_' not in doc]
    return docs
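

# Illustrative call (assuming the repository layout implied by ``config_root``
# above, i.e. this script living two directory levels below the repo root):
#
#     >>> _get_model_docs()  # doctest: +SKIP
#     ['../../configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md',
#      ...]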


def _parse_model_doc_path(path):
    """Parse doc file path.

    Typical path would be like:

        configs/<task>/<algorithm>/<dataset>/<setting>.md

    An example is:

        "configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md"

    Returns:
        tuple:
        - task (str): e.g. ``'Animal 2D Keypoint'``
        - dataset (str): e.g. ``'animalpose'``
        - keywords (tuple): e.g. ``('topdown_heatmap', 'resnet', 'animalpose')``
    """
    _path = path.split(osp.sep)
    _rel_path = _path[_path.index('configs'):]

    # get task
    def _titlecase_callback(word, **kwargs):
        # Keep '2d'/'3d' upper-cased; returning None for other words lets
        # ``titlecase`` fall back to its default capitalization.
        if word == '2d':
            return '2D'
        if word == '3d':
            return '3D'

    task = titlecase(
        _rel_path[1].replace('_', ' '), callback=_titlecase_callback)

    # get dataset
    dataset = _rel_path[3]

    # get keywords
    keywords_algo = (_rel_path[2], )
    keywords_setting = tuple(_rel_path[4][:-3].split('_'))
    keywords = keywords_algo + keywords_setting

    return task, dataset, keywords
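

# Illustrative result (assuming a POSIX path separator) for the example path
# in the docstring above:
#
#     >>> _parse_model_doc_path(
#     ...     'configs/animal_2d_keypoint/topdown_heatmap/'
#     ...     'animalpose/resnet_animalpose.md')
#     ('Animal 2D Keypoint', 'animalpose',
#      ('topdown_heatmap', 'resnet', 'animalpose'))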


def _get_paper_refs():
    """Get all paper references.

    Returns:
        Dict[str, List[str]]: keys are paper categories and values are lists
        of paper paths.
    """
    papers = glob('../src/papers/*/*.md')
    paper_refs = defaultdict(list)
    for fn in papers:
        category = fn.split(osp.sep)[3]
        paper_refs[category].append(fn)
    return paper_refs
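

# Illustrative layout (assuming a POSIX path separator; the category name is
# hypothetical): a reference file such as ``../src/papers/backbones/resnet.md``
# splits into ['..', 'src', 'papers', 'backbones', 'resnet.md'], so index 3
# ('backbones') is used as its category.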


def _parse_paper_ref(fn):
    """Get paper name and indicator pattern from a paper reference file.

    Returns:
        tuple:
        - paper_name (str)
        - paper_indicator (str)
    """
    indicator = None
    with open(fn, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            if line.startswith('<summary'):
                indicator = line
                break
    if indicator is None:
        raise ValueError(f'Invalid paper reference file {fn}')
    paper_name = re.sub(r'\<.*?\>', '', indicator).strip()
    return paper_name, indicator
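

# Illustrative example (the summary line below is hypothetical): for a
# reference file whose first ``<summary ...>`` line reads
#
#     <summary align="right"><a href="https://example.com">ResNet (CVPR'2016)</a></summary>
#
# the HTML tags are stripped to give paper_name "ResNet (CVPR'2016)", while
# the raw line itself is returned as the indicator that ``main()`` later
# searches for in each model document.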


def main():
    # Build output folders
    os.makedirs('model_zoo', exist_ok=True)
    os.makedirs('model_zoo_papers', exist_ok=True)

    # Collect all document contents
    model_doc_list = _get_model_docs()
    model_docs = Addict()
    for path in model_doc_list:
        task, dataset, keywords = _parse_model_doc_path(path)
        with open(path, 'r', encoding='utf-8') as f:
            doc = {
                'task': task,
                'dataset': dataset,
                'keywords': keywords,
                'path': path,
                'content': f.read()
            }
        model_docs[task][dataset][keywords] = doc
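
    # At this point ``model_docs`` is a nested Addict keyed as
    # ``model_docs[task][dataset][keywords] -> doc``, e.g. (for the example
    # path in ``_parse_model_doc_path``):
    #     model_docs['Animal 2D Keypoint']['animalpose'][
    #         ('topdown_heatmap', 'resnet', 'animalpose')]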

    # Write files by task
    for task, dataset_dict in model_docs.items():
        lines = [f'# {task}', '']

        for dataset, keywords_dict in dataset_dict.items():
            lines += [
                '<hr/>', '<br/><br/>', '', f'## {titlecase(dataset)} Dataset',
                ''
            ]
            for keywords, doc in keywords_dict.items():
                keyword_strs = [
                    titlecase(x.replace('_', ' ')) for x in keywords
                ]
                dataset_str = titlecase(dataset)
                if dataset_str in keyword_strs:
                    keyword_strs.remove(dataset_str)

                lines += [
                    '<br/>', '',
                    (f'### {" + ".join(keyword_strs)}'
                     f' on {dataset_str}'), '', doc['content'], ''
                ]

        fn = osp.join('model_zoo', f'{task.replace(" ", "_").lower()}.md')
        with open(fn, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))
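
    # Example output name (derived from the ``replace``/``lower`` call above):
    # the task 'Animal 2D Keypoint' is written to
    # 'model_zoo/animal_2d_keypoint.md'.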

    # Write files by paper
    paper_refs = _get_paper_refs()
    for paper_cat, paper_list in paper_refs.items():
        lines = []

        for paper_fn in paper_list:
            paper_name, indicator = _parse_paper_ref(paper_fn)
            paperlines = []

            for task, dataset_dict in model_docs.items():
                for dataset, keywords_dict in dataset_dict.items():
                    for keywords, doc_info in keywords_dict.items():
                        if indicator not in doc_info['content']:
                            continue
                        keyword_strs = [
                            titlecase(x.replace('_', ' ')) for x in keywords
                        ]
                        dataset_str = titlecase(dataset)
                        if dataset_str in keyword_strs:
                            keyword_strs.remove(dataset_str)

                        paperlines += [
                            '<br/>', '',
                            (f'### {" + ".join(keyword_strs)}'
                             f' on {dataset_str}'), '', doc_info['content'], ''
                        ]

            if paperlines:
                lines += ['<hr/>', '<br/><br/>', '', f'## {paper_name}', '']
                lines += paperlines

        if lines:
            lines = [f'# {titlecase(paper_cat)}', ''] + lines
            with open(
                    osp.join('model_zoo_papers', f'{paper_cat.lower()}.md'),
                    'w',
                    encoding='utf-8') as f:
                f.write('\n'.join(lines))


if __name__ == '__main__':
    print('collect model zoo documents')
    main()
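
# Usage note (an assumption based on the relative paths above, not stated in
# the script itself): this is meant to be run from the documentation source
# directory, e.g.
#
#     python collect_modelzoo.py
#
# so that '../../configs' and '../src/papers' resolve as intended.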