stats.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. #!/usr/bin/env python
  2. # Copyright (c) OpenMMLab. All rights reserved.
  3. import functools as func
  4. import glob
  5. import re
  6. from os.path import basename, splitext
  7. import numpy as np
  8. import titlecase
  9. def anchor(name):
  10. return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
  11. name.strip().lower())).strip('-')
  12. # Count algorithms
  13. files = sorted(glob.glob('model_zoo/*.md'))
  14. stats = []
  15. for f in files:
  16. with open(f, 'r') as content_file:
  17. content = content_file.read()
  18. # title
  19. title = content.split('\n')[0].replace('#', '')
  20. # count papers
  21. papers = set(
  22. (papertype, titlecase.titlecase(paper.lower().strip()))
  23. for (papertype, paper) in re.findall(
  24. r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
  25. content, re.DOTALL))
  26. # paper links
  27. revcontent = '\n'.join(list(reversed(content.splitlines())))
  28. paperlinks = {}
  29. for _, p in papers:
  30. # print(p)
  31. paperlinks[p] = ', '.join(
  32. ((f'[{paperlink} ⇨]'
  33. f'(model_zoo/{splitext(basename(f))[0]}.html#'
  34. f'{anchor(paperlink)})') for paperlink in re.findall(
  35. rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n### (.*?)\s*[,;]?\s*\n',
  36. revcontent, re.DOTALL | re.IGNORECASE)))
  37. # print(' ', paperlinks[p])
  38. paperlist = '\n'.join(
  39. sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
  40. # count configs
  41. configs = set(x.lower().strip()
  42. for x in re.findall(r'.*configs/.*\.py', content))
  43. # count ckpts
  44. ckpts = set(x.lower().strip()
  45. for x in re.findall(r'https://download.*\.pth', content)
  46. if 'mmpose' in x)
  47. statsmsg = f"""
  48. ## [{title}]({f})
  49. * 模型权重文件数量: {len(ckpts)}
  50. * 配置文件数量: {len(configs)}
  51. * 论文数量: {len(papers)}
  52. {paperlist}
  53. """
  54. stats.append((papers, configs, ckpts, statsmsg))
  55. allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _, _ in stats])
  56. allconfigs = func.reduce(lambda a, b: a.union(b), [c for _, c, _, _ in stats])
  57. allckpts = func.reduce(lambda a, b: a.union(b), [c for _, _, c, _ in stats])
  58. # Summarize
  59. msglist = '\n'.join(x for _, _, _, x in stats)
  60. papertypes, papercounts = np.unique([t for t, _ in allpapers],
  61. return_counts=True)
  62. countstr = '\n'.join(
  63. [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
  64. modelzoo = f"""
  65. # 概览
  66. * 模型权重文件数量: {len(allckpts)}
  67. * 配置文件数量: {len(allconfigs)}
  68. * 论文数量: {len(allpapers)}
  69. {countstr}
  70. 已支持的数据集详细信息请见 [数据集](dataset_zoo.md).
  71. {msglist}
  72. """
  73. with open('model_zoo.md', 'w') as f:
  74. f.write(modelzoo)
  75. # Count datasets
  76. files = sorted(glob.glob('model_zoo/*.md'))
  77. # files = sorted(glob.glob('docs/tasks/*.md'))
  78. datastats = []
  79. for f in files:
  80. with open(f, 'r') as content_file:
  81. content = content_file.read()
  82. # title
  83. title = content.split('\n')[0].replace('#', '')
  84. # count papers
  85. papers = set(
  86. (papertype, titlecase.titlecase(paper.lower().strip()))
  87. for (papertype, paper) in re.findall(
  88. r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
  89. content, re.DOTALL))
  90. # paper links
  91. revcontent = '\n'.join(list(reversed(content.splitlines())))
  92. paperlinks = {}
  93. for _, p in papers:
  94. # print(p)
  95. paperlinks[p] = ', '.join(
  96. (f'[{p} ⇨](model_zoo/{splitext(basename(f))[0]}.html#'
  97. f'{anchor(p)})' for p in re.findall(
  98. rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
  99. revcontent, re.DOTALL | re.IGNORECASE)))
  100. # print(' ', paperlinks[p])
  101. paperlist = '\n'.join(
  102. sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
  103. # count configs
  104. configs = set(x.lower().strip()
  105. for x in re.findall(r'https.*configs/.*\.py', content))
  106. # count ckpts
  107. ckpts = set(x.lower().strip()
  108. for x in re.findall(r'https://download.*\.pth', content)
  109. if 'mmpose' in x)
  110. statsmsg = f"""
  111. ## [{title}]({f})
  112. * 论文数量: {len(papers)}
  113. {paperlist}
  114. """
  115. datastats.append((papers, configs, ckpts, statsmsg))
  116. alldatapapers = func.reduce(lambda a, b: a.union(b),
  117. [p for p, _, _, _ in datastats])
  118. # Summarize
  119. msglist = '\n'.join(x for _, _, _, x in stats)
  120. datamsglist = '\n'.join(x for _, _, _, x in datastats)
  121. papertypes, papercounts = np.unique([t for t, _ in alldatapapers],
  122. return_counts=True)
  123. countstr = '\n'.join(
  124. [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
  125. dataset_zoo = f"""
  126. # 概览
  127. * 论文数量: {len(alldatapapers)}
  128. {countstr}
  129. 已支持的算法详细信息请见 [模型池](model_zoo.md).
  130. {datamsglist}
  131. """
  132. with open('dataset_zoo.md', 'w') as f:
  133. f.write(dataset_zoo)