stats.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. #!/usr/bin/env python
  2. # Copyright (c) OpenMMLab. All rights reserved.
  3. import functools as func
  4. import glob
  5. import re
  6. from os.path import basename, splitext
  7. import numpy as np
  8. import titlecase
  9. def anchor(name):
  10. return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
  11. name.strip().lower())).strip('-')
  12. # Count algorithms
  13. files = sorted(glob.glob('model_zoo/*.md'))
  14. stats = []
  15. for f in files:
  16. with open(f, 'r') as content_file:
  17. content = content_file.read()
  18. # title
  19. title = content.split('\n')[0].replace('#', '')
  20. # count papers
  21. papers = set(
  22. (papertype, titlecase.titlecase(paper.lower().strip()))
  23. for (papertype, paper) in re.findall(
  24. r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
  25. content, re.DOTALL))
  26. # paper links
  27. revcontent = '\n'.join(list(reversed(content.splitlines())))
  28. paperlinks = {}
  29. for _, p in papers:
  30. # print(p)
  31. paperlinks[p] = ', '.join(
  32. ((f'[{paperlink} ⇨]'
  33. f'(model_zoo/{splitext(basename(f))[0]}.html#'
  34. f'{anchor(paperlink)})') for paperlink in re.findall(
  35. rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n### (.*?)\s*[,;]?\s*\n',
  36. revcontent, re.DOTALL | re.IGNORECASE)))
  37. # print(' ', paperlinks[p])
  38. paperlist = '\n'.join(
  39. sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
  40. # count configs
  41. configs = set(x.lower().strip()
  42. for x in re.findall(r'.*configs/.*\.py', content))
  43. # count ckpts
  44. ckpts = set(x.lower().strip()
  45. for x in re.findall(r'https://download.*\.pth', content)
  46. if 'mmpose' in x)
  47. statsmsg = f"""
  48. ## [{title}]({f})
  49. * Number of checkpoints: {len(ckpts)}
  50. * Number of configs: {len(configs)}
  51. * Number of papers: {len(papers)}
  52. {paperlist}
  53. """
  54. stats.append((papers, configs, ckpts, statsmsg))
  55. allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _, _ in stats])
  56. allconfigs = func.reduce(lambda a, b: a.union(b), [c for _, c, _, _ in stats])
  57. allckpts = func.reduce(lambda a, b: a.union(b), [c for _, _, c, _ in stats])
  58. # Summarize
  59. msglist = '\n'.join(x for _, _, _, x in stats)
  60. papertypes, papercounts = np.unique([t for t, _ in allpapers],
  61. return_counts=True)
  62. countstr = '\n'.join(
  63. [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
  64. modelzoo = f"""
  65. # Overview
  66. * Number of checkpoints: {len(allckpts)}
  67. * Number of configs: {len(allconfigs)}
  68. * Number of papers: {len(allpapers)}
  69. {countstr}
  70. For supported datasets, see [datasets overview](dataset_zoo.md).
  71. {msglist}
  72. """
  73. with open('model_zoo.md', 'w') as f:
  74. f.write(modelzoo)
  75. # Count datasets
  76. files = sorted(glob.glob('model_zoo/*.md'))
  77. # files = sorted(glob.glob('docs/tasks/*.md'))
  78. datastats = []
  79. for f in files:
  80. with open(f, 'r') as content_file:
  81. content = content_file.read()
  82. # title
  83. title = content.split('\n')[0].replace('#', '')
  84. # count papers
  85. papers = set(
  86. (papertype, titlecase.titlecase(paper.lower().strip()))
  87. for (papertype, paper) in re.findall(
  88. r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
  89. content, re.DOTALL))
  90. # paper links
  91. revcontent = '\n'.join(list(reversed(content.splitlines())))
  92. paperlinks = {}
  93. for _, p in papers:
  94. # print(p)
  95. paperlinks[p] = ', '.join(
  96. (f'[{p} ⇨](model_zoo/{splitext(basename(f))[0]}.html#'
  97. f'{anchor(p)})' for p in re.findall(
  98. rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
  99. revcontent, re.DOTALL | re.IGNORECASE)))
  100. # print(' ', paperlinks[p])
  101. paperlist = '\n'.join(
  102. sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
  103. # count configs
  104. configs = set(x.lower().strip()
  105. for x in re.findall(r'https.*configs/.*\.py', content))
  106. # count ckpts
  107. ckpts = set(x.lower().strip()
  108. for x in re.findall(r'https://download.*\.pth', content)
  109. if 'mmpose' in x)
  110. statsmsg = f"""
  111. ## [{title}]({f})
  112. * Number of papers: {len(papers)}
  113. {paperlist}
  114. """
  115. datastats.append((papers, configs, ckpts, statsmsg))
  116. alldatapapers = func.reduce(lambda a, b: a.union(b),
  117. [p for p, _, _, _ in datastats])
  118. # Summarize
  119. msglist = '\n'.join(x for _, _, _, x in stats)
  120. datamsglist = '\n'.join(x for _, _, _, x in datastats)
  121. papertypes, papercounts = np.unique([t for t, _ in alldatapapers],
  122. return_counts=True)
  123. countstr = '\n'.join(
  124. [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
  125. dataset_zoo = f"""
  126. # Overview
  127. * Number of papers: {len(alldatapapers)}
  128. {countstr}
  129. For supported pose algorithms, see [modelzoo overview](model_zoo.md).
  130. {datamsglist}
  131. """
  132. with open('dataset_zoo.md', 'w') as f:
  133. f.write(dataset_zoo)