stat.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. #!/usr/bin/env python
  2. import functools as func
  3. import glob
  4. import os.path as osp
  5. import re
  6. import numpy as np
  7. url_prefix = 'https://github.com/open-mmlab/mmdetection/blob/main/'
  8. files = sorted(glob.glob('../configs/*/README.md'))
  9. stats = []
  10. titles = []
  11. num_ckpts = 0
  12. for f in files:
  13. url = osp.dirname(f.replace('../', url_prefix))
  14. with open(f, 'r') as content_file:
  15. content = content_file.read()
  16. title = content.split('\n')[0].replace('# ', '').strip()
  17. ckpts = set(x.lower().strip()
  18. for x in re.findall(r'\[model\]\((https?.*)\)', content))
  19. if len(ckpts) == 0:
  20. continue
  21. _papertype = [x for x in re.findall(r'\[([A-Z]+)\]', content)]
  22. assert len(_papertype) > 0
  23. papertype = _papertype[0]
  24. paper = set([(papertype, title)])
  25. titles.append(title)
  26. num_ckpts += len(ckpts)
  27. statsmsg = f"""
  28. \t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts)
  29. """
  30. stats.append((paper, ckpts, statsmsg))
  31. allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats])
  32. msglist = '\n'.join(x for _, _, x in stats)
  33. papertypes, papercounts = np.unique([t for t, _ in allpapers],
  34. return_counts=True)
  35. countstr = '\n'.join(
  36. [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
  37. modelzoo = f"""
  38. # Model Zoo Statistics
  39. * Number of papers: {len(set(titles))}
  40. {countstr}
  41. * Number of checkpoints: {num_ckpts}
  42. {msglist}
  43. """
  44. with open('modelzoo_statistics.md', 'w') as f:
  45. f.write(modelzoo)