benchmark_train.py

# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os
import os.path as osp
from argparse import ArgumentParser

from mmengine.config import Config, DictAction
from mmengine.logging import MMLogger, print_log
from mmengine.registry import RUNNERS
from mmengine.runner import Runner

from mmdet.testing import replace_to_ceph
from mmdet.utils import register_all_modules, replace_cfg_vals


def parse_args():
    parser = ArgumentParser()
    parser.add_argument('config', help='test config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument('--ceph', action='store_true')
    parser.add_argument('--save-ckpt', action='store_true')
    parser.add_argument(
        '--amp',
        action='store_true',
        default=False,
        help='enable automatic-mixed-precision training')
    parser.add_argument(
        '--auto-scale-lr',
        action='store_true',
        help='enable automatically scaling LR.')
    parser.add_argument(
        '--resume',
        action='store_true',
        help='resume from the latest checkpoint in the work_dir automatically')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config; key-value pairs '
        'in xxx=yyy format will be merged into the config file. If the value '
        'to be overwritten is a list, it should be like key="[a,b]" or '
        'key=a,b. It also allows nested list/tuple values, e.g. '
        'key="[(a,b),(c,d)]". Note that the quotation marks are necessary '
        'and that no white space is allowed.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)
    return args
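

# Example invocation (paths and overrides are illustrative, not part of the
# repo):
#
#   python .dev_scripts/benchmark_train.py benchmark_configs.py \
#       --work-dir work_dirs/benchmark --save-ckpt --amp \
#       --cfg-options train_dataloader.batch_size=2
#
# Distributed launchers set LOCAL_RANK themselves; the fallback above makes
# the script runnable as a single standalone process as well.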


# TODO: Need to refactor train.py so that it can be reused.
def fast_train_model(config_name, args, logger=None):
    cfg = Config.fromfile(config_name)
    cfg = replace_cfg_vals(cfg)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = osp.join(args.work_dir,
                                osp.splitext(osp.basename(config_name))[0])
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(config_name))[0])
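    # e.g. with `--work-dir work_dirs/benchmark` and a config named
    # 'retinanet_r50_fpn_1x_coco.py', everything for that model lands in
    # 'work_dirs/benchmark/retinanet_r50_fpn_1x_coco'.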

    ckpt_hook = cfg.default_hooks.checkpoint
    by_epoch = ckpt_hook.get('by_epoch', True)
    fast_stop_hook = dict(type='FastStopTrainingHook')
    fast_stop_hook['by_epoch'] = by_epoch
    if args.save_ckpt:
        if by_epoch:
            interval = 1
            stop_iter_or_epoch = 2
        else:
            interval = 4
            stop_iter_or_epoch = 10
        fast_stop_hook['stop_iter_or_epoch'] = stop_iter_or_epoch
        fast_stop_hook['save_ckpt'] = True
        ckpt_hook.interval = interval
    if 'custom_hooks' in cfg:
        cfg.custom_hooks.append(fast_stop_hook)
    else:
        cfg.custom_hooks = [fast_stop_hook]
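    # With `--save-ckpt` and an epoch-based schedule, the appended hook is,
    # e.g., dict(type='FastStopTrainingHook', by_epoch=True,
    # stop_iter_or_epoch=2, save_ckpt=True), while the checkpoint hook saves
    # every epoch; iteration-based runs stop after 10 iters, saving every 4.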

    # TODO: temporary plan
    if 'visualizer' in cfg:
        if 'name' in cfg.visualizer:
            del cfg.visualizer.name

    # enable automatic-mixed-precision training
    if args.amp:
        optim_wrapper = cfg.optim_wrapper.type
        if optim_wrapper == 'AmpOptimWrapper':
            print_log(
                'AMP training is already enabled in your config.',
                logger='current',
                level=logging.WARNING)
        else:
            assert optim_wrapper == 'OptimWrapper', (
                '`--amp` is only supported when the optimizer wrapper type is '
                f'`OptimWrapper` but got {optim_wrapper}.')
            cfg.optim_wrapper.type = 'AmpOptimWrapper'
            cfg.optim_wrapper.loss_scale = 'dynamic'
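    # e.g. optim_wrapper = dict(type='OptimWrapper', optimizer=...) becomes
    # dict(type='AmpOptimWrapper', loss_scale='dynamic', optimizer=...);
    # mmengine's AmpOptimWrapper then runs the step under torch.cuda.amp
    # with a dynamic GradScaler.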

    # enable automatically scaling LR
    if args.auto_scale_lr:
        if 'auto_scale_lr' in cfg and \
                'enable' in cfg.auto_scale_lr and \
                'base_batch_size' in cfg.auto_scale_lr:
            cfg.auto_scale_lr.enable = True
        else:
            raise RuntimeError('Cannot find "auto_scale_lr" or '
                               '"auto_scale_lr.enable" or '
                               '"auto_scale_lr.base_batch_size" in your '
                               'configuration file.')
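    # Configs that support this are expected to carry something like
    #   auto_scale_lr = dict(enable=False, base_batch_size=16)
    # so the runner can rescale the LR by actual_batch_size / base_batch_size.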

    if args.ceph:
        replace_to_ceph(cfg)

    cfg.resume = args.resume

    # build the runner from config
    if 'runner_type' not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)
    runner.train()
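

# For reference only: a rough, illustrative sketch of the kind of hook this
# script configures. The real `FastStopTrainingHook` is provided by mmdet's
# test utilities and registered under that name; the details below are
# assumptions, except for one invariant the script relies on: the hook raises
# RuntimeError('quick exit'), which main() treats as a normal early stop.
from mmengine.hooks import Hook


class _FastStopTrainingHookSketch(Hook):
    """Illustrative sketch; not registered and never used by this script."""

    def __init__(self, by_epoch, save_ckpt=False, stop_iter_or_epoch=5):
        self.by_epoch = by_epoch
        self.save_ckpt = save_ckpt
        self.stop_iter_or_epoch = stop_iter_or_epoch

    def after_train_iter(self, runner, batch_idx, data_batch=None,
                         outputs=None) -> None:
        if self.save_ckpt and self.by_epoch:
            # epoch-based runs that save weights stop in after_train_epoch,
            # so the checkpoint hook gets a chance to run first
            return
        if runner.iter >= self.stop_iter_or_epoch:
            raise RuntimeError('quick exit')

    def after_train_epoch(self, runner) -> None:
        if runner.epoch >= self.stop_iter_or_epoch - 1:
            raise RuntimeError('quick exit')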


# Smoke-test that the training code in each listed config runs correctly.
def main(args):
    # register all modules in mmdet into the registries
    register_all_modules(init_default_scope=False)

    config = Config.fromfile(args.config)

    # test all models
    logger = MMLogger.get_instance(
        name='MMLogger',
        log_file='benchmark_train.log',
        log_level=logging.ERROR)

    for model_key in config:
        model_infos = config[model_key]
        if not isinstance(model_infos, list):
            model_infos = [model_infos]
        for model_info in model_infos:
            print('processing:', model_info['config'], flush=True)
            config_name = model_info['config'].strip()
            try:
                fast_train_model(config_name, args, logger)
            except RuntimeError as e:
                # 'quick exit' is the normal early-stop message, not an error
                if 'quick exit' not in repr(e):
                    logger.error(f'{config_name}: {repr(e)}')
            except Exception as e:
                logger.error(f'{config_name}: {repr(e)}')


if __name__ == '__main__':
    args = parse_args()
    main(args)
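
# main() walks a meta-config whose top-level keys each map to one model info
# dict (or a list of them), each carrying a 'config' path. A minimal example
# of such a file (contents are illustrative, not shipped with this script):
#
#   retinanet = dict(config='configs/retinanet/retinanet_r50_fpn_1x_coco.py')
#   faster_rcnn = [
#       dict(config='configs/faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py'),
#   ]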