layer_decay_optimizer_constructor.py

# Copyright (c) OpenMMLab. All rights reserved.
import json
from typing import List

import torch.nn as nn
from mmengine.dist import get_dist_info
from mmengine.logging import MMLogger
from mmengine.optim import DefaultOptimWrapperConstructor

from mmdet.registry import OPTIM_WRAPPER_CONSTRUCTORS


def get_layer_id_for_convnext(var_name, max_layer_id):
    """Get the layer id to set the different learning rates in ``layer_wise``
    decay_type.

    Args:
        var_name (str): The key of the model.
        max_layer_id (int): Maximum layer id.

    Returns:
        int: The id number corresponding to different learning rate in
        ``LearningRateDecayOptimizerConstructor``.
    """
    if var_name in ('backbone.cls_token', 'backbone.mask_token',
                    'backbone.pos_embed'):
        return 0
    elif var_name.startswith('backbone.downsample_layers'):
        stage_id = int(var_name.split('.')[2])
        if stage_id == 0:
            layer_id = 0
        elif stage_id == 1:
            layer_id = 2
        elif stage_id == 2:
            layer_id = 3
        elif stage_id == 3:
            layer_id = max_layer_id
        return layer_id
    elif var_name.startswith('backbone.stages'):
        stage_id = int(var_name.split('.')[2])
        block_id = int(var_name.split('.')[3])
        if stage_id == 0:
            layer_id = 1
        elif stage_id == 1:
            layer_id = 2
        elif stage_id == 2:
            layer_id = 3 + block_id // 3
        elif stage_id == 3:
            layer_id = max_layer_id
        return layer_id
    else:
        return max_layer_id + 1
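
# A quick illustration of the mapping above, as a sketch: assuming a
# ConvNeXt-T style backbone configured with ``num_layers=6`` (so
# ``max_layer_id == 6``); the parameter names below are hypothetical
# examples, not taken from this file.
#   'backbone.downsample_layers.0.0.weight'      -> 0  (stem)
#   'backbone.stages.2.4.depthwise_conv.weight'  -> 3 + 4 // 3 = 4
#   'backbone.stages.3.0.gamma'                  -> 6  (max_layer_id)
#   'bbox_head.fc_cls.weight'                    -> 7  (max_layer_id + 1)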


def get_stage_id_for_convnext(var_name, max_stage_id):
    """Get the stage id to set the different learning rates in ``stage_wise``
    decay_type.

    Args:
        var_name (str): The key of the model.
        max_stage_id (int): Maximum stage id.

    Returns:
        int: The id number corresponding to different learning rate in
        ``LearningRateDecayOptimizerConstructor``.
    """
    if var_name in ('backbone.cls_token', 'backbone.mask_token',
                    'backbone.pos_embed'):
        return 0
    elif var_name.startswith('backbone.downsample_layers'):
        return 0
    elif var_name.startswith('backbone.stages'):
        stage_id = int(var_name.split('.')[2])
        return stage_id + 1
    else:
        return max_stage_id - 1
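
# The same idea for ``stage_wise`` decay, again a sketch with hypothetical
# parameter names (note that ``add_params`` below passes num_layers + 2 as
# ``max_stage_id``):
#   'backbone.downsample_layers.1.0.weight'       -> 0
#   'backbone.stages.2.4.pointwise_conv1.weight'  -> 3  (stage_id + 1)
#   'bbox_head.fc_cls.weight'                     -> max_stage_id - 1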


@OPTIM_WRAPPER_CONSTRUCTORS.register_module()
class LearningRateDecayOptimizerConstructor(DefaultOptimWrapperConstructor):
    # Different learning rates are set for different layers of backbone.
    # Note: Currently, this optimizer constructor is built for ConvNeXt.

    def add_params(self, params: List[dict], module: nn.Module,
                   **kwargs) -> None:
        """Add all parameters of module to the params list.

        The parameters of the given module will be added to the list of param
        groups, with specific rules defined by paramwise_cfg.

        Args:
            params (list[dict]): A list of param groups, it will be modified
                in place.
            module (nn.Module): The module to be added.
        """
        logger = MMLogger.get_current_instance()

        parameter_groups = {}
        logger.info(f'self.paramwise_cfg is {self.paramwise_cfg}')
        num_layers = self.paramwise_cfg.get('num_layers') + 2
        decay_rate = self.paramwise_cfg.get('decay_rate')
        decay_type = self.paramwise_cfg.get('decay_type', 'layer_wise')
        logger.info('Build LearningRateDecayOptimizerConstructor '
                    f'{decay_type} {decay_rate} - {num_layers}')
        weight_decay = self.base_wd

        for name, param in module.named_parameters():
            if not param.requires_grad:
                continue  # frozen weights
            if len(param.shape) == 1 or name.endswith('.bias') or name in (
                    'pos_embed', 'cls_token'):
                group_name = 'no_decay'
                this_weight_decay = 0.
            else:
                group_name = 'decay'
                this_weight_decay = weight_decay
            if 'layer_wise' in decay_type:
                if 'ConvNeXt' in module.backbone.__class__.__name__:
                    layer_id = get_layer_id_for_convnext(
                        name, self.paramwise_cfg.get('num_layers'))
                    logger.info(f'set param {name} as id {layer_id}')
                else:
                    raise NotImplementedError()
            elif decay_type == 'stage_wise':
                if 'ConvNeXt' in module.backbone.__class__.__name__:
                    layer_id = get_stage_id_for_convnext(name, num_layers)
                    logger.info(f'set param {name} as id {layer_id}')
                else:
                    raise NotImplementedError()
            group_name = f'layer_{layer_id}_{group_name}'

            if group_name not in parameter_groups:
                scale = decay_rate**(num_layers - layer_id - 1)

                parameter_groups[group_name] = {
                    'weight_decay': this_weight_decay,
                    'params': [],
                    'param_names': [],
                    'lr_scale': scale,
                    'group_name': group_name,
                    'lr': scale * self.base_lr,
                }

            parameter_groups[group_name]['params'].append(param)
            parameter_groups[group_name]['param_names'].append(name)

        rank, _ = get_dist_info()
        if rank == 0:
            to_display = {}
            for key in parameter_groups:
                to_display[key] = {
                    'param_names': parameter_groups[key]['param_names'],
                    'lr_scale': parameter_groups[key]['lr_scale'],
                    'lr': parameter_groups[key]['lr'],
                    'weight_decay': parameter_groups[key]['weight_decay'],
                }
            logger.info(f'Param groups = {json.dumps(to_display, indent=2)}')
        params.extend(parameter_groups.values())
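
# Usage sketch (not part of this module): in an mmdet config file, this
# constructor is selected through ``optim_wrapper``. The values below are
# illustrative, loosely following the ConvNeXt configs:
#
#   optim_wrapper = dict(
#       constructor='LearningRateDecayOptimizerConstructor',
#       paramwise_cfg=dict(
#           decay_rate=0.7, decay_type='layer_wise', num_layers=6),
#       optimizer=dict(type='AdamW', lr=0.0002, weight_decay=0.05))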