"""Convert an EfficientDet TensorFlow checkpoint into a PyTorch state dict.

The script reads every variable of a TensorFlow checkpoint, re-orders the
axes of convolution / dense kernels, renames the variables to the key layout
used by the PyTorch model (``backbone.*``, ``neck.bifpn.*``, ``bbox_head.*``)
and stores the result as ``{'state_dict': ...}`` with ``torch.save``.
"""
import argparse

import numpy as np
import torch

torch.set_printoptions(precision=20)

# Placeholder kept in a BiFPN fusion-weight tensor until the real scalar has
# been read from the checkpoint. A tensor is exported once none of its slots
# holds this placeholder any more.
_UNSET = -1e4

# TF BatchNorm variable name -> PyTorch BatchNorm attribute.
_BN_PARAMS = {
    'beta': 'bias',
    'gamma': 'weight',
    'moving_mean': 'running_mean',
    'moving_variance': 'running_var',
}

# TF separable-conv variable name -> PyTorch parameter of the target module.
_SEP_CONV_PARAMS = {
    'depthwise_kernel': 'depthwise_conv.weight',
    'pointwise_kernel': 'pointwise_conv.weight',
    'bias': 'pointwise_conv.bias',
}

# Backbone ``blocks_<idx>`` -> ``backbone.layers.<stage>.<block>``.
# Only indices 0-15 are known; any other block index raises ``KeyError``.
_BLOCK_TO_LAYER = {
    0: '1.0',
    1: '2.0',
    2: '2.1',
    3: '3.0',
    4: '3.1',
    5: '4.0',
    6: '4.1',
    7: '4.2',
    8: '4.3',
    9: '4.4',
    10: '4.5',
    11: '5.0',
    12: '5.1',
    13: '5.2',
    14: '5.3',
    15: '5.4',
}

# Entries shared by every backbone block.
_BLOCK_COMMON = {
    'depthwise_conv2d/depthwise_kernel': 'depthwise_conv.conv.weight',
    'se/conv2d/kernel': 'se.conv1.conv.weight',
    'se/conv2d/bias': 'se.conv1.conv.bias',
    'se/conv2d_1/kernel': 'se.conv2.conv.weight',
    'se/conv2d_1/bias': 'se.conv2.conv.bias',
}

# Names of the per-node fusion scalars, in slot order.
_WSM_NAMES = ('WSM', 'WSM_1', 'WSM_2')

# BiFPN node id -> (TF combine scope, target conv module, fusion-weight key,
#                   number of fusion-weight slots,
#                   how many of _WSM_NAMES the node recognises,
#                   (TF resample scope, target module) or None).
# Node 7 recognises ``WSM_2`` but only has two slots, so that variable is
# read and dropped. Nodes 0-3 do not recognise ``WSM_2`` at all.
_FPN_NODES = {
    0: ('op_after_combine5', 'conv6_up', 'p6_w1', 2, 2, None),
    1: ('op_after_combine6', 'conv5_up', 'p5_w1', 2, 2,
        ('resample_0_2_6', 'p5_down_channel')),
    2: ('op_after_combine7', 'conv4_up', 'p4_w1', 2, 2,
        ('resample_0_1_7', 'p4_down_channel')),
    3: ('op_after_combine8', 'conv3_up', 'p3_w1', 2, 2,
        ('resample_0_0_8', 'p3_down_channel')),
    4: ('op_after_combine9', 'conv4_down', 'p4_w2', 3, 3,
        ('resample_0_1_9', 'p4_level_connection')),
    5: ('op_after_combine10', 'conv5_down', 'p5_w2', 3, 3,
        ('resample_0_2_10', 'p5_level_connection')),
    6: ('op_after_combine11', 'conv6_down', 'p6_w2', 3, 3, None),
    7: ('op_after_combine12', 'conv7_down', 'p7_w2', 2, 3, None),
}

# TF head scope -> (key tag, name of the predictor layer,
#                   position of the conv index inside the layer name,
#                   position of the BN scope number inside the layer name).
_HEADS = {
    'box_net': ('reg', 'box-predict', 4, 9),
    'class_net': ('cls', 'class-predict', 6, 11),
}


def tf2pth(v):
    """Re-order the axes of a TensorFlow array for PyTorch.

    Args:
        v (np.ndarray): Array read from the checkpoint.

    Returns:
        np.ndarray: For 4-D input a contiguous copy with axes permuted as
        ``(3, 2, 0, 1)``; for 2-D input a contiguous transposed copy;
        otherwise ``v`` itself, unchanged.
    """
    if v.ndim == 4:
        return np.ascontiguousarray(v.transpose(3, 2, 0, 1))
    if v.ndim == 2:
        return np.ascontiguousarray(v.transpose())
    return v


def _bn_entries(tf_scope, pt_module):
    """Return the four BatchNorm entries for one TF scope / target module."""
    return {
        tf_scope + '/' + tf_name: pt_module + '.' + pt_name
        for tf_name, pt_name in _BN_PARAMS.items()
    }


# NOTE(review): every '<module>.bn.<param>' target produced through
# ``_bn_entries`` below (backbone blocks, BiFPN convs, BiFPN resample blocks)
# follows the 'conv' / 'bn' naming already used for the stem and for
# ``resample_p6``. Confirm against the target model's state_dict keys.
_STEM = {'conv2d/kernel': 'conv.weight'}
_STEM.update(_bn_entries('tpu_batch_normalization', 'bn'))

_RESAMPLE_P6 = {
    'conv2d/kernel': 'down_conv.weight',
    'conv2d/bias': 'down_conv.bias',
}
_RESAMPLE_P6.update(_bn_entries('bn', 'bn'))


def _block_mapping(idx):
    """Build the variable-name mapping for backbone block ``idx``."""
    if idx == 0:
        # The first block has no expand conv.
        mapping = {'conv2d/kernel': 'linear_conv.conv.weight'}
        bn_owners = ('depthwise_conv', 'linear_conv')
    else:
        mapping = {
            'conv2d/kernel': 'expand_conv.conv.weight',
            'conv2d_1/kernel': 'linear_conv.conv.weight',
        }
        bn_owners = ('expand_conv', 'depthwise_conv', 'linear_conv')
    # NOTE(review): BN scopes are assumed to be numbered in the same order as
    # the convs they follow -- confirm against the TF model definition.
    for pos, owner in enumerate(bn_owners):
        scope = 'tpu_batch_normalization'
        if pos:
            scope = scope + '_' + str(pos)
        mapping.update(_bn_entries(scope, owner + '.bn'))
    mapping.update(_BLOCK_COMMON)
    return mapping


def _fpn_mapping(combine_scope, conv_module, resample):
    """Build the mapping for one BiFPN node (conv, its BN, optional resample).

    ``resample`` is ``None`` or a ``(tf_scope, target_module)`` pair.
    """
    mapping = {
        combine_scope + '/conv/' + tf_name: conv_module + '.' + pt_name
        for tf_name, pt_name in _SEP_CONV_PARAMS.items()
    }
    mapping.update(_bn_entries(combine_scope + '/bn', conv_module + '.bn'))
    if resample is not None:
        tf_scope, target = resample
        mapping[tf_scope + '/conv2d/kernel'] = target + '.down_conv.weight'
        mapping[tf_scope + '/conv2d/bias'] = target + '.down_conv.bias'
        mapping.update(_bn_entries(tf_scope + '/bn', target + '.bn'))
    return mapping


def _convert_backbone(seg, v, m):
    """Store one backbone variable (``stem`` or ``blocks_<idx>``) in ``m``.

    Variables under any other backbone scope are ignored.
    """
    scope = seg[1]
    if scope == 'stem':
        prefix = 'backbone.layers.0'
        mapping = _STEM
    elif scope.startswith('blocks_'):
        idx = int(scope[7:])
        prefix = '.'.join(['backbone', 'layers', _BLOCK_TO_LAYER[idx]])
        mapping = _block_mapping(idx)
    else:
        return
    m[prefix + '.' + mapping['/'.join(seg[2:])]] = v


def _convert_fpn(seg, v, m, fusion):
    """Store one ``fpn_cells/cell_<i>/fnode<j>/...`` variable in ``m``.

    ``fusion`` maps node id -> list (one per BiFPN repeat) of fusion-weight
    tensors; the ``WSM*`` scalars are collected there and exported under the
    node's fusion-weight key as soon as every slot has been filled.
    """
    fpn_idx = int(seg[1][5:])
    prefix = '.'.join(['neck', 'bifpn', str(fpn_idx)])
    fnode_id = int(seg[2][5])
    node = _FPN_NODES.get(fnode_id)
    if node is None:
        return
    combine_scope, conv_module, weight_key, n_slots, n_wsm, resample = node

    leaf = seg[3]
    if leaf not in _WSM_NAMES[:n_wsm]:
        # Resample (channel-reduction) variables are only mapped for cell 0.
        mapping = _fpn_mapping(combine_scope, conv_module,
                               resample if fpn_idx == 0 else None)
        suffix = mapping['/'.join(seg[3:])]
        if 'depthwise_conv' in suffix:
            v = v.transpose(1, 0)
        m[prefix + '.' + suffix] = v
    else:
        slot = _WSM_NAMES.index(leaf)
        if slot < n_slots:
            fusion[fnode_id][fpn_idx][slot] = v

    node_weights = fusion[fnode_id][fpn_idx]
    if torch.min(node_weights) > _UNSET:
        m[prefix + '.' + weight_key] = node_weights


def _convert_head(seg, v, m, head):
    """Store one ``box_net`` / ``class_net`` variable in ``m``.

    ``head`` is the matching entry of ``_HEADS``.
    """
    tag, predict_layer, conv_pos, bn_pos = head
    layer = seg[1]
    if predict_layer in layer:
        prefix = '.'.join(['bbox_head', tag + '_header'])
        mapping = _SEP_CONV_PARAMS
    elif 'bn' in layer:
        conv_idx = int(layer[conv_pos])
        # The BN scope number is shifted down by 3 to get the list index
        # (presumably the pyramid level, starting at P3 -- TODO confirm).
        bn_idx = int(layer[bn_pos]) - 3
        prefix = '.'.join(
            ['bbox_head', tag + '_bn_list',
             str(conv_idx),
             str(bn_idx)])
        mapping = _BN_PARAMS
    else:
        conv_idx = int(layer[conv_pos])
        prefix = '.'.join(['bbox_head', tag + '_conv_list', str(conv_idx)])
        mapping = _SEP_CONV_PARAMS
    suffix = mapping['/'.join(seg[2:])]
    if 'depthwise_conv' in suffix:
        v = v.transpose(1, 0)
    m[prefix + '.' + suffix] = v


def convert_key(model_name, bifpn_repeats, weights):
    """Rename TensorFlow checkpoint variables to PyTorch state-dict keys.

    Args:
        model_name (str): Top-level scope of the backbone variables in the
            checkpoint, e.g. ``'efficientnet-b0'``.
        bifpn_repeats (int): Number of BiFPN cells; a fusion-weight tensor is
            allocated for every node of every cell.
        weights (dict): Checkpoint variable name -> ``torch.Tensor`` (axes
            already re-ordered by :func:`tf2pth`).

    Returns:
        dict: PyTorch key -> tensor. Variables whose name contains
        ``'Exponential'`` or ``'global_step'``, names without a ``'/'`` and
        variables under an unknown top-level scope are left out.

    Raises:
        KeyError: A variable inside a known scope has no mapping entry.
        IndexError: A BiFPN cell index is not below ``bifpn_repeats``, or a
            variable name has fewer segments than its scope requires.
    """
    fusion = {
        fnode_id: [
            torch.tensor([_UNSET] * node[3], dtype=torch.float64)
            for _ in range(bifpn_repeats)
        ]
        for fnode_id, node in _FPN_NODES.items()
    }

    m = dict()
    for k, v in weights.items():
        if 'Exponential' in k or 'global_step' in k:
            continue

        seg = k.split('/')
        if len(seg) == 1:
            continue
        # Depthwise kernels need their first two axes swapped.
        if seg[2] == 'depthwise_conv2d':
            v = v.transpose(1, 0)

        top = seg[0]
        if top == model_name:
            _convert_backbone(seg, v, m)
        elif top == 'resample_p6':
            prefix = 'neck.bifpn.0.p5_to_p6.0'
            m[prefix + '.' + _RESAMPLE_P6['/'.join(seg[1:])]] = v
        elif top == 'fpn_cells':
            _convert_fpn(seg, v, m, fusion)
        elif top in _HEADS:
            _convert_head(seg, v, m, _HEADS[top])
    return m


def parse_args():
    """Parse the command-line arguments of the converter."""
    parser = argparse.ArgumentParser(
        description='convert efficientdet weight from tensorflow to pytorch')
    parser.add_argument(
        '--backbone',
        type=str,
        required=True,
        help='efficientnet model name, like efficientnet-b0')
    parser.add_argument(
        '--tensorflow_weight',
        type=str,
        required=True,
        help='efficientdet tensorflow weight name, like efficientdet-d0/model')
    parser.add_argument(
        '--out_weight',
        type=str,
        required=True,
        help='efficientdet pytorch weight name like demo.pth')
    args = parser.parse_args()
    return args


def main():
    """Read the TF checkpoint, convert it and save the PyTorch weights."""
    args = parse_args()
    # Imported here so the module (and ``--help``) works without TensorFlow.
    # NOTE(review): module path reconstructed -- confirm it matches the
    # installed TensorFlow version.
    from tensorflow.python.training import py_checkpoint_reader

    model_name = args.backbone
    ori_weight_name = args.tensorflow_weight
    out_name = args.out_weight

    # Digit of the backbone name -> number of BiFPN cells.
    repeat_map = {
        0: 3,
        1: 4,
        2: 5,
        3: 6,
        4: 7,
        5: 7,
        6: 8,
        7: 8,
    }

    reader = py_checkpoint_reader.NewCheckpointReader(ori_weight_name)
    weights = {
        n: torch.as_tensor(tf2pth(reader.get_tensor(n)))
        for n in reader.get_variable_to_shape_map()
    }
    # Character 14 is the digit of names shaped like 'efficientnet-b0'.
    bifpn_repeats = repeat_map[int(model_name[14])]
    out = convert_key(model_name, bifpn_repeats, weights)
    result = {'state_dict': out}
    torch.save(result, out_name)


if __name__ == '__main__':
    main()