zhijian.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. # -*- coding: UTF-8 -*-
  2. '''
  3. @Project :jh5sonar_newest
  4. @File :zhijian.py
  5. @Author :Fang
  6. @Date :2024/7/26
  7. '''
  8. from hard_disk_storage import HardDiskStorage
  9. import pandas as pd
  10. hard_disk_db = HardDiskStorage()
  11. sql = "SELECT image_name, datetime FROM sonar_data ORDER BY datetime ASC LIMIT 10000;"
  12. var = ()
  13. res = hard_disk_db.execute_sql(sql, var)
  14. # 转换为 DataFrame
  15. df = pd.DataFrame(res)
  16. # 提取时间戳部分
  17. df['timestamp'] = df['image_name'].apply(lambda x: x.split('_')[0])
  18. df['label'] = df['image_name'].apply(lambda x: x.split('_')[1])
  19. # 统计每个时间戳的数据条数
  20. timestamp_counts = df.groupby('timestamp').size().reset_index(name='count')
  21. # 转换 datetime 字段为 datetime 类型
  22. df['datetime'] = pd.to_datetime(df['datetime'])
  23. timestamp_label = df.groupby(['timestamp', 'label']).size().unstack(fill_value=0)
  24. # 计算每组的最后一个 datetime 和下一组的第一个 datetime 的时间差
  25. timestamps = df['timestamp'].unique()
  26. time_differences = []
  27. for i in range(len(timestamps)):
  28. if i == 0:
  29. group_1_num = timestamp_counts[timestamp_counts['timestamp'] == timestamps[i]]['count'].values[0]
  30. group_1_first = df[df['timestamp'] == timestamps[i]]['datetime'].min()
  31. group_1_last = df[df['timestamp'] == timestamps[i]]['datetime'].max()
  32. local_time_diff = group_1_last - group_1_first
  33. local_total_seconds = local_time_diff.total_seconds()
  34. local_minutes = int(local_total_seconds // 60)
  35. local_seconds = int(local_total_seconds % 60)
  36. time_error_flag = ''
  37. num_error_flag = ''
  38. local_time_error_flag = ''
  39. minutes = 0
  40. seconds = 0
  41. time_differences.append({
  42. '时间戳': timestamps[i],
  43. '图片张数': group_1_num,
  44. 's数量':timestamp_label.loc[timestamps[i]].loc['s'],
  45. 'b数量':timestamp_label.loc[timestamps[i]].loc['b'],
  46. '第一组': group_1_first.strftime('%d日 %H:%M:%S'),
  47. '最后一组': group_1_last.strftime('%d日 %H:%M:%S'),
  48. '花费时间': f'{local_minutes} 分 {local_seconds} 秒',
  49. '与上一组间隔时间': f'{minutes} 分 {seconds} 秒',
  50. '间隔时间异常': time_error_flag,
  51. '采集数量异常': num_error_flag,
  52. '采集时间异常': local_time_error_flag,
  53. })
  54. continue
  55. time_error_flag = ''
  56. num_error_flag = ''
  57. local_time_error_flag = ''
  58. group_1_last = df[df['timestamp'] == timestamps[i]]['datetime'].max()
  59. group_1_first = df[df['timestamp'] == timestamps[i]]['datetime'].min()
  60. local_time_diff = group_1_last - group_1_first
  61. local_total_seconds = local_time_diff.total_seconds()
  62. local_minutes = int(local_total_seconds // 60)
  63. local_seconds = int(local_total_seconds % 60)
  64. group_2_last = df[df['timestamp'] == timestamps[i - 1]]['datetime'].max()
  65. time_diff = group_1_first - group_2_last
  66. # 转换时间间隔为秒
  67. total_seconds = time_diff.total_seconds()
  68. minutes = int(total_seconds // 60)
  69. seconds = int(total_seconds % 60)
  70. group_1_num = timestamp_counts[timestamp_counts['timestamp'] == timestamps[i]]['count'].values[0]
  71. group_2_num = timestamp_counts[timestamp_counts['timestamp'] == timestamps[i - 1]]['count'].values[0]
  72. num_diff = abs(group_1_num - group_2_num)
  73. # 判断是否大于5分钟
  74. if local_time_diff > pd.Timedelta(seconds=40) or local_time_diff < pd.Timedelta(seconds=20):
  75. local_time_error_flag = True
  76. if time_diff > pd.Timedelta(minutes=5):
  77. time_error_flag = True
  78. if num_diff > 10:
  79. num_error_flag = True
  80. time_differences.append({
  81. '时间戳': timestamps[i],
  82. '图片张数': group_1_num,
  83. 's数量': timestamp_label.loc[timestamps[i]].loc['s'],
  84. 'b数量': timestamp_label.loc[timestamps[i]].loc['b'],
  85. '第一组':group_1_first.strftime('%d日 %H:%M:%S'),
  86. '最后一组':group_1_last.strftime('%d日 %H:%M:%S'),
  87. '花费时间':f'{local_minutes} 分 {local_seconds} 秒',
  88. '与上一组间隔时间': f'{minutes} 分 {seconds} 秒',
  89. '间隔时间异常':time_error_flag,
  90. '采集数量异常':num_error_flag,
  91. '采集时间异常': local_time_error_flag,
  92. })
  93. # 自定义居中对齐的输出格式,确保表头和数据列对齐
  94. def print_centered(df):
  95. # 计算每列的最大宽度,考虑到列标题和数据的最大值
  96. col_widths = [max(df[col].astype(str).map(len).max(), len(col)) for col in df.columns]
  97. # 打印表头,并且确保宽度和数据一致
  98. header = " | ".join([f"{col:^{width}}" for col, width in zip(df.columns, col_widths)])
  99. print(header)
  100. print("-" * len(header)) # 打印分隔符,长度与表头一致
  101. # 打印每一行数据,保持列宽与表头一致
  102. for _, row in df.iterrows():
  103. print(" | ".join([f"{str(val):^{width}}" for val, width in zip(row, col_widths)]))
  104. # 转换为 DataFrame
  105. time_diff_df = pd.DataFrame(time_differences)
  106. # 显示时间间隔的结果
  107. print("每两组时间戳的时间间隔(分和秒):")
  108. print_centered(time_diff_df)
  109. # time_diff_df.to_excel('2024_08_21_001声呐.xlsx',index=False)