数据分布与统计

import re
import matplotlib.pyplot as plt
import numpy as np

# Path of the log file to analyse -- replace with the actual log file path.
# Raw string: "\o" is not a valid escape sequence, so the non-raw literal only
# worked by accident (and triggers an invalid-escape warning on newer Pythons).
log_file = r"net8.0\output.log"

# Captures the integer that follows "Audit Prompt End, latency: " on a log line.
pattern = re.compile(r"Audit Prompt End, latency: (\d+)")

# Scan the log line by line and keep the latency from every matching line.
# `pattern.search` returns None for non-matching lines, which the filter drops.
with open(log_file, "r", encoding="utf-8") as f:
    latencies = [int(m.group(1)) for m in map(pattern.search, f) if m]

if not latencies:
    print("未找到任何 latency 数据")
    # `exit()` is an interactive helper injected by the `site` module and may be
    # missing (e.g. `python -S`, frozen apps); raising SystemExit always works.
    raise SystemExit(0)

# Print summary statistics for the parsed latencies (reported in ms).
latencies_np = np.array(latencies)

sample_count = len(latencies)
mean_ms = latencies_np.mean()
max_ms = latencies_np.max()
min_ms = latencies_np.min()
median_ms = np.median(latencies_np)
std_ms = latencies_np.std()  # NumPy default (ddof=0): population standard deviation

print(f"总数: {sample_count}")
print(f"平均值: {mean_ms:.2f} ms")
print(f"最大值: {max_ms} ms")
print(f"最小值: {min_ms} ms")
print(f"中位数: {median_ms:.2f} ms")
print(f"标准差: {std_ms:.2f} ms")

# Bucket the latencies into fixed-width bins and draw the histogram.
BIN_WIDTH_MS = 100

# Bin edges start at 0 and step by BIN_WIDTH_MS until the maximum is covered.
# Flooring the maximum at 1 guarantees at least two edges (one bin) even when
# every parsed latency is 0; otherwise range() would yield the single edge [0]
# and the histogram would have no bins to draw.
upper_edge = max(max(latencies), 1) + BIN_WIDTH_MS
bins = list(range(0, upper_edge, BIN_WIDTH_MS))

counts, edges, patches = plt.hist(latencies, bins=bins, edgecolor='black')
plt.xlabel('Latency (ms)')
plt.ylabel('Count')
plt.title('Audit Prompt End Latency Distribution')
plt.grid(axis='y')

# Write the count above each bar. The x position is the bin centre computed
# from the actual edges, so it stays correct if BIN_WIDTH_MS is changed.
for count, left, right in zip(counts, edges[:-1], edges[1:]):
    plt.text((left + right) / 2, count, str(int(count)), ha='center', va='bottom')

# Fit the layout once all labels and annotations have been added.
plt.tight_layout()

plt.show()

posted @ 2025-06-24 16:18  Josen_Earth  阅读(5)  评论(0)    收藏  举报