11.29混淆矩阵展示

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.preprocessing import KBinsDiscretizer

# Script: bin a continuous target into ordinal classes, train a random-forest
# classifier on the remaining columns, and plot the test-set confusion matrix.
# Any failure is reported on stdout instead of propagating.

# Absolute path of the Excel workbook to load.
file_path = r'D:\BP_R_Data.xlsx'  # replace with the actual path

try:
    # Load the 'Sheet1' worksheet into a DataFrame.
    df = pd.read_excel(file_path, sheet_name='Sheet1', engine='openpyxl')

    # Assumes the last column is the target variable (concrete compressive
    # strength, per the original author's note) and all others are features.
    target_column = df.columns[-1]
    feature_columns = df.columns[:-1]

    # Discretise the continuous target into 5 equal-width, ordinally encoded
    # bins so that it can be used as a classification label.
    discretizer = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform')
    y_discretized = discretizer.fit_transform(df[[target_column]]).astype(int).ravel()

    # Separate the features from the (binned) target.
    X = df[feature_columns]
    y = y_discretized

    # Hold out 20% of the rows for evaluation; the seed keeps the split
    # reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the classifier (a random forest is used here as an example).
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predict the class of every held-out row.
    y_pred = model.predict(X_test)

    # Fix the class set to every bin index. Without an explicit `labels`
    # argument the matrix only covers classes that occur in y_test / y_pred,
    # so a bin missing from the test split would yield a matrix smaller than
    # the list of display labels and the plot would fail.
    class_labels = np.arange(discretizer.n_bins)
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    # Draw the confusion matrix with matplotlib via scikit-learn's display
    # helper, using the same label array that sized the matrix.
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

    fig, ax = plt.subplots(figsize=(8, 6))
    disp.plot(ax=ax, cmap='Blues')
    # Title the matrix axes explicitly rather than relying on pyplot's
    # implicit "current axes".
    ax.set_title('Confusion Matrix for Concrete Compressive Strength Classes')
    plt.show()

except FileNotFoundError:
    # The workbook does not exist at `file_path`.
    print(f"错误:无法找到文件,请检查路径是否正确:{file_path}")
except Exception as e:
    # Top-level boundary of the script: report any other failure and exit
    # normally instead of showing a traceback.
    print(f"发生错误:{e}")
posted @ 2024-12-29 15:03  liuxuechao  阅读(19)  评论(0)    收藏  举报