# 机器学习任务7

# 导入必要的库

import numpy as np

import pandas as pd

from sklearn import datasets

from sklearn.model_selection import train_test_split

from sklearn.cluster import KMeans

from sklearn.metrics import silhouette_score, classification_report, accuracy_score

 

def map_clusters_to_labels(cluster_ids, true_labels, n_clusters: int) -> np.ndarray:
    """Build a lookup table from cluster index to the majority true label.

    K-means numbers its clusters arbitrarily, so cluster ``0`` has no
    relation to class ``0``. This helper derives, from labelled samples,
    which class each cluster most often contains.

    Args:
        cluster_ids: 1-D sequence of cluster indices in ``range(n_clusters)``.
        true_labels: 1-D sequence of ground-truth labels, same length as
            ``cluster_ids``.
        n_clusters: Number of clusters the model was fitted with.

    Returns:
        Array ``mapping`` of length ``n_clusters`` such that
        ``mapping[cluster_ids]`` yields class-label predictions. Ties are
        resolved in favour of the smallest label value; a cluster with no
        members falls back to the overall majority label.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    cluster_ids = np.asarray(cluster_ids)
    true_labels = np.asarray(true_labels)
    if cluster_ids.shape != true_labels.shape:
        raise ValueError("cluster_ids and true_labels must have the same shape")
    if true_labels.size == 0:
        raise ValueError("cannot derive a mapping from zero samples")

    mapping = np.empty(n_clusters, dtype=true_labels.dtype)
    for cluster in range(n_clusters):
        members = true_labels[cluster_ids == cluster]
        if members.size == 0:
            # No labelled sample landed in this cluster: use the global majority.
            members = true_labels
        # np.unique returns sorted values, so argmax picks the smallest label on ties.
        values, counts = np.unique(members, return_counts=True)
        mapping[cluster] = values[np.argmax(counts)]
    return mapping


def main():
    """Cluster the iris data with K-means and evaluate it against the true classes.

    Steps: hold out roughly one third of the samples as a test set, fit
    K-means (k=3) on the training split, report the training silhouette
    score, then map clusters to classes (learned on the training split only)
    and print a classification report and accuracy for the test split.
    """
    # Load the iris dataset: feature matrix and ground-truth class labels.
    iris = datasets.load_iris()
    X = iris.data
    y_true = iris.target

    # Hold-out split: about 1/3 of the samples form the test set, stratified by class.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_true, test_size=0.33, random_state=42, stratify=y_true
    )
    print(f"训练集样本数: {len(y_train)}, 测试集样本数: {len(y_test)}")

    # K-means with 3 clusters. n_init is set explicitly because its default
    # differs between scikit-learn releases, which would change the results.
    n_clusters = 3
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    kmeans.fit(X_train)

    # Silhouette coefficient on the training split.
    train_clusters = kmeans.predict(X_train)
    train_silhouette = silhouette_score(X_train, train_clusters)
    print(f"训练集轮廓系数: {train_silhouette:.4f}")

    # Cluster indices are arbitrary, so translate them to class labels before
    # scoring. The mapping is learned from the training split only, which keeps
    # the test labels out of the model.
    cluster_to_label = map_clusters_to_labels(train_clusters, y_train, n_clusters)

    # Predict clusters for the test split and convert them to class labels.
    test_clusters = kmeans.predict(X_test)
    y_pred = cluster_to_label[test_clusters]

    # Report test-set results using the mapped labels.
    print(f"\n测试集分类报告:\n {classification_report(y_test, y_pred)}")
    print(f"准确度: {accuracy_score(y_test, y_pred)}")
    # The raw cluster assignments are still shown for reference.
    print(f"聚类结果:\n{test_clusters}")


if __name__ == "__main__":
    main()

# posted @ 2024-11-08 10:53  芊羽鱼  阅读(9)  评论(0)    收藏  举报