import numpy as np
# Data set: a 2-D array in which each row is one sample and each column is
# one feature.
data = np.array([
    [11.51, 529.83, 526.49, 93.00, 71.45, 208.25],
    [12.09, 526.70, 524.66, 92.03, 84.59, 221.19],
    [12.40, 539.70, 539.50, 91.24, 91.00, 219.45],
    [12.94, 539.10, 539.30, 90.95, 92.40, 224.30],
])
# Alternative data set, currently disabled:
# X = np.array([
#     [12.826, 533.63, 534.28, 2.0460, 244.24, 94.19],
#     [12.834, 530.88, 530.72, 1.8100, 238.24, 94.39],
#     [12.810, 535.02, 534.37, 1.6200, 233.73, 95.31],
#     [12.817, 534.02, 534.66, 1.4290, 227.45, 95.11]
# ])

# Standardize every feature (column): subtract the column mean and divide by
# the sample standard deviation (ddof=1).
data_std = (data - np.mean(data, axis=0)) / np.std(data, ddof=1, axis=0)

# Covariance matrix of the standardized features (columns are the variables).
cov_matrix = np.cov(data_std, rowvar=False)

# Eigen-decomposition of the covariance matrix.
# The matrix is symmetric, so use eigh: it always returns real eigenvalues and
# orthonormal eigenvectors. The general solver eig may return complex values
# caused by round-off, which is likely here because the matrix is
# rank-deficient (4 samples, 6 features).
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# A covariance matrix is positive semi-definite, so any negative eigenvalue is
# pure round-off noise; clamp it to zero.
eigenvalues = np.clip(eigenvalues, 0.0, None)

# Sort eigenvalues (and their eigenvectors) from largest to smallest.
indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[indices]
eigenvectors = eigenvectors[:, indices]

# An eigenvector is only defined up to its sign, and the sign returned depends
# on the LAPACK build. Fix it deterministically: flip each eigenvector so that
# its largest-magnitude component is positive. Without this, the signed
# composite score below is not reproducible across environments.
dominant_rows = np.argmax(np.abs(eigenvectors), axis=0)
signs = np.sign(eigenvectors[dominant_rows, np.arange(eigenvectors.shape[1])])
eigenvectors = eigenvectors * signs

# Explained-variance ratio of every principal component.
PCA_rate = eigenvalues / np.sum(eigenvalues)
print("各主成分的解释方差比例:")
for i, ratio in enumerate(PCA_rate):
    print(f"前 {i+1} 个主成分: {ratio:.4f}")

# First principal component: eigenvector of the largest eigenvalue.
pca1 = eigenvectors[:, 0]
# Score of every sample on the first principal component.
pca1_scores = np.dot(data_std, pca1)

# Second principal component and the sample scores on it.
pca2 = eigenvectors[:, 1]
pca2_scores = np.dot(data_std, pca2)

# Composite score: the first two component scores weighted by their
# explained-variance ratios.
pca_result = pca1_scores * PCA_rate[0] + pca2_scores * PCA_rate[1]
print(pca_result)