PCA dimensionality reduction with sklearn

# -*- coding: utf-8 -*-
# @Author  : lishipu
# @File    : 06_principal_component_analyze.py

# Code 4-6: dimensionality reduction with principal component analysis
import pandas as pd
from sklearn.decomposition import PCA

# Parameter initialization
inputfile = '../data/principal_component.xls'   # input data (reading .xls needs the xlrd package)
outputfile = '../tmp/dimention_reducted.xls'    # file for the reduced data

data = pd.read_excel(inputfile, header=None)  # read the data
print(data)

pca = PCA()                            # no n_components given, so all components are kept
data_new = pca.fit_transform(data)     # fit the model and project the data in one step
print(pd.DataFrame(data_new))
print(pca.components_)                 # the principal axes (one eigenvector per row)
print(pca.explained_variance_ratio_)   # fraction of the variance explained by each component
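A natural follow-up is deciding how many components are worth keeping from the cumulative explained variance. A minimal sketch, assuming the `pca` and `data` objects defined above; the 0.95 threshold is only an illustrative choice:

import numpy as np

cum_ratio = np.cumsum(pca.explained_variance_ratio_)  # cumulative variance curve
n_keep = int(np.argmax(cum_ratio >= 0.95)) + 1        # smallest k that reaches the threshold
print(cum_ratio, n_keep)

# sklearn can also pick the count itself when n_components is a float in (0, 1)
pca95 = PCA(n_components=0.95)
print(pca95.fit_transform(data).shape)

Passing a float in (0, 1) as n_components makes sklearn keep the smallest number of components whose explained variance reaches that fraction.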




# Code 4-7: compute the component results
pca = PCA(3)  # reduce every row of the data to 3 components
pca.fit(data)
low_d = pca.transform(data)  # project the data onto the 3 principal components
print(pd.DataFrame(low_d))
pd.DataFrame(low_d).to_excel(outputfile, engine='openpyxl')  # save the reduced data
pca.inverse_transform(low_d)  # inverse_transform() can map the reduced data back to the original space when needed


Output of print(data) (the original data read from principal_component.xls):

       0     1     2     3     4     5      6     7
0   40.4  24.7   7.2   6.1   8.3   8.7  2.442  20.0
1   25.0  12.7  11.2  11.0  12.9  20.2  3.542   9.1
2   13.2   3.3   3.9   4.3   4.4   5.5  0.578   3.6
3   22.3   6.7   5.6   3.7   6.0   7.4  0.176   7.3
4   34.3  11.8   7.1   7.1   8.0   8.9  1.726  27.5
5   35.6  12.5  16.4  16.7  22.8  29.3  3.017  26.6
6   22.0   7.8   9.9  10.2  12.6  17.6  0.847  10.6
7   48.4  13.4  10.9   9.9  10.9  13.9  1.772  17.8
8   40.6  19.1  19.8  19.0  29.7  39.6  2.449  35.8
9   24.8   8.0   9.8   8.9  11.9  16.2  0.789  13.7
10  12.5   9.7   4.2   4.2   4.6   6.5  0.874   3.9
11   1.8   0.6   0.7   0.7   0.8   1.1  0.056   1.0
12  32.3  13.9   9.4   8.3   9.8  13.3  2.126  17.1
13  38.5   9.1  11.3   9.5  12.2  16.4  1.327  11.6
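The table above also shows that the columns live on very different scales (column 0 reaches the 40s while column 6 stays below 4), and plain PCA is driven by raw variance, so the large columns dominate the components. If equal weighting is wanted, standardising first is the usual remedy. A minimal sketch, assuming the same `data` DataFrame; the choice of 3 components simply mirrors Code 4-7:

from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Standardise each column to zero mean / unit variance, then project to 3 components.
scaled_pca = make_pipeline(StandardScaler(), PCA(n_components=3))
low_d_scaled = scaled_pca.fit_transform(data)
print(pd.DataFrame(low_d_scaled))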
Output of print(pd.DataFrame(data_new)) (projection onto all 8 components; pandas truncates the display):

            0          1         2  ...         5         6         7
0    8.191337  16.904028  3.909910  ... -0.406842 -0.237412 -0.020154
1    0.285274  -6.480750 -4.628704  ...  0.827234 -0.172319 -0.158161
2  -23.707391  -2.852457 -0.496523  ... -0.513568 -0.207532 -0.214846
3  -14.432026   2.299173 -1.502722  ...  0.358937 -0.268193  0.184805
4    5.430457  10.007041  9.520869  ...  0.319293  0.140059 -0.100282
5   24.159559  -9.364286  0.726579  ... -0.767726 -0.552418  0.271990
6   -3.661346  -7.601986 -2.364399  ... -0.561515  0.625865 -0.110557
7   13.967612  13.891240 -6.449178  ... -0.163364  0.146968 -0.293035
8   40.880936 -13.256853  4.165394  ...  0.314046  0.063878 -0.239905
9   -1.748877  -4.231123 -0.589810  ...  0.078159  0.387908  0.274539
10 -21.943220  -2.366459  1.332038  ... -0.331192  0.324014  0.066672
11 -36.708681  -6.005366  3.971835  ...  0.251422 -0.313702 -0.147066
12   3.287507   4.863809  1.004247  ...  0.430980  0.253854  0.412293
13   5.998859   4.193989 -8.599537  ...  0.164137 -0.190969  0.073707

[14 rows x 8 columns]
Output of print(pd.DataFrame(low_d)) (the 3-component projection that is saved to dimention_reducted.xls):

            0          1         2
0    8.191337  16.904028  3.909910
1    0.285274  -6.480750 -4.628704
2  -23.707391  -2.852457 -0.496523
3  -14.432026   2.299173 -1.502722
4    5.430457  10.007041  9.520869
5   24.159559  -9.364286  0.726579
6   -3.661346  -7.601986 -2.364399
7   13.967612  13.891240 -6.449178
8   40.880936 -13.256853  4.165394
9   -1.748877  -4.231123 -0.589810
10 -21.943220  -2.366459  1.332038
11 -36.708681  -6.005366  3.971835
12   3.287507   4.863809  1.004247
13   5.998859   4.193989 -8.599537
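
Because PCA(3) discards five of the eight components, inverse_transform only approximately reconstructs the original table. A minimal sketch of quantifying that loss, assuming the `data`, `pca`, and `low_d` objects from Code 4-7:

import numpy as np

recovered = pca.inverse_transform(low_d)        # map the 3-component data back to 8 dimensions
abs_err = np.abs(np.asarray(data) - recovered)  # element-wise reconstruction error
print('mean abs error:', abs_err.mean())
print('variance kept :', pca.explained_variance_ratio_.sum())

The retained variance ratio gives a quick sense of how faithful the reconstruction can be at best.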
