# coding: utf-8
# @Author : lishipu
# @File : 06_principal_component_analyze.py
# -*- coding: utf-8 -*-
# Listing 4-6: dimensionality reduction with principal component analysis (PCA)
import pandas as pd
import xlwt
from sklearn.decomposition import PCA

# Parameter initialisation
inputfile = '../data/principal_component.xls'
outputfile = '../tmp/dimention_reducted.xls'  # dimension-reduced data

# Read the input; header=None makes pandas treat the first row as data and
# label the columns with integers instead of taking names from it.
data = pd.read_excel(inputfile, header=None)
print(data)

# Fit a PCA that keeps every component and project the data in one step.
# fit_transform() fits the model itself, so calling fit() beforehand would
# only repeat the same decomposition.
pca = PCA()
data_new = pca.fit_transform(data)
print(pd.DataFrame(data_new))
print(pca.components_)  # the model's eigenvectors (principal axes)
print(pca.explained_variance_ratio_)  # share of variance explained by each component

# Listing 4-7: compute the reduced components
pca = PCA(n_components=3)  # reduce every row to 3 dimensions
pca.fit(data)
low_d = pca.transform(data)  # apply the fitted model to reduce the dimensionality
print(pd.DataFrame(low_d))

# Save the result.
# NOTE(review): openpyxl produces the xlsx format, yet the target name ends in
# '.xls'. The path is left unchanged so anything reading it keeps working --
# confirm whether it should be renamed to '.xlsx'.
pd.DataFrame(low_d).to_excel(outputfile, engine='openpyxl')

# inverse_transform() maps the reduced data back to the original feature
# space when needed. The result is bound to a name so it is not silently
# discarded; with only 3 components kept it is an approximation of `data`.
data_restored = pca.inverse_transform(low_d)
# ---------------------------------------------------------------------------
# Sample console output pasted from a run of the script above, kept as
# comments so the file remains valid Python. In each table the first line
# holds the column labels and the first field of every other line is the row
# index.
# ---------------------------------------------------------------------------
# Input data (appears to be the output of print(data)):
# 0 1 2 3 4 5 6 7
# 0 40.4 24.7 7.2 6.1 8.3 8.7 2.442 20.0
# 1 25.0 12.7 11.2 11.0 12.9 20.2 3.542 9.1
# 2 13.2 3.3 3.9 4.3 4.4 5.5 0.578 3.6
# 3 22.3 6.7 5.6 3.7 6.0 7.4 0.176 7.3
# 4 34.3 11.8 7.1 7.1 8.0 8.9 1.726 27.5
# 5 35.6 12.5 16.4 16.7 22.8 29.3 3.017 26.6
# 6 22.0 7.8 9.9 10.2 12.6 17.6 0.847 10.6
# 7 48.4 13.4 10.9 9.9 10.9 13.9 1.772 17.8
# 8 40.6 19.1 19.8 19.0 29.7 39.6 2.449 35.8
# 9 24.8 8.0 9.8 8.9 11.9 16.2 0.789 13.7
# 10 12.5 9.7 4.2 4.2 4.6 6.5 0.874 3.9
# 11 1.8 0.6 0.7 0.7 0.8 1.1 0.056 1.0
# 12 32.3 13.9 9.4 8.3 9.8 13.3 2.126 17.1
# 13 38.5 9.1 11.3 9.5 12.2 16.4 1.327 11.6
#
# Data projected onto all principal components
# (appears to be the output of print(pd.DataFrame(data_new))):
# 0 1 2 ... 5 6 7
# 0 8.191337 16.904028 3.909910 ... -0.406842 -0.237412 -0.020154
# 1 0.285274 -6.480750 -4.628704 ... 0.827234 -0.172319 -0.158161
# 2 -23.707391 -2.852457 -0.496523 ... -0.513568 -0.207532 -0.214846
# 3 -14.432026 2.299173 -1.502722 ... 0.358937 -0.268193 0.184805
# 4 5.430457 10.007041 9.520869 ... 0.319293 0.140059 -0.100282
# 5 24.159559 -9.364286 0.726579 ... -0.767726 -0.552418 0.271990
# 6 -3.661346 -7.601986 -2.364399 ... -0.561515 0.625865 -0.110557
# 7 13.967612 13.891240 -6.449178 ... -0.163364 0.146968 -0.293035
# 8 40.880936 -13.256853 4.165394 ... 0.314046 0.063878 -0.239905
# 9 -1.748877 -4.231123 -0.589810 ... 0.078159 0.387908 0.274539
# 10 -21.943220 -2.366459 1.332038 ... -0.331192 0.324014 0.066672
# 11 -36.708681 -6.005366 3.971835 ... 0.251422 -0.313702 -0.147066
# 12 3.287507 4.863809 1.004247 ... 0.430980 0.253854 0.412293
# 13 5.998859 4.193989 -8.599537 ... 0.164137 -0.190969 0.073707
# [14 rows x 8 columns]
#
# Data reduced to 3 components
# (appears to be the output of print(pd.DataFrame(low_d))):
# 0 1 2
# 0 8.191337 16.904028 3.909910
# 1 0.285274 -6.480750 -4.628704
# 2 -23.707391 -2.852457 -0.496523
# 3 -14.432026 2.299173 -1.502722
# 4 5.430457 10.007041 9.520869
# 5 24.159559 -9.364286 0.726579
# 6 -3.661346 -7.601986 -2.364399
# 7 13.967612 13.891240 -6.449178
# 8 40.880936 -13.256853 4.165394
# 9 -1.748877 -4.231123 -0.589810
# 10 -21.943220 -2.366459 1.332038
# 11 -36.708681 -6.005366 3.971835
# 12 3.287507 4.863809 1.004247
# 13 5.998859 4.193989 -8.599537