数据挖掘作业

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
inputfile = "C:/Users/23505/Desktop/学习资料/data.csv"
data = pd.read_csv(inputfile)
description = [data.min(),data.max(),data.mean(),data.std()]
description = pd.DataFrame(description,index = ['Min','Max','Mean','STD']).T
print('描述性统计结果:\n',np.round(description,2))

corr = data.corr(method = 'pearson')
print('相关系数矩阵为:\n',np.round(corr,2))
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.subplots(figsize = (10,10))
sns.heatmap(corr,annot = True,vmax = 1,square = True,cmap = 'Greens')
plt.title('学号2020310143047')
plt.show()
plt.close

 

 

import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso

inputfile = "C:/Users/23505/Desktop/学习资料/data.csv"
data = pd.read_csv(inputfile)
lasso = Lasso(1000)
lasso.fit(data.iloc[:,0:13],data['y'])
print('相关系数为:',np.round(lasso.coef_,5))
print('相关系数非零个数为:',np.sum(lasso.coef_ != 0))
mask = lasso.coef_ != 0
mask = np.append(mask,True)
print('相关系数是否为零:',mask)
outputfile = 'D:/anaconda/data/new_reg_data.csv'
new_reg_data = data.iloc[:,mask]
new_reg_data.to_csv(outputfile)
print('输出数据的维度为:',new_reg_data.shape)

 

posted @ 2023-03-06 12:42  永远幺幺  阅读(24)  评论(0)    收藏  举报