机器学习 day1

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression

# Sample 3x4 input array used by the disabled preprocessing snippets below;
# uncomment this line before re-enabling any snippet that reads `data`.
#data = np.array([[3, -1.5, 2, -5.4], [0, 4, -0.3, 2.1], [1, 3.3, -1.9, -4.3]])
# Data preprocessing: mean, standard deviation, and standardization.
# The snippet is parked in a bare string literal, so it does not execute.
# When enabled it prints `data`, then its mean and standard deviation along
# axis 0 (`data.mean(axis = 0)` / `data.std(axis = 0)`), then standardizes the
# array with `preprocessing.scale` and prints the same two statistics for the
# standardized result.
'''   
print(data)
print("Mean: ",data.mean(axis = 0))  #main函数用于返回 算数平均数
#axis声明了函数计算所作用的数据轴 (0表示列, 1表示行)
print("Standard Deviation: ",data.std(axis = 0))  #std用于返回标准差
data_standardized = preprocessing.scale(data)
#scale 函数对数据进行标准化操作 即:以均值为中心点,并调整大小得到单位方差
print("Mean: ",data_standardized.mean(axis = 0))
print("Standard Deviation: ",data_standardized.std(axis = 0))
'''
# Data scaling.
# Disabled snippet (bare string literal, never executed). When enabled it
# prints the min and max of `data` along axis 0, rescales the array with
# `preprocessing.MinMaxScaler(feature_range = (0, 1))`, and prints the min,
# max, and full contents of the scaled array.
# First, the values before scaling:
'''
print("Min: ", data.min(axis = 0))
print("Max: ", data.max(axis = 0))
data_scaler = preprocessing.MinMaxScaler(feature_range = (0, 1))
data_scaled = data_scaler.fit_transform(data)
#这里是缩放之后的:
print("Min: ", data_scaled.min(axis = 0))
print("Max: ", data_scaled.max(axis = 0))
print(data_scaled)
'''
# Normalization with the L1 norm.
# Disabled snippet (bare string literal, never executed). When enabled it
# normalizes `data` along axis 0 and then checks that the absolute values
# along that axis sum to 1.
# FIX: the norm name is 'l1' (lowercase letter L followed by the digit one).
# It was previously typed as '11' (two digits), which scikit-learn rejects as
# an unsupported norm -- that was the error originally blamed on the book.
# The first call (library-default norm) was the workaround for that error; its
# result is overwritten by the corrected call on the following line.
'''
data_normalized = preprocessing.normalize(data, axis = 0)
#The book's call; the norm name is 'l1' (letter l + digit 1), not the number 11
data_normalized = preprocessing.normalize(data, norm = 'l1', axis = 0)
print(data_normalized)
#校验数组各列数据总和是否为1
data_norm_abs = np.abs(data_normalized)
print(data_norm_abs.sum(axis = 0))
'''
# Binarization (used in digital image processing).
# Disabled snippet (bare string literal, never executed). When enabled it
# applies `preprocessing.Binarizer(threshold = 1.4)` to `data` and prints the
# transformed array.
'''
data_binarized = preprocessing.Binarizer(threshold = 1.4).transform(data)
print(data_binarized)
'''
# One-hot encoding (used for sparse data).
# Disabled snippet (bare string literal, never executed). When enabled it
# rebinds `data` to a 4x3 integer array, fits a `preprocessing.OneHotEncoder`
# on it, encodes the single sample [[1, 2, 3]], converts the result with
# `.toarray()`, and prints it.
'''
data = np.array([[1, 1, 2], [0, 2, 3], [1, 0, 1], [0, 1, 0]])
print(data)
encoder = preprocessing.OneHotEncoder()
encoder.fit(data)
encoded_vector = encoder.transform([[1, 2, 3]]).toarray()
print(encoded_vector)
'''
# Read a .csv file and fit a simple linear regression on it.
# Disabled snippet (bare string literal, never executed). When enabled it:
#   1. loads 'VehiclesItaly.csv' into a DataFrame and calls `head()` / `info()`;
#   2. fits `LinearRegression` with column 'x' as the feature and column 'y'
#      as the target, then prints the fitted coefficient and intercept;
#   3. predicts on the same `x`, wraps the predictions in a DataFrame, and
#      draws the raw points with `sns.relplot` plus the fitted values with
#      `sns.lineplot`.
# NOTE(review): the return value of `data.head()` is discarded, so it shows
# nothing outside an interactive/notebook session -- presumably this was
# written for a notebook; confirm.
# NOTE(review): no `plt.show()` call is present, so a plain script run may not
# display the figures -- confirm the intended environment.
'''
data = pd.read_csv('VehiclesItaly.csv')
data.head()
data.info()

x = data[['x']]
y = data[['y']]

lf = LinearRegression()
lf.fit(x,y)
print(lf.coef_)
print(lf.intercept_)

pred = pd.DataFrame(lf.predict(x))
print(pred.columns.values)
sns.relplot(x='x',y='y',data=data)
sns.lineplot(x=data['x'],y=pred[0])
'''

 

posted @ 2023-05-27 17:28  啥都不会的灰太狼  阅读(30)  评论(0)    收藏  举报