逻辑回归是分类算法,对数据的类别进行判断;
输入:csv文件的名字,并对标签设定一个阈值
输出:10折交叉验证得到的平均分类准确率
代码:
import numpy as np
import pandas as pd
class LR:
    """Logistic-regression binary classifier trained with batch gradient descent.

    Weights are a flat vector (one weight per feature, no separate bias
    term); labels are expected to be 0/1.
    """

    def __init__(self, traits, iter_nums=10000):
        """
        Args:
            traits: number of input features (length of the weight vector).
            iter_nums: number of gradient-descent iterations per fit.
        """
        # FIX: renamed from `init` to `__init__` so that LR(traits=...)
        # actually runs this constructor instead of raising TypeError.
        self.iter_nums = iter_nums
        self.w = np.ones(traits)
        self.learnrate = 0.1

    def fit(self, x_train, y_train):
        """Fit the weights on (x_train, y_train) by batch gradient descent.

        Args:
            x_train: array of shape (n_samples, n_features).
            y_train: array of 0/1 labels, shape (n_samples,).
        """
        # FIX: re-initialize the weights so repeated fits (e.g. the folds in
        # score()) each train from scratch instead of inheriting the weights
        # learned on a previous fold, which would contaminate the CV estimate.
        self.w = np.ones_like(self.w)
        rate = self.learnrate  # current learning rate (decays each step)
        for _ in range(self.iter_nums):
            # Per-sample prediction error: sigmoid(w . x) - y.
            error = self.sigmoid(np.dot(self.w, x_train.T)) - y_train
            # Batch gradient step: mean of error-weighted samples.
            self.w = self.w - rate * np.mean(error[:, np.newaxis] * x_train, axis=0)
            rate *= 0.99  # exponential learning-rate decay

    def predict(self, x_train):
        """Return hard 0.0/1.0 predictions for each row of x_train."""
        z = x_train.dot(self.w)
        y = self.sigmoid(z)
        # Threshold the probabilities at 0.5; returned as floats.
        return (y > 0.5).astype(float)

    def sigmoid(self, z):
        """Logistic function: 1 / (1 + e^-z)."""
        return 1 / (1 + np.exp(-z))

    def score(self, X, Y):
        """Return the mean accuracy over 10-fold cross-validation.

        The folds are consecutive slices of size len(X) // 10; any
        remainder samples at the end are never used as test data.
        """
        n_samples = len(X)
        fold_size = n_samples // 10
        accuracies = []
        for i in range(10):
            test_start = i * fold_size
            test_end = test_start + fold_size
            X_test = X[test_start:test_end]
            Y_test = Y[test_start:test_end]
            # Everything outside [test_start, test_end) is the training split.
            X_train = np.concatenate((X[:test_start], X[test_end:]), axis=0)
            Y_train = np.concatenate((Y[:test_start], Y[test_end:]), axis=0)
            self.fit(X_train, Y_train)
            Y_pred = self.predict(X_test)
            accuracies.append(np.mean(Y_pred == Y_test))
        return np.mean(accuracies)
数据加载与预处理
# Data loading and preprocessing.
data = pd.read_csv('winequality-red.csv')
# Drop incomplete rows. After this call no NaN remains in the frame, so the
# original follow-up fillna(median) on 'quality' was dead code — removed.
data.dropna(inplace=True)
# Binary label: wine is "good" (1) when quality > 5, else 0.
y = (data['quality'] > 5).astype(int).values
X = data.drop('quality', axis=1).values
feature_count = X.shape[1]
初始化模型并计算得分
# Build a model with one weight per feature, then report the
# 10-fold cross-validated accuracy on the loaded dataset.
leger = LR(traits=feature_count)
mean_accuracy = leger.score(X, y)
print(mean_accuracy)
浙公网安备 33010602011771号