逻辑回归是一种分类算法,用于判断数据所属的类别。
输入:csv 文件的名字,并对标签设定一个二分阈值。
输出:10 折交叉验证得到的平均分类准确率。

代码:
import numpy as np
import pandas as pd

class LR:
    """Logistic regression classifier trained with batch gradient descent.

    Parameters
    ----------
    traits : int
        Number of input features (length of the weight vector).
    iter_nums : int, optional
        Number of gradient-descent iterations (default 10000).
    """

    def __init__(self, traits, iter_nums=10000):
        # Fixed the constructor name: the original `def init` was never
        # called by Python, so instances were created without weights.
        self.iter_nums = iter_nums
        self.w = np.ones(traits)
        self.learnrate = 0.1

    def fit(self, x_train, y_train):
        """Fit the weights on (x_train, y_train) via gradient descent.

        x_train : ndarray of shape (n_samples, n_features)
        y_train : ndarray of shape (n_samples,) with 0/1 labels
        """
        # Re-initialize weights so repeated fits (e.g. the successive
        # cross-validation folds in `score`) do not warm-start from the
        # previous fold's solution, which would leak information.
        self.w = np.ones_like(self.w)
        rate = self.learnrate
        for _ in range(self.iter_nums):
            # Per-sample error: predicted probability minus true label.
            error = self.sigmoid(x_train.dot(self.w)) - y_train
            # Gradient of the mean log-loss over the whole batch.
            self.w = self.w - rate * np.mean(error[:, np.newaxis] * x_train, axis=0)
            rate *= 0.99  # exponential learning-rate decay

    def predict(self, x_train):
        """Return hard 0.0/1.0 predictions, one per row of x_train."""
        probs = self.sigmoid(x_train.dot(self.w))
        return (probs > 0.5).astype(float)

    def sigmoid(self, z):
        """Numerically safe logistic function 1 / (1 + e^-z)."""
        # Clip to avoid overflow in np.exp for large |z|; sigmoid is
        # already saturated at 0/1 far before +/-500.
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def score(self, X, Y):
        """Return the mean accuracy over 10-fold cross-validation on (X, Y)."""
        n_samples = len(X)
        fold_size = n_samples // 10  # leftover samples are never used for testing
        accuracies = []

        for i in range(10):
            test_start = i * fold_size
            test_end = test_start + fold_size

            X_test = X[test_start:test_end]
            Y_test = Y[test_start:test_end]

            # Train on everything outside the current test fold.
            X_train = np.concatenate((X[:test_start], X[test_end:]), axis=0)
            Y_train = np.concatenate((Y[:test_start], Y[test_end:]), axis=0)

            self.fit(X_train, Y_train)
            Y_pred = self.predict(X_test)
            accuracies.append(np.mean(Y_pred == Y_test))

        return np.mean(accuracies)

数据加载与预处理

# Load the red-wine quality dataset and build the design matrix / labels.
data = pd.read_csv('winequality-red.csv')
data.dropna(inplace=True)  # drop any row with a missing value
# NOTE: the original also ran fillna(median) on 'quality' here, but after
# dropna() no NaNs remain, so that line was dead code and is removed.

# Binarize quality: wines scoring above 5 form the positive class.
y = (data['quality'] > 5).astype(int).values
X = data.drop('quality', axis=1).values
feature_count = X.shape[1]

初始化模型并计算得分

# Build the model and report its 10-fold cross-validation accuracy.
classifier = LR(traits=feature_count)
mean_accuracy = classifier.score(X, y)
print(mean_accuracy)

posted on 2025-03-30 18:08  诗酒古今  阅读(21)  评论(0)    收藏  举报