Logistic Regression: Derivation and Code

This post covers the derivation of logistic regression together with a Python implementation.

The goal is to find a straight line (the decision boundary) that separates the two classes of points in the plot.
Assume this line is 1*w0 + x1*w1 + x2*w2 = 0
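In matrix form, the constant 1 becomes a bias column prepended to the feature matrix, which is exactly what the np.hstack call in fit below does. A tiny illustration (the feature values are made up):

import numpy as np

features = np.array([[2.0, 3.0],
                     [1.0, 4.0]])  # columns x1, x2
X = np.hstack((np.ones((features.shape[0], 1)), features))
# X is now [[1., 2., 3.],
#           [1., 1., 4.]] -- each row is [1, x1, x2], matching 1*w0 + x1*w1 + x2*w2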

The predicted probability function is

\[f(W) = \frac{1}{1 + e^{-XW}} \]

\[\begin{align*} \theta &= X * W \\ f(\theta) &= \frac{1}{1 + e^{-\theta}} \\ \frac{\partial f(\theta)}{\partial \theta} &= \frac{-1}{(1 + e^{-\theta})^2} * e^{-\theta} * (-1) \\ &= \frac{1 + e^{-\theta} - 1}{(1 + e^{-\theta}) * (1 + e^{-\theta})} \\ &= \frac{1}{1 + e^{-\theta}} * \frac{(1 + e^{-\theta}) - 1}{1 + e^{-\theta}} \\ &= f(\theta) * (1 - f(\theta)) \end{align*} \]
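This identity is easy to verify numerically against a central finite difference (a standalone sketch; the sample points are arbitrary):

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

theta = np.linspace(-5, 5, 11)
eps = 1e-6
numeric = (sigmoid(theta + eps) - sigmoid(theta - eps)) / (2 * eps)  # central difference
analytic = sigmoid(theta) * (1 - sigmoid(theta))                     # f(theta) * (1 - f(theta))
print(np.allclose(numeric, analytic))  # True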

Cross-entropy is used as the loss function:

\[L(\theta) = -\frac{1}{m}\sum_{i=1}^{m} \left[ (1-y_i) * log(1 - f(\theta_i)) + y_i * log(f(\theta_i)) \right] \]
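For intuition: with y = 1, a confident correct prediction f(θ) = 0.9 contributes -log(0.9) ≈ 0.105 to the loss, while a confident wrong prediction f(θ) = 0.1 contributes -log(0.1) ≈ 2.303, so confident mistakes are penalized heavily.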

For a single sample, drop the minus sign and the averaging and write

\[l(\theta) = (1-y) * log(1 - f(\theta)) + y * log(f(\theta)) \]

Then, differentiating with respect to θ:

\[\begin{align*} \frac{\partial l(\theta)}{\partial \theta} &= (1 - y)\frac{1}{1 - f(\theta)} * (-\frac{\partial f(\theta)}{\partial \theta}) + y * \frac{1}{f(\theta)} * \frac{\partial f(\theta)}{\partial \theta} \\ &= (y - 1) * \frac{1}{1 - f(\theta)} * f(\theta) * (1 - f(\theta)) + y * \frac{1}{f(\theta)} * f(\theta) * (1 - f(\theta))\\ &= (y - 1) * f(\theta) + y*(1 - f(\theta))\\ &= y * f(\theta) - f(\theta) + y - y * f(\theta) \\ &= y - f(\theta)\\ \end{align*} \]

\[\frac{\partial L(\theta)}{\partial \theta} = -\frac{1}{m} \sum_{i=1}^{m} (y_i - f(\theta_i)) \]

By the chain rule, the gradient with respect to the weights is

\[\begin{align*} \frac{\partial l(W)}{\partial W} &= \frac{\partial l(\theta)}{\partial \theta} * \frac{\partial \theta}{\partial W} \\ &= X^T * (y - \frac{1}{1 + e^{-XW}}) \end{align*} \]

so for the averaged loss, \(\frac{\partial L(W)}{\partial W} = -\frac{1}{m} X^T (y - f(XW))\), which is exactly what gradient_descent computes below.
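A quick finite-difference check of this gradient on made-up data (sigmoid mirrors the code below; loss and gradient are illustrative names):

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def loss(X, y, W):
    f_theta = sigmoid(np.dot(X, W))
    return -np.mean((1 - y) * np.log(1 - f_theta) + y * np.log(f_theta))

def gradient(X, y, W):
    return -1.0 / X.shape[0] * np.dot(X.T, y - sigmoid(np.dot(X, W)))

rng = np.random.default_rng(0)
X = np.hstack((np.ones((5, 1)), rng.normal(size=(5, 2))))  # bias column + 2 features
y = rng.integers(0, 2, size=(5, 1)).astype(float)
W = rng.normal(size=(3, 1))

eps = 1e-6
numeric = np.zeros_like(W)
for j in range(W.shape[0]):
    d = np.zeros_like(W)
    d[j] = eps
    numeric[j] = (loss(X, y, W + d) - loss(X, y, W - d)) / (2 * eps)
print(np.allclose(numeric, gradient(X, y, W)))  # True

The complete training script: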

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
import numpy as np
import pandas as pd
import threading
import matplotlib.pyplot as plt

from utils import Ploter  # plotting helper from the author's utils.py (not shown in the post)


def sigmoid(x):
    """
    sigmoid'(x) = sigmoid(x) * ( 1 - sigmoid(x))
    """
    return 1.0 / (1 + np.exp(-x))


def gradient_descent(X, y, W):
    # Gradient of the cross-entropy loss w.r.t. W: -1/m * X^T * (y - sigmoid(XW))
    return -1.0 / X.shape[0] * np.dot(X.T, (y - sigmoid(np.dot(X, W))))


def get_loss(X, y, W):
    f_theta = sigmoid(np.dot(X, W))
    return -1.0 / X.shape[0] * np.sum((1 - y) * np.log(1 - f_theta) + y * np.log(f_theta))


class LogisticRegression(object):
    def __init__(self, learning_rate=0.001, iterations=10):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.W = None

    def fit(self, X, y):
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        self.W = np.random.rand(X.shape[1], 1)
        y = y.values.reshape(y.shape[0], 1)
        """
        X = [[1, x1_1, x2_1],
             ...
             [1, x1_n, x2_n]]
        self.W = [[w0],
                  [w1],
                  [w2]],
        y = [[y1],
             ...
             [yn]]
        """
        for i in range(self.iterations):
            gradient = gradient_descent(X, y, self.W)
            self.W = self.W - self.learning_rate * gradient
            time.sleep(0.001)  # yield briefly so the plotting thread can redraw
            if i % 100 == 0:
                print('iteration: {} loss: {}'.format(i, get_loss(X, y, self.W)))
            # print('self.W in fit: {}'.format(self.W))

    def set_line(self, plot):
        while True:
            if self.W is not None:
                # w0 * 1 + x1 * w1 + x2 * w2 = 0
                xs = np.linspace(0, 5, 10)
                ys = -(self.W[0][0] + self.W[1][0] * xs) / self.W[2][0]
                plot.set_figtext('y = {}x + {}'.format(-self.W[1][0] / self.W[2][0], -self.W[0][0] / self.W[2][0]))
                plot.set_lines(xs, ys)
            time.sleep(1)

    def predict(self, X):
        # Add the same bias column used in fit, then threshold the probability at 0.5.
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        return (sigmoid(np.dot(X, self.W)) >= 0.5).astype(int)


def main():
    plot = Ploter('logistic_regression')

    train_data = pd.read_csv('train.csv')
    X = train_data[['x1', 'x2']]
    y = train_data['y']

    lr = LogisticRegression(learning_rate=0.01, iterations=40000)

    plot.set_scatter(train_data['x1'], train_data['x2'], train_data['y'])
    # Daemon threads so the process can exit once the plot window is closed.
    threading.Thread(target=lr.fit, args=(X, y), daemon=True).start()
    threading.Thread(target=lr.set_line, args=(plot,), daemon=True).start()
    time.sleep(1)
    plt.show()


if __name__ == '__main__':
    main()
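train.csv is not included with the post; here is a sketch that generates a compatible file (the column names x1, x2, y are taken from main above, and the separating line x2 = x1 is an arbitrary choice):

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
n = 100
x1 = rng.uniform(0, 5, n)
x2 = rng.uniform(0, 5, n)
y = (x2 > x1).astype(int)  # label by which side of the line x2 = x1 the point falls on
pd.DataFrame({'x1': x1, 'x2': x2, 'y': y}).to_csv('train.csv', index=False)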

Reproduction without permission is prohibited: https://spxcds.com/2019/02/22/logistic_regression

posted @ 2019-02-25 10:02  spxcds