Logistic Regression: Derivation and Code
This post walks through the derivation of logistic regression along with a Python implementation.
The goal is to find a decision boundary that separates the two classes of points in the plot.
Assume this boundary is the line 1*w0 + x1*w1 + x2*w2 = 0.
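Solving for x2 (assuming w2 ≠ 0) gives the explicit form of the line that the plotting code below draws:
\[x_2 = -\frac{w_1}{w_2} x_1 - \frac{w_0}{w_2}
\]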
The predicted probability function, with X the data matrix carrying a leading column of ones and W the column vector (w0, w1, w2), is
\[f(W) = \frac{1}{1 + e^{-XW}}
\]
\[\begin{align*}
\theta &= X * W \\
f(\theta) &= \frac{1}{1 + e^{-\theta}} \\
\frac{\partial f(\theta)}{\partial \theta} &= \frac{-1}{(1 + e^{-\theta})^2} * e^{-\theta} * (-1) \\
&= \frac{1 + e^{-\theta} - 1}{(1 + e^{-\theta}) * (1 + e^{-\theta})} \\
&= \frac{1}{1 + e^{-\theta}} * \frac{(1 + e^{-\theta}) - 1}{(1 + e^{-\theta})} \\
&= f(\theta) * (1 - f(\theta)) \\
\end{align*}
\]
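As a sanity check, the identity f'(θ) = f(θ)(1 - f(θ)) can be verified with a central difference (a standalone sketch; 0.7 is an arbitrary test point):

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

theta, eps = 0.7, 1e-6
numeric = (sigmoid(theta + eps) - sigmoid(theta - eps)) / (2 * eps)
analytic = sigmoid(theta) * (1 - sigmoid(theta))
print(numeric, analytic)  # both print ~0.2217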
Cross-entropy is used as the loss function:
\[L(\theta) = -\frac{1}{m}\sum_{i=1}^{m} \left[ (1-y_i) \log(1 - f(\theta_i)) + y_i \log(f(\theta_i)) \right]
\]
where θ_i = X_i * W is the linear score of the i-th sample.
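For intuition: a point with y_i = 1 predicted at f(θ_i) = 0.9 contributes -log(0.9) ≈ 0.105 to the loss, while the same point predicted at f(θ_i) = 0.1 contributes -log(0.1) ≈ 2.30, so confident wrong predictions are penalized far more heavily.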
Let
\[\begin{align*}
l(\theta) &= (1-y)\log(1 - f(\theta)) + y * \log(f(\theta)) \\
\end{align*}
\]
Then
\[\begin{align*}
\frac{\partial l(\theta)}{\partial \theta} &= (1 - y)\frac{1}{1 - f(\theta)} * (-\frac{\partial f(\theta)}{\partial \theta}) + y * \frac{1}{f(\theta)} * \frac{\partial f(\theta)}{\partial \theta} \\
&= (y - 1) * \frac{1}{1 - f(\theta)} * f(\theta) * (1 - f(\theta)) + y * \frac{1}{f(\theta)} * f(\theta) * (1 - f(\theta))\\
&= (y - 1) * f(\theta) + y*(1 - f(\theta))\\
&= y * f(\theta) - f(\theta) + y - y * f(\theta) \\
&= y - f(\theta)\\
\end{align*}
\]
Thus
\[\frac{\partial L(\theta)}{\partial \theta} = -\frac{1}{m} \sum_{i=1}^{m} (y_i - f(\theta_i))
\]
Applying the chain rule to move from θ to W, and using ∂θ/∂W = X:
\[\begin{align*}
\frac{\partial l(W)}{\partial W} &= \frac{\partial l(\theta)}{\partial \theta} * \frac{\partial \theta}{\partial W} \\
&= X^T * (y - \frac{1}{1 + e^{-XW}})
\end{align*}
\]
so the gradient of the full loss, which the code below implements, is
\[\frac{\partial L(W)}{\partial W} = -\frac{1}{m} X^T (y - f(XW))
\]
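Before trusting the training loop, this analytic gradient can be checked against central differences on small random data (a self-contained sketch; the data and names here are made up purely for the check):

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

def loss(X, y, W):
    f = sigmoid(np.dot(X, W))
    return -np.mean((1 - y) * np.log(1 - f) + y * np.log(f))

def grad(X, y, W):
    # -1/m * X^T (y - f(theta)), as derived above
    return -np.dot(X.T, y - sigmoid(np.dot(X, W))) / X.shape[0]

rng = np.random.default_rng(0)
X = np.hstack([np.ones((5, 1)), rng.normal(size=(5, 2))])
y = rng.integers(0, 2, size=(5, 1)).astype(float)
W = rng.normal(size=(3, 1))

eps = 1e-6
numeric = np.zeros_like(W)
for j in range(W.size):
    d = np.zeros_like(W)
    d.flat[j] = eps
    numeric.flat[j] = (loss(X, y, W + d) - loss(X, y, W - d)) / (2 * eps)
print(np.max(np.abs(numeric - grad(X, y, W))))  # difference should be ~1e-9 or smaller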
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import threading
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from utils import Ploter


def sigmoid(x):
    """
    sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
    """
    return 1.0 / (1 + np.exp(-x))


def gradient_descent(X, y, W):
    # Gradient of the loss w.r.t. W: -1/m * X^T (y - sigmoid(XW)).
    return -1.0 / X.shape[0] * np.dot(X.T, (y - sigmoid(np.dot(X, W))))


def get_loss(X, y, W):
    # Cross-entropy loss averaged over the m training samples.
    f_theta = sigmoid(np.dot(X, W))
    return -1.0 / X.shape[0] * np.sum((1 - y) * np.log(1 - f_theta) + y * np.log(f_theta))


class LogisticRegression(object):
    def __init__(self, learning_rate=0.001, iterations=10):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.W = None

    def fit(self, X, y):
        # Prepend a column of ones so w0 acts as the bias term.
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        self.W = np.random.rand(X.shape[1], 1)
        y = y.values.reshape(y.shape[0], 1)
        """
        X = [[1, x1_1, x2_1],
             ...
             [1, x1_n, x2_n]]
        self.W = [[w0],
                  [w1],
                  [w2]]
        y = [[y1],
             ...
             [yn]]
        """
        for i in range(self.iterations):
            gradient = gradient_descent(X, y, self.W)
            self.W = self.W - self.learning_rate * gradient
            # Slow training slightly so the plotting thread can keep up.
            time.sleep(0.001)
            if i % 100 == 0:
                print('iteration: {} loss: {}'.format(i, get_loss(X, y, self.W)))

    def set_line(self, plot):
        # Redraw the current decision boundary once a second.
        while True:
            if self.W is not None:
                # w0 * 1 + x1 * w1 + x2 * w2 = 0  =>  x2 = (-w0 - w1 * x1) / w2
                xs = np.linspace(0, 5, 10)
                ys = (np.array([0 - self.W[0][0]] * 10) - self.W[1][0] * xs) / self.W[2][0]
                plot.set_figtext('y = {}x + {}'.format(-self.W[1][0] / self.W[2][0], -self.W[0][0] / self.W[2][0]))
                plot.set_lines(xs, ys)
            time.sleep(1)

    def predict(self, X):
        # Classify as 1 when the predicted probability is at least 0.5.
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        return (sigmoid(np.dot(X, self.W)) >= 0.5).astype(int)


def main():
    plot = Ploter('logistic_regression')
    train_data = pd.read_csv('train.csv')
    X = train_data[['x1', 'x2']]
    y = train_data['y']
    lr = LogisticRegression(learning_rate=0.01, iterations=40000)
    plot.set_scatter(train_data['x1'], train_data['x2'], train_data['y'])
    # Train and redraw in background threads so plt.show() can block the main
    # thread; daemon threads let the process exit when the window is closed.
    threading.Thread(target=lr.fit, args=(X, y), daemon=True).start()
    threading.Thread(target=lr.set_line, args=(plot,), daemon=True).start()
    time.sleep(1)
    plt.show()


if __name__ == '__main__':
    main()
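The listing depends on two files not shown in this post: a train.csv with columns x1, x2, y, and the utils module providing Ploter. Both sketches below are assumptions reconstructed from how they are used above, not the actual files.

A hypothetical generator for a linearly separable train.csv:

# make_data.py -- hypothetical: writes a two-blob train.csv (columns x1, x2, y)
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
a = rng.normal(loc=[1.5, 1.5], scale=0.5, size=(50, 2))  # class 0, around (1.5, 1.5)
b = rng.normal(loc=[3.5, 3.5], scale=0.5, size=(50, 2))  # class 1, around (3.5, 3.5)
df = pd.DataFrame(np.vstack([a, b]), columns=['x1', 'x2'])
df['y'] = [0] * 50 + [1] * 50
df.to_csv('train.csv', index=False)

A minimal stand-in for utils.Ploter, guessing its interface (set_scatter, set_lines, set_figtext) from the calls above; note that updating a matplotlib figure from a background thread, as the script does, is not strictly thread-safe:

# utils.py -- hypothetical minimal Ploter; the real helper is not shown
import matplotlib.pyplot as plt

class Ploter(object):
    def __init__(self, title):
        self.fig, self.ax = plt.subplots()
        self.ax.set_title(title)
        self.line = None
        self.text = None

    def set_scatter(self, x1, x2, y):
        # color the two classes differently
        self.ax.scatter(x1, x2, c=y, cmap='bwr')

    def set_lines(self, xs, ys):
        # draw the boundary on the first call, then just move it
        if self.line is None:
            self.line, = self.ax.plot(xs, ys, 'g-')
        else:
            self.line.set_data(xs, ys)
        self.fig.canvas.draw_idle()

    def set_figtext(self, text):
        if self.text is None:
            self.text = self.fig.text(0.15, 0.92, text)
        else:
            self.text.set_text(text)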