python 实现梯度下降

在多元线性回归中会用到梯度下降来计算参数值。这里我用python实现一个梯度下降版本。

这里多元线性方程为 y = A0 + A1*x1 + ... + An*xn

数据输入格式如下（y 表示目标值，x1…xn 表示各个特征）：

y \t x1 \t x2 \t .... xn

代码如下:

import os
import sys

# Model parameters: theta[0] is the intercept A0, theta[k] the coefficient Ak.
theta = []
# One row per training sample: [y, x1, x2, ..., xn], every field parsed as int.
training_data = []
# Per-sample hypothesis values h(x), recomputed on every gradient iteration.
h_value = []
# Learning rate; very small because features are not normalized.
alpha = 0.0000009  # NOTE(review): tuned for the author's data set — adjust per input scale

def load(path):
	"""Load tab-separated training data from ``path``.

	Each line has the form ``y<TAB>x1<TAB>x2<TAB>...<TAB>xn``; every field
	is parsed as an int and appended to the module-level ``training_data``
	list.  ``h_value`` gets one 0.0 slot per loaded sample so ``gradient``
	can write hypothesis values by index.

	Raises ValueError on a non-integer field, IOError/OSError if the file
	cannot be opened.
	"""
	# ``with`` guarantees the file handle is closed even if parsing raises
	# (the original leaked the handle on a ValueError mid-file).
	with open(path, 'r') as f:
		for line in f:
			fields = line.strip('\r\n').split('\t')
			training_data.append([int(v) for v in fields])
	# Pre-allocate one hypothesis slot per sample just loaded.
	h_value.extend(0.0 for _ in training_data)

def init(path, theta_num):
	"""Initialize ``theta_num`` model parameters to 1.0, then load data.

	``theta_num`` must be n+1 for an n-feature model (one slot for the
	intercept A0 plus one per feature).
	"""
	theta.extend(1.0 for _ in range(theta_num))
	load(path)

def gradient():
	"""Run batch gradient descent on the module-level training data.

	Iterates at most 3000 times or until the mean-squared-error cost
	J(theta) = sum((h(x)-y)^2) / (2m) drops to 0.0001, printing the cost
	each iteration and finally the learned ``theta`` vector.
	"""
	iteration = 0
	loss = 100.0  # sentinel so the loop body runs at least once
	theta_num = len(theta)
	data_num = len(training_data)
	while iteration < 3000 and loss > 0.0001:
		# Hypothesis h(x) = theta0 + theta1*x1 + ... + thetan*xn per sample.
		for i in range(data_num):
			hv = theta[0]
			for k in range(1, theta_num):
				hv += theta[k] * training_data[i][k]
			h_value[i] = hv
		# Batch update of every theta component from the cached h_value.
		for j in range(theta_num):
			s = 0.0
			for k in range(data_num):
				err = h_value[k] - training_data[k][0]
				# x0 is implicitly 1 for the intercept term (j == 0).
				s += err if j == 0 else err * training_data[k][j]
			theta[j] -= alpha * s / data_num
		# Recompute the cost with the updated theta.
		# BUG FIX: the original computed ``hv = theta[0] / (2*data_num)``,
		# scaling the intercept inside the hypothesis; the 1/(2m) factor
		# belongs only on the accumulated squared error.
		loss = 0.0
		for i in range(data_num):
			hv = theta[0]
			for k in range(1, theta_num):
				hv += theta[k] * training_data[i][k]
			loss += (hv - training_data[i][0]) ** 2 / (2.0 * data_num)
		print(loss)  # print() works on both Python 2 and 3 for one argument
		iteration += 1
	# Space-separated learned parameters, matching the original Py2 ``print x,``.
	print(' '.join(str(t) for t in theta))

if __name__ == '__main__':
	# Usage: script <data_path> <theta_num>
	# where theta_num = number of features + 1 (intercept included).
	if len(sys.argv) < 3:
		sys.stderr.write('usage: %s <data_path> <theta_num>\n' % sys.argv[0])
		sys.exit(1)
	init(sys.argv[1], int(sys.argv[2]))
	gradient()
	sys.exit(0)
	

  

posted @ 2014-03-11 22:06  anexplore  阅读(641)  评论(0)    收藏  举报