构建神经网络(day 1)
第一个任务“Hello,world”:波士顿房价预测任务
numpy实现梯度下降
很多细节和具体证明之类的暂且省略
数据读入
# First time using numpy, a day to be recognized 2026/4/01 (not a joke, I mean it)
import numpy as np
import json

# --- Read the raw data file into one flat float array ---
datafile = 'housing.data'
data = np.fromfile(datafile, sep=' ')

# 13 feature columns followed by the label column MEDV (median house price).
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
feature_num = len(feature_names)

# Reshape the flat vector into an [N, feature_num] matrix.
data = data.reshape(data.shape[0] // feature_num, feature_num)
# Loading done.

# A possible way to check:
#   x = data[0]
#   print(x.shape)
#   print(x)
# For this task the test output should be:
#   [6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01
#    4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00 2.400e+01]

# --- Data processing ---
# Most of the time the data must be separated into two parts: one for
# training and one for testing.  Here 80% is used for training and 20%
# for testing.  Notice: the training and testing rows must not overlap —
# in other words, the two splits have to be independent.
ratio = 0.8                           # 80% for training
offset = int(data.shape[0] * ratio)   # index where the training split ends
training_data = data[:offset]

# Min/max statistics are computed on the TRAINING rows only...
maximums = training_data.max(axis=0)
minimums = training_data.min(axis=0)

# ...and then used to min-max normalize every column of the full data set.
for i in range(feature_num):
    data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])

# Re-slice after normalization so both splits hold the scaled values.
training_data = data[:offset]
test_data = data[offset:]

# Features are the first 13 columns, the label is the last column.
x = training_data[:, :-1]
y = training_data[:, -1:]

# Check the data.
print(x[0])
print(y[0])

# Next step: wrap all of this into a load_data() function.
数据加工
import numpy as np def __init__(self,num_of_weights): # Randomly set w # To maintain a standard data,set a accurate num np.random.seed(0) self.w = np.random.randn(num_of_weights,1) self.b=0 def forward(self,x): z=np.dot(x,self.w)+self.b # linear regression return z def loss (self,z,y): error=z-y cost = error * error # Calculate the "loss",which is exactly a cost function for deep learning cost = np.mean(cost) return cost # Reload them as a function
梯度下降
# Gradient of the MSE loss, computed with numpy's "broadcast" feature.
import numpy as np


def gradient(self, x, y):
    # Forward pass to obtain the current predictions.
    z = self.forward(x)
    # dL/dw: batch average of (z - y) * x, reshaped into a column vector.
    gradient_w = np.mean((z - y) * x, axis=0)
    gradient_w = gradient_w[:, np.newaxis]
    # dL/db: batch average of the residual.
    gradient_b = np.mean(z - y)
    return gradient_w, gradient_b
神经网络
import numpy as np
import json


def load_data():
    """Load the Boston housing data, normalize it, and split it 80/20.

    Returns:
        (training_data, test_data): two [N, 14] arrays whose columns are
        the 13 features followed by the MEDV label, min-max normalized
        with statistics computed on the training split only.
    """
    datafile = 'housing.data'
    data = np.fromfile(datafile, sep=' ')
    # 13 influence columns; the 14th (MEDV) is the median price label.
    feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                     'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
    feature_num = len(feature_names)
    # Reshape the flat vector into a [N, 14] matrix.
    data = data.reshape(data.shape[0] // feature_num, feature_num)
    # Separate the data: 80% training, 20% testing.
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    # Min/max come from the training rows only, so no information leaks
    # from the test split into the normalization.
    maximums = training_data.max(axis=0)
    minimums = training_data.min(axis=0)
    for i in range(feature_num):
        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data


class Network(object):
    """One-layer linear regression trained by full-batch gradient descent."""

    def __init__(self, num_of_weights):
        # Randomly generate the initial value of w.  A fixed random seed
        # keeps the results consistent across program runs.
        np.random.seed(0)
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0

    def forward(self, x):
        """Linear prediction z = x . w + b for a batch x of shape [N, D]."""
        z = np.dot(x, self.w) + self.b
        return z

    def loss(self, z, y):
        """Mean squared error between predictions z and labels y."""
        error = z - y
        num_samples = error.shape[0]
        cost = error * error
        cost = np.sum(cost) / num_samples
        return cost

    def gradient(self, x, y):
        """Batch-averaged gradients of the MSE loss w.r.t. w and b."""
        z = self.forward(x)
        gradient_w = (z - y) * x
        gradient_w = np.mean(gradient_w, axis=0)
        gradient_w = gradient_w[:, np.newaxis]
        gradient_b = (z - y)
        gradient_b = np.mean(gradient_b)
        return gradient_b if False else (gradient_w, gradient_b)

    def update(self, gradient_w, gradient_b, lr=0.01):
        # Step against the gradient, with the learning rate as step size.
        self.w = self.w - lr * gradient_w
        self.b = self.b - lr * gradient_b

    def train(self, x, y, iterations=100, lr=0.01):
        """Run full-batch gradient descent; return the per-iteration losses."""
        losses = []
        for i in range(iterations):
            z = self.forward(x)
            L = self.loss(z, y)
            gradient_w, gradient_b = self.gradient(x, y)
            self.update(gradient_w, gradient_b, lr)
            losses.append(L)  # record the loss history
            if (i + 1) % 10 == 0:
                print('iter {},loss {}'.format(i, L))
        return losses


if __name__ == '__main__':
    # Plotting is only needed when this file runs as a script, so the
    # matplotlib import is deferred — the module stays importable (e.g.
    # for testing) without a plotting stack or the data file present.
    import matplotlib.pyplot as plt

    # Get the data.
    train_data, test_data = load_data()
    x = train_data[:, :-1]
    y = train_data[:, -1:]
    # Create a Network with 13 input features.
    net = Network(13)
    num_iterations = 1000
    # Start the training.
    losses = net.train(x, y, iterations=num_iterations, lr=0.01)
    plot_x = np.arange(num_iterations)
    plot_y = np.array(losses)
    plt.plot(plot_x, plot_y)
    plt.show()
SGD版本神经网络
import numpy as np
import json


def load_data():
    """Load the housing data, min-max normalize it (statistics from the
    training rows only), and return an 80/20 (train, test) split."""
    datafile = 'housing.data'
    data = np.fromfile(datafile, sep=' ')
    feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                     'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
    feature_num = len(feature_names)
    data = data.reshape(data.shape[0] // feature_num, feature_num)
    ratio = 0.8
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    maximums = training_data.max(axis=0)
    minimums = training_data.min(axis=0)
    for i in range(feature_num):
        data[:, i] = (data[:, i] - minimums[i]) / (maximums[i] - minimums[i])
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data, test_data


class Network(object):
    """Linear regression trained with mini-batch stochastic gradient descent."""

    def __init__(self, num_of_weights):
        # NOTE(review): unlike the full-batch version, no random seed is
        # fixed here, so each run starts from different initial weights.
        self.w = np.random.randn(num_of_weights, 1)
        self.b = 0

    def forward(self, x):
        """Linear prediction z = x . w + b."""
        z = np.dot(x, self.w) + self.b
        return z

    def loss(self, z, y):
        """Mean squared error between predictions z and labels y."""
        error = z - y
        num_samples = error.shape[0]
        cost = error * error
        cost = np.sum(cost) / num_samples
        return cost

    def gradient(self, x, y):
        """Batch-averaged gradients of the MSE loss w.r.t. w and b."""
        z = self.forward(x)
        N = x.shape[0]
        gradient_w = 1. / N * np.sum((z - y) * x, axis=0)
        gradient_w = gradient_w[:, np.newaxis]
        gradient_b = 1. / N * np.sum(z - y)
        return gradient_w, gradient_b

    def update(self, gradient_w, gradient_b, eta=0.01):
        # Step against the gradient, with eta as the learning rate.
        self.w = self.w - eta * gradient_w
        self.b = self.b - eta * gradient_b

    def train(self, training_data, num_epochs, batch_size=10, eta=0.01):
        """Mini-batch SGD over `training_data` (shape [N, 14]).

        NOTE: shuffles `training_data` in place before every epoch.
        Returns the list of per-mini-batch losses.
        """
        n = len(training_data)
        losses = []
        for epoch_id in range(num_epochs):
            # Reshuffle before each epoch so the batches differ between
            # epochs, which is what makes this "stochastic".
            np.random.shuffle(training_data)
            mini_batches = [training_data[k:k + batch_size]
                            for k in range(0, n, batch_size)]
            for iter_id, mini_batch in enumerate(mini_batches):
                x = mini_batch[:, :-1]   # first 13 columns: features
                y = mini_batch[:, -1:]   # last column: label
                a = self.forward(x)
                loss = self.loss(a, y)
                gradient_w, gradient_b = self.gradient(x, y)
                self.update(gradient_w, gradient_b, eta)
                losses.append(loss)
                print('Epoch {:3d} / iter {:3d}, loss = {:.4f}'.
                      format(epoch_id, iter_id, loss))
        return losses


if __name__ == '__main__':
    # Deferred import: plotting is only needed in script mode, so the
    # module stays importable without matplotlib or the data file.
    import matplotlib.pyplot as plt

    # Get the data.
    train_data, test_data = load_data()
    # Create the network.
    net = Network(13)
    # Start the training.
    losses = net.train(train_data, num_epochs=50, batch_size=100, eta=0.1)
    # Plot how the loss evolves over the mini-batch iterations.
    plot_x = np.arange(len(losses))
    plot_y = np.array(losses)
    plt.plot(plot_x, plot_y)
    plt.show()

浙公网安备 33010602011771号