import numpy as np
import paddle
from paddle.nn import Linear
import paddle.nn.functional as F
import os
import random
import json
def load_data(datafile='housing.data', ratio=0.8):
    """Load the housing data, min-max normalize it and split it into train/test.

    The file is read as whitespace-separated float32 values and reshaped into
    rows of 14 columns (13 features followed by the MEDV target). The
    per-column minimum and maximum are computed on the training split only
    and then applied to the whole data set, so the test split is scaled with
    the training statistics.

    Side effect: the training-split statistics are published through the
    module-level globals ``max_values`` and ``min_values`` so that callers
    can undo the normalization later.

    Args:
        datafile: path of the whitespace-separated data file.
        ratio: fraction of the rows (taken from the start of the file) that
            forms the training split.

    Returns:
        A ``(training_data, test_data)`` tuple of 2-D float32 arrays with
        14 columns each.
    """
    global max_values
    global min_values

    feature_names = [
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
        'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
    ]
    feature_num = len(feature_names)

    # The file is a flat stream of numbers; fold it into one row per sample.
    data = np.fromfile(datafile, sep=' ', dtype=np.float32)
    data = data.reshape([data.shape[0] // feature_num, feature_num])

    # Statistics come from the training rows only, so nothing about the
    # test rows leaks into the normalization.
    offset = int(data.shape[0] * ratio)
    training_data = data[:offset]
    maximums, minimums = training_data.max(axis=0), training_data.min(axis=0)
    max_values = maximums
    min_values = minimums

    # A column that is constant over the training rows has a zero range;
    # dividing by 1 instead maps it to 0 rather than producing NaN/inf.
    span = maximums - minimums
    span[span == 0] = 1
    data = (data - minimums) / span

    return data[:offset], data[offset:]
# Test: ensure the data is read correctly
"""
training_data,test_data=load_data()
print(training_data.shape)
print(training_data[1,:])
"""
# Output should be
"""
(404, 14)
[2.35922547e-04 0.00000000e+00 2.62405723e-01 0.00000000e+00
1.72839552e-01 5.47997713e-01 7.82698274e-01 3.48961979e-01
4.34782617e-02 1.14822544e-01 5.53191364e-01 1.00000000e+00
2.04470202e-01 3.68888885e-01]
"""
class Regressor(paddle.nn.Layer):
    """Linear-regression network made of a single fully connected layer.

    Args:
        in_features: number of input values per sample. Defaults to 13, the
            number of feature columns this script feeds to the model.
        out_features: number of values predicted per sample. Defaults to 1
            (the house price).
    """

    def __init__(self, in_features=13, out_features=1):
        # Initialize the parent Layer before registering any sub-layer.
        super().__init__()
        # The whole model is one fully connected layer.
        self.fc = Linear(in_features=in_features, out_features=out_features)

    def forward(self, inputs):
        """Run the forward pass and return the prediction for ``inputs``."""
        return self.fc(inputs)
"""
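Training configuration
1. Specify the machine resources used for training
2. Declare the model instance
3. Load the training and test data
4. Set the optimization algorithm and the learning rate
Detailed steps
Declare an instance of the regression model defined above (Regressor) and set the model state to train
Use the load_data function to load the training data and the test data
Set the optimization algorithm and the learning rate: the optimizer is stochastic gradient descent, and the learning rate is 0.01, the same as in the hand-written version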
"""
# Build the linear-regression model.
model = Regressor()
# Put the model in training mode explicitly before the optimization loop.
model.train()
# Load the normalized training and test splits.
training_data, test_data = load_data()
# Stochastic gradient descent over all model parameters, learning rate 0.01.
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())

epoch_num = 20   # number of passes over the training set
batch_size = 10  # number of samples used for one gradient step

# Outer loop: one iteration per epoch.
for epoch_id in range(epoch_num):
    # Shuffle the training rows in place so every epoch sees different batches.
    np.random.shuffle(training_data)
    # Cut the training set into consecutive mini-batches of batch_size rows
    # (the last one is shorter when the row count is not a multiple).
    mini_batches = [
        training_data[k:k + batch_size]
        for k in range(0, len(training_data), batch_size)
    ]

    # Inner loop: one gradient step per mini-batch.
    for iter_id, mini_batch in enumerate(mini_batches):
        # Every column except the last holds the input features.
        x = np.array(mini_batch[:, :-1])
        # The last column is the label (house price); the slice keeps it 2-D.
        y = np.array(mini_batch[:, -1:])

        # The model consumes tensors, not NumPy arrays. Data read from a file
        # therefore goes file -> NumPy array -> tensor, e.g.
        #     data = np.fromfile(...)
        #     x = paddle.to_tensor(data)
        house_features = paddle.to_tensor(x, dtype='float32')
        prices = paddle.to_tensor(y, dtype='float32')

        # Forward pass.
        predicts = model(house_features)

        # Squared error per sample, averaged over all elements of the batch.
        loss = F.square_error_cost(predicts, label=prices)
        avg_loss = paddle.mean(loss)
        if iter_id % 20 == 0:
            print("epoch:{},iter:{},loss is: {}".format(epoch_id, iter_id, avg_loss.numpy()))

        # Backward pass: compute the gradients of the averaged loss.
        avg_loss.backward()
        # Update the parameters by one step at the configured learning rate.
        opt.step()
        # Reset the gradients before the next batch.
        opt.clear_grad()

# Persist the trained parameters.
paddle.save(model.state_dict(), 'LR_model.pdparams')
print("模型保存成功, 模型参数保存在LR_model.pdparams中")
def load_one_example(data=None):
    """Pick one random sample and split it into features and label.

    Args:
        data: 2-D array whose last column is the label and whose other
            columns are the features. When omitted, the module-level
            ``test_data`` is used.

    Returns:
        A ``(one_data, label)`` tuple: ``one_data`` is the feature row
        reshaped to ``(1, n_features)`` and ``label`` is the scalar value
        from the last column of the same row.

    Raises:
        ValueError: if there are no rows to sample from.
    """
    if data is None:
        data = test_data
    if data.shape[0] == 0:
        raise ValueError("no samples available to draw an example from")

    # Draw a random row index in [0, number of rows).
    idx = np.random.randint(0, data.shape[0])
    one_data, label = data[idx, :-1], data[idx, -1]
    # Add a leading batch dimension; -1 lets NumPy infer the feature count.
    one_data = one_data.reshape([1, -1])
    return one_data, label
# Read the saved parameters back from disk, load them into the model and
# switch the model to evaluation mode.
model_dict = paddle.load('LR_model.pdparams')
model.load_dict(model_dict)
model.eval()

# Draw one random test sample and run it through the model.
one_data, label = load_one_example()
one_data = paddle.to_tensor(one_data, dtype="float32")
predict = model(one_data)

# Undo the min-max scaling of the last column (the price) for both the
# prediction and the label, using the statistics published by load_data.
price_min = min_values[-1]
price_span = max_values[-1] - price_min
predict = predict * price_span + price_min
label = label * price_span + price_min

print("Inference result is {},the corresbonding label is {}".format(predict.numpy(), label))