使用paddle实现深度网络构建(day 2)

使用paddle底层实现波士顿房价预测

import numpy as np
import paddle
from paddle.nn import Linear
import paddle.nn.functional as F
import os 
import random
import json
def load_data():
    datafile='housing.data'
    data=np.fromfile(datafile,sep=' ',dtype=np.float32)
    feature_names=[ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]
    feature_num=len(feature_names)
    data=data.reshape([data.shape[0]//feature_num,feature_num])
    ratio=0.8
    offset=int(data.shape[0]*ratio)
    training_data=data[:offset]
    maximums,minimums=training_data.max(axis=0),training_data.min(axis=0)
    global max_values
    global min_values
    max_values=maximums
    min_values=minimums
    for i in range(feature_num):
        data[:,i]=(data[:,i]-min_values[i])/(maximums[i]-minimums[i])
    training_data=data[:offset]
    test_data=data[offset:]
    return training_data,test_data
#test to ensure the correctness of data reading
"""
training_data,test_data=load_data()
print(training_data.shape)
print(training_data[1,:])
"""
#Output should be
"""
(404, 14)
[2.35922547e-04 0.00000000e+00 2.62405723e-01 0.00000000e+00
 1.72839552e-01 5.47997713e-01 7.82698274e-01 3.48961979e-01
 4.34782617e-02 1.14822544e-01 5.53191364e-01 1.00000000e+00
 2.04470202e-01 3.68888885e-01]
"""
class Regressor(paddle.nn.Layer):
    """Linear regression model for house-price prediction.

    A single fully connected layer maps the 13 housing features to one
    predicted (normalized) price value.
    """

    def __init__(self):
        # Initialize the paddle Layer machinery before adding sublayers.
        super(Regressor, self).__init__()
        # The only layer: a dense mapping from 13 inputs to 1 output.
        self.fc = Linear(in_features=13, out_features=1)

    def forward(self, inputs):
        """Forward pass: return the price prediction for `inputs`."""
        return self.fc(inputs)

    # Training configuration (done at module level, below):
    # 1. choose the compute resources for training
    # 2. instantiate the model and set it to train mode
    # 3. load the training and test data via load_data()
    # 4. pick the optimizer (SGD) and the learning rate (0.01)
# Declare a predefined linear regression model.
model = Regressor()
# Enable model training mode (the original comment promised this but the
# call was missing, so the model stayed in its default mode).
model.train()
training_data, test_data = load_data()
# Stochastic gradient descent over all trainable parameters, lr = 0.01.
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())

epoch_num = 20   # number of full passes over the training set
batch_size = 10  # samples per gradient update

# Outer loop: one iteration per epoch.
for epoch_id in range(epoch_num):
    # Shuffle before each epoch so batches differ between epochs.
    np.random.shuffle(training_data)
    # Split the training set into mini-batches of `batch_size` rows
    # (the final batch may be smaller).
    mini_batches = [training_data[k:k + batch_size]
                    for k in range(0, len(training_data), batch_size)]
    # Inner loop: one gradient step per mini-batch.
    for iter_id, mini_batch in enumerate(mini_batches):
        x = np.array(mini_batch[:, :-1])  # 13 feature columns
        y = np.array(mini_batch[:, -1:])  # label column (normalized price)
        # Convert numpy arrays to tensors: tensors support autograd (the
        # key to paddle's concise training code) and can live on the GPU.
        house_features = paddle.to_tensor(x, dtype='float32')
        prices = paddle.to_tensor(y, dtype='float32')
        # Forward computation.
        predicts = model(house_features)
        # Squared-error loss, averaged over the batch.
        loss = F.square_error_cost(predicts, label=prices)
        avg_loss = paddle.mean(loss)
        if iter_id % 20 == 0:
            print("epoch:{},iter:{},loss is: {}".format(epoch_id, iter_id, avg_loss.numpy()))
        # Backward pass: compute gradients of the loss w.r.t. parameters.
        avg_loss.backward()
        # Take one SGD step, then clear gradients for the next iteration.
        opt.step()
        opt.clear_grad()

# Persist the trained parameters to disk.
paddle.save(model.state_dict(), 'LR_model.pdparams')
print("模型保存成功, 模型参数保存在LR_model.pdparams中")
def load_one_example(data=None):
    """Pick one random sample for inference.

    Args:
        data: optional 2-D array of samples (features + label column).
            Defaults to the module-global ``test_data`` for backward
            compatibility with the original parameterless call.

    Returns:
        (one_data, label): the 13 features reshaped to (1, 13) so the model
        can consume them as a batch of one, and the scalar label.
    """
    if data is None:
        data = test_data
    # Randomly pick one row.
    idx = np.random.randint(0, data.shape[0])
    one_data, label = data[idx, :-1], data[idx, -1]
    # -1 lets numpy infer the feature count; result shape is (1, 13).
    one_data = one_data.reshape([1, -1])
    return one_data, label

# Load the saved parameters back into the model (the original comment said
# "save" here, but this line loads).
model_dict = paddle.load('LR_model.pdparams')
model.load_dict(model_dict)
# Switch to evaluation mode for inference.
model.eval()
one_data, label = load_one_example()
one_data = paddle.to_tensor(one_data, dtype="float32")
predict = model(one_data)
# De-normalize prediction and label back to the original price scale using
# the training-set statistics stored by load_data().
predict = predict * (max_values[-1] - min_values[-1]) + min_values[-1]
label = label * (max_values[-1] - min_values[-1]) + min_values[-1]
# Fixed typo in the user-facing message: "corresbonding" -> "corresponding".
print("Inference result is {},the corresponding label is {}".format(predict.numpy(), label))


    

 

posted @ 2026-04-03 08:48  Noname_min  阅读(1)  评论(0)    收藏  举报