5.2.0 Imports
import torch
from torch import nn
5.2.1 Creating a Network Model
# Create a network model by instantiating nn.Sequential: a fully connected hidden layer with 8 hidden units and a ReLU activation, followed by a fully connected output layer with 1 output unit and no activation
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# Define the input features
X = torch.rand(size=(2, 4))
# Get the model output
Y = net(X)
print(Y)
# Output:
# tensor([[-0.1654],
# [-0.2289]], grad_fn=<AddmmBackward0>)
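The forward pass can be reproduced by hand from the model's parameters, which makes the architecture concrete. A minimal sketch, reusing the net and X defined above (Y_manual is a name introduced here just for illustration):
# nn.Linear stores its weight with shape (out_features, in_features),
# so each layer computes X @ W.T + b
H = torch.relu(X @ net[0].weight.T + net[0].bias)  # hidden layer with ReLU
Y_manual = H @ net[2].weight.T + net[2].bias       # output layer, no activation
print(torch.allclose(Y_manual, net(X)))            # True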
5.2.2 Accessing Model Parameters (w and b)
# Access the parameters (w and b) of any layer by indexing
print(net[2].state_dict())
# Output:
# OrderedDict([('weight', tensor([[ 0.2019, -0.0441, -0.2342, 0.0782, 0.2378, 0.1971, -0.0026, 0.2993]])), ('bias', tensor([-0.1186]))])
print(net[2].bias)
# Output:
# Parameter containing:
# tensor([-0.1186], requires_grad=True)
print(net[2].bias.data)
# Output:
# tensor([-0.1186])
print(net[2].weight.grad)
# Output:
# None
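The gradient is None here only because no backward pass has been run yet. A minimal sketch, using an arbitrary sum of the outputs as a stand-in loss:
# After a backward pass the gradient is populated
loss = net(X).sum()
loss.backward()
print(net[2].weight.grad.shape)  # torch.Size([1, 8])
net.zero_grad()                  # reset so the examples below start clean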
# Access the parameters of a single layer, or of all layers at once
print([(name, param.shape) for name, param in net[0].named_parameters()])
# Output:
# [('weight', torch.Size([8, 4])), ('bias', torch.Size([8]))]
print([(name, param.shape) for name, param in net.named_parameters()])
# Output:
# [('0.weight', torch.Size([8, 4])), ('0.bias', torch.Size([8])), ('2.weight', torch.Size([1, 8])), ('2.bias', torch.Size([1]))]
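A parameter can also be fetched directly under its qualified name via state_dict. A minimal sketch:
print(net.state_dict()['2.bias'].data)  # same tensor as net[2].bias.data above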
# Create a nested network model and access its parameters
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block {i}', block1())
    return net
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
Y = rgnet(X)
print(Y)
# Output:
# tensor([[0.0706],
# [0.0706]], grad_fn=<AddmmBackward0>)
print(rgnet)
# Output:
# Sequential(
# (0): Sequential(
# (block 0): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 1): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 2): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 3): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# )
# (1): Linear(in_features=4, out_features=1, bias=True)
# )
print(rgnet[0][1][0].bias.data)
# Output:
# tensor([ 4.7546e-01, -2.5980e-01,  4.2100e-01, -3.5980e-02,  1.4627e-01, -2.0683e-04, -2.0049e-01, -1.8766e-01])
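For a deeply nested model, iterating named_parameters on the top-level module lists every parameter under a fully qualified name, which is often easier than chained indexing. A minimal sketch:
# List the qualified name and shape of every parameter in rgnet
for name, param in rgnet.named_parameters():
    print(name, param.shape)
# e.g. the bias accessed above appears as '0.block 1.0.bias'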
5.2.3 Initializing Model Parameters
# Built-in parameter initialization
def init_normal(m):
    # Initialize the weights and biases of fully connected layers
    if type(m) == nn.Linear:
        # Sample the weights from a normal distribution with mean 0 and standard deviation 0.01
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # Initialize the biases to zero
        nn.init.zeros_(m.bias)
net.apply(init_normal)
print(net[0].weight.data)
# Output:
# tensor([[-0.0046, 0.0097, 0.0019, 0.0108],
# [ 0.0063, -0.0096, -0.0096, 0.0097],
# [ 0.0016, -0.0121, 0.0006, 0.0047],
# [ 0.0162, 0.0023, 0.0110, -0.0044],
# [-0.0017, -0.0074, 0.0021, -0.0002],
# [ 0.0084, 0.0022, -0.0017, 0.0205],
# [ 0.0012, 0.0087, 0.0021, 0.0025],
# [ 0.0013, 0.0096, -0.0081, -0.0019]])
print(net[0].bias.data)
# Output:
# tensor([0., 0., 0., 0., 0., 0., 0., 0.])
def init_constant(m):
    # Initialize the weights and biases of fully connected layers
    if type(m) == nn.Linear:
        # Initialize the weights to all ones
        nn.init.constant_(m.weight, 1)
        # Initialize the biases to zero
        nn.init.zeros_(m.bias)
net.apply(init_constant)
print(net[0].weight.data)
# Output:
# tensor([[1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.]])
print(net[0].bias.data)
# Output:
# tensor([0., 0., 0., 0., 0., 0., 0., 0.])
# Xavier initialization
def init_xavier(m):
    # Initialize the weights of fully connected layers
    if type(m) == nn.Linear:
        # Sample from the uniform distribution U(-a, a) with a = sqrt(6 / (fan_in + fan_out)),
        # where fan_in is the number of input units and fan_out the number of output units
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    # Initialize the weights of fully connected layers
    if type(m) == nn.Linear:
        # Initialize the weights to the constant 42
        nn.init.constant_(m.weight, 42)
net[0].apply(init_xavier)
print(net[0].weight.data)
# Output:
# tensor([[ 0.6856, -0.2490, -0.3304, -0.2262],
# [-0.5909, -0.1715, 0.1036, 0.6577],
# [-0.3842, -0.3241, -0.1618, 0.4961],
# [ 0.2342, 0.2458, -0.0167, -0.4840],
# [-0.0285, -0.6719, -0.3516, -0.4315],
# [ 0.1638, -0.0394, -0.5881, 0.5157],
# [-0.3652, 0.5219, 0.4790, 0.0345],
# [ 0.0080, -0.4427, 0.3405, -0.4380]])
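Since xavier_uniform_ draws from U(-a, a) with a = sqrt(6 / (fan_in + fan_out)), the weights of net[0] (fan_in=4, fan_out=8) must lie within about ±0.707. A quick check:
import math
a = math.sqrt(6 / (4 + 8))                   # Xavier-uniform bound, ≈ 0.7071
print(net[0].weight.data.abs().max() <= a)   # tensor(True)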
net[2].apply(init_42)
print(net[2].weight.data)
# Output:
# tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])
# Custom parameter initialization
def my_init(m):
    if type(m) == nn.Linear:
        # Sample the weights from U(-10, 10), then zero out every entry with |w| < 5
        nn.init.uniform_(m.weight, -10, 10)
        m.weight.data *= m.weight.data.abs() >= 5
net.apply(my_init)
print(net[0].weight)
# Output:
# tensor([[ 8.0693, -9.7628, -0.0000, -0.0000],
# [ 5.9148, -9.6592, -0.0000, -0.0000],
# [-5.9411, 0.0000, -9.4628, -9.1080],
# [-0.0000, -0.0000, 5.0042, -5.5144],
# [-0.0000, -0.0000, -0.0000, 0.0000],
# [ 0.0000, -6.3051, 9.3524, 6.3112],
# [ 0.0000, 0.0000, -0.0000, -0.0000],
# [ 7.5141, -0.0000, 8.2071, -0.0000]], requires_grad=True)
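Because the weights are drawn from U(-10, 10) and every entry with |w| < 5 is zeroed, about half of the entries should end up zero. A quick check:
# Fraction of zeroed entries; close to 0.5 in expectation
print((net[0].weight.data == 0).float().mean())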
5.2.4 Modifying Model Parameters
# Add 1 to all weight entries
net[0].weight.data[:] += 1
# Set the weight at index [0, 0] to 42
net[0].weight.data[0, 0] = 42
print(net[0].weight)
# Output:
# tensor([[42.0000, -8.7628, 1.0000, 1.0000],
# [ 6.9148, -8.6592, 1.0000, 1.0000],
# [-4.9411, 1.0000, -8.4628, -8.1080],
# [ 1.0000, 1.0000, 6.0042, -4.5144],
# [ 1.0000, 1.0000, 1.0000, 1.0000],
# [ 1.0000, -5.3051, 10.3524, 7.3112],
# [ 1.0000, 1.0000, 1.0000, 1.0000],
# [ 8.5141, 1.0000, 9.2071, 1.0000]], requires_grad=True)
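Writing through .data bypasses autograd entirely; an equivalent idiom that is often preferred is to update the parameter inside torch.no_grad(). A minimal sketch of the same two updates:
# Equivalent in-place updates without touching .data
with torch.no_grad():
    net[0].weight += 1
    net[0].weight[0, 0] = 42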
5.2.5 Shared Layers
# Define a layer whose parameters will be shared
shared = nn.Linear(8, 8)
# Define a network model that contains the shared layer twice
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared, nn.ReLU(), nn.Linear(8, 1))
Y = net(X)
# net[2] and net[4] are the same shared layer, so their weights are identical
print(net[2].weight.data == net[4].weight.data)
# Output:
# tensor([[True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True]])
# Modify one weight of the shared layer
net[2].weight.data[0, 0] = 100
print(net[2].weight.data == net[4].weight.data)
# Output:
# tensor([[True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True]])
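The two positions hold literally the same Module object, so the parameters are tied rather than merely equal: during backpropagation, gradients from both occurrences accumulate into one tensor. A minimal sketch, again using an arbitrary sum as a stand-in loss:
# net[2] and net[4] are the same object, not equal copies
print(net[2] is net[4])                          # True
net.zero_grad()
net(X).sum().backward()
print(net[2].weight.grad is net[4].weight.grad)  # True: one shared gradient tensor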
The complete code for this section is as follows:
import torch
from torch import nn
# ------------------------------ Creating a network model ------------------------------------
# Create a network model by instantiating nn.Sequential: a fully connected hidden layer with 8 hidden units and a ReLU activation, followed by a fully connected output layer with 1 output unit and no activation
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# Define the input features
X = torch.rand(size=(2, 4))
# Get the model output
Y = net(X)
print(Y)
# Output:
# tensor([[-0.1654],
# [-0.2289]], grad_fn=<AddmmBackward0>)
# ------------------------------ Accessing model parameters (w and b) ------------------------------------
# Access the parameters (w and b) of any layer by indexing
print(net[2].state_dict())
# Output:
# OrderedDict([('weight', tensor([[ 0.2019, -0.0441, -0.2342, 0.0782, 0.2378, 0.1971, -0.0026, 0.2993]])), ('bias', tensor([-0.1186]))])
print(net[2].bias)
# Output:
# Parameter containing:
# tensor([-0.1186], requires_grad=True)
print(net[2].bias.data)
# Output:
# tensor([-0.1186])
print(net[2].weight.grad)
# Output:
# None
# Access the parameters of a single layer, or of all layers at once
print([(name, param.shape) for name, param in net[0].named_parameters()])
# Output:
# [('weight', torch.Size([8, 4])), ('bias', torch.Size([8]))]
print([(name, param.shape) for name, param in net.named_parameters()])
# Output:
# [('0.weight', torch.Size([8, 4])), ('0.bias', torch.Size([8])), ('2.weight', torch.Size([1, 8])), ('2.bias', torch.Size([1]))]
# Create a nested network model and access its parameters
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block {i}', block1())
    return net
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
Y = rgnet(X)
print(Y)
# Output:
# tensor([[0.0706],
# [0.0706]], grad_fn=<AddmmBackward0>)
print(rgnet)
# Output:
# Sequential(
# (0): Sequential(
# (block 0): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 1): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 2): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 3): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# )
# (1): Linear(in_features=4, out_features=1, bias=True)
# )
print(rgnet[0][1][0].bias.data)
# Output:
# tensor([ 4.7546e-01, -2.5980e-01,  4.2100e-01, -3.5980e-02,  1.4627e-01, -2.0683e-04, -2.0049e-01, -1.8766e-01])
# ------------------------------ Initializing model parameters ------------------------------------
# Built-in parameter initialization
def init_normal(m):
    # Initialize the weights and biases of fully connected layers
    if type(m) == nn.Linear:
        # Sample the weights from a normal distribution with mean 0 and standard deviation 0.01
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # Initialize the biases to zero
        nn.init.zeros_(m.bias)
net.apply(init_normal)
print(net[0].weight.data)
# Output:
# tensor([[-0.0046, 0.0097, 0.0019, 0.0108],
# [ 0.0063, -0.0096, -0.0096, 0.0097],
# [ 0.0016, -0.0121, 0.0006, 0.0047],
# [ 0.0162, 0.0023, 0.0110, -0.0044],
# [-0.0017, -0.0074, 0.0021, -0.0002],
# [ 0.0084, 0.0022, -0.0017, 0.0205],
# [ 0.0012, 0.0087, 0.0021, 0.0025],
# [ 0.0013, 0.0096, -0.0081, -0.0019]])
print(net[0].bias.data)
# Output:
# tensor([0., 0., 0., 0., 0., 0., 0., 0.])
def init_constant(m):
    # Initialize the weights and biases of fully connected layers
    if type(m) == nn.Linear:
        # Initialize the weights to all ones
        nn.init.constant_(m.weight, 1)
        # Initialize the biases to zero
        nn.init.zeros_(m.bias)
net.apply(init_constant)
print(net[0].weight.data)
# Output:
# tensor([[1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.]])
print(net[0].bias.data)
# Output:
# tensor([0., 0., 0., 0., 0., 0., 0., 0.])
# Xavier initialization
def init_xavier(m):
    # Initialize the weights of fully connected layers
    if type(m) == nn.Linear:
        # Sample from the uniform distribution U(-a, a) with a = sqrt(6 / (fan_in + fan_out)),
        # where fan_in is the number of input units and fan_out the number of output units
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    # Initialize the weights of fully connected layers
    if type(m) == nn.Linear:
        # Initialize the weights to the constant 42
        nn.init.constant_(m.weight, 42)
net[0].apply(init_xavier)
print(net[0].weight.data)
# Output:
# tensor([[ 0.6856, -0.2490, -0.3304, -0.2262],
# [-0.5909, -0.1715, 0.1036, 0.6577],
# [-0.3842, -0.3241, -0.1618, 0.4961],
# [ 0.2342, 0.2458, -0.0167, -0.4840],
# [-0.0285, -0.6719, -0.3516, -0.4315],
# [ 0.1638, -0.0394, -0.5881, 0.5157],
# [-0.3652, 0.5219, 0.4790, 0.0345],
# [ 0.0080, -0.4427, 0.3405, -0.4380]])
net[2].apply(init_42)
print(net[2].weight.data)
# Output:
# tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])
# Custom parameter initialization
def my_init(m):
    if type(m) == nn.Linear:
        # Sample the weights from U(-10, 10), then zero out every entry with |w| < 5
        nn.init.uniform_(m.weight, -10, 10)
        m.weight.data *= m.weight.data.abs() >= 5
net.apply(my_init)
print(net[0].weight)
# Output:
# tensor([[ 8.0693, -9.7628, -0.0000, -0.0000],
# [ 5.9148, -9.6592, -0.0000, -0.0000],
# [-5.9411, 0.0000, -9.4628, -9.1080],
# [-0.0000, -0.0000, 5.0042, -5.5144],
# [-0.0000, -0.0000, -0.0000, 0.0000],
# [ 0.0000, -6.3051, 9.3524, 6.3112],
# [ 0.0000, 0.0000, -0.0000, -0.0000],
# [ 7.5141, -0.0000, 8.2071, -0.0000]], requires_grad=True)
# ------------------------------ Modifying model parameters ------------------------------------
# Add 1 to all weight entries
net[0].weight.data[:] += 1
# Set the weight at index [0, 0] to 42
net[0].weight.data[0, 0] = 42
print(net[0].weight)
# Output:
# tensor([[42.0000, -8.7628, 1.0000, 1.0000],
# [ 6.9148, -8.6592, 1.0000, 1.0000],
# [-4.9411, 1.0000, -8.4628, -8.1080],
# [ 1.0000, 1.0000, 6.0042, -4.5144],
# [ 1.0000, 1.0000, 1.0000, 1.0000],
# [ 1.0000, -5.3051, 10.3524, 7.3112],
# [ 1.0000, 1.0000, 1.0000, 1.0000],
# [ 8.5141, 1.0000, 9.2071, 1.0000]], requires_grad=True)
# ------------------------------ Shared layers ------------------------------------
# Define a layer whose parameters will be shared
shared = nn.Linear(8, 8)
# Define a network model that contains the shared layer twice
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared, nn.ReLU(), nn.Linear(8, 1))
Y = net(X)
# net[2] and net[4] are the same shared layer, so their weights are identical
print(net[2].weight.data == net[4].weight.data)
# Output:
# tensor([[True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True]])
# Modify one weight of the shared layer
net[2].weight.data[0, 0] = 100
print(net[2].weight.data == net[4].weight.data)
# Output:
# tensor([[True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True]])