5.2.0 Imports
import torch
from torch import nn
5.2.1 Creating a Network Model
# Create a network model by instantiating nn.Sequential: a fully connected hidden layer with 8 hidden units and a ReLU activation, followed by a fully connected output layer with 1 output unit and no activation
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# Define the input features
X = torch.rand(size=(2, 4))
# Get the model output
Y = net(X)
print(Y)
# Output:
# tensor([[-0.1654],
# [-0.2289]], grad_fn=<AddmmBackward0>)
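The forward pass can be reproduced by hand from the model's parameters, which makes the architecture concrete. A minimal sketch, reusing the net and X defined above (Y_manual is a name introduced here just for illustration):
# nn.Linear stores its weight with shape (out_features, in_features),
# so each layer computes X @ W.T + b
H = torch.relu(X @ net[0].weight.T + net[0].bias)  # hidden layer with ReLU
Y_manual = H @ net[2].weight.T + net[2].bias       # output layer, no activation
print(torch.allclose(Y_manual, net(X)))            # True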
5.2.2 Accessing Model Parameters (w and b)
# Access the parameters (w and b) of any layer by indexing
print(net[2].state_dict())
# Output:
# OrderedDict([('weight', tensor([[ 0.2019, -0.0441, -0.2342, 0.0782, 0.2378, 0.1971, -0.0026, 0.2993]])), ('bias', tensor([-0.1186]))])
print(net[2].bias)
# Output:
# Parameter containing:
# tensor([-0.1186], requires_grad=True)
print(net[2].bias.data)
# Output:
# tensor([-0.1186])
print(net[2].weight.grad)
# Output:
# None
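The gradient is None here only because no backward pass has been run yet. A minimal sketch, using an arbitrary sum of the outputs as a stand-in loss:
# After a backward pass the gradient is populated
loss = net(X).sum()
loss.backward()
print(net[2].weight.grad.shape)  # torch.Size([1, 8])
net.zero_grad()                  # reset so the examples below start clean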
# Access the parameters of a single layer, or of all layers at once
print([(name, param.shape) for name, param in net[0].named_parameters()])
# Output:
# [('weight', torch.Size([8, 4])), ('bias', torch.Size([8]))]
print([(name, param.shape) for name, param in net.named_parameters()])
# Output:
# [('0.weight', torch.Size([8, 4])), ('0.bias', torch.Size([8])), ('2.weight', torch.Size([1, 8])), ('2.bias', torch.Size([1]))]
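A parameter can also be fetched directly under its qualified name via state_dict. A minimal sketch:
print(net.state_dict()['2.bias'].data)  # same tensor as net[2].bias.data above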
# Create a nested network model and access its parameters
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block {i}', block1())
    return net
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
Y = rgnet(X)
print(Y)
# Output:
# tensor([[0.0706],
# [0.0706]], grad_fn=<AddmmBackward0>)
print(rgnet)
# Output:
# Sequential(
# (0): Sequential(
# (block 0): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 1): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 2): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 3): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# )
# (1): Linear(in_features=4, out_features=1, bias=True)
# )
print(rgnet[0][1][0].bias.data)
# Output:
# tensor([ 4.7546e-01, -2.5980e-01,  4.2100e-01, -3.5980e-02,  1.4627e-01, -2.0683e-04, -2.0049e-01, -1.8766e-01])
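For a deeply nested model, iterating named_parameters on the top-level module lists every parameter under a fully qualified name, which is often easier than chained indexing. A minimal sketch:
# List the qualified name and shape of every parameter in rgnet
for name, param in rgnet.named_parameters():
    print(name, param.shape)
# e.g. the bias accessed above appears as '0.block 1.0.bias'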
5.2.3 Initializing Model Parameters
# Built-in parameter initialization
def init_normal(m):
    # Initialize the weights and biases of fully connected layers
    if type(m) == nn.Linear:
        # Sample the weights from a normal distribution with mean 0 and standard deviation 0.01
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # Initialize the biases to zero
        nn.init.zeros_(m.bias)
net.apply(init_normal)
print(net[0].weight.data)
# Output:
# tensor([[-0.0046, 0.0097, 0.0019, 0.0108],
# [ 0.0063, -0.0096, -0.0096, 0.0097],
# [ 0.0016, -0.0121, 0.0006, 0.0047],
# [ 0.0162, 0.0023, 0.0110, -0.0044],
# [-0.0017, -0.0074, 0.0021, -0.0002],
# [ 0.0084, 0.0022, -0.0017, 0.0205],
# [ 0.0012, 0.0087, 0.0021, 0.0025],
# [ 0.0013, 0.0096, -0.0081, -0.0019]])
print(net[0].bias.data)
# Output:
# tensor([0., 0., 0., 0., 0., 0., 0., 0.])
def init_constant(m):
    # Initialize the weights and biases of fully connected layers
    if type(m) == nn.Linear:
        # Initialize the weights to all ones
        nn.init.constant_(m.weight, 1)
        # Initialize the biases to zero
        nn.init.zeros_(m.bias)
net.apply(init_constant)
print(net[0].weight.data)
# Output:
# tensor([[1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.]])
print(net[0].bias.data)
# Output:
# tensor([0., 0., 0., 0., 0., 0., 0., 0.])
# Xavier initialization
def init_xavier(m):
    # Initialize the weights of fully connected layers
    if type(m) == nn.Linear:
        # Sample from the uniform distribution U(-a, a) with a = sqrt(6 / (fan_in + fan_out)),
        # where fan_in is the number of input units and fan_out the number of output units
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    # Initialize the weights of fully connected layers
    if type(m) == nn.Linear:
        # Initialize the weights to the constant 42
        nn.init.constant_(m.weight, 42)
net[0].apply(init_xavier)
print(net[0].weight.data)
# Output:
# tensor([[ 0.6856, -0.2490, -0.3304, -0.2262],
# [-0.5909, -0.1715, 0.1036, 0.6577],
# [-0.3842, -0.3241, -0.1618, 0.4961],
# [ 0.2342, 0.2458, -0.0167, -0.4840],
# [-0.0285, -0.6719, -0.3516, -0.4315],
# [ 0.1638, -0.0394, -0.5881, 0.5157],
# [-0.3652, 0.5219, 0.4790, 0.0345],
# [ 0.0080, -0.4427, 0.3405, -0.4380]])
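Since xavier_uniform_ draws from U(-a, a) with a = sqrt(6 / (fan_in + fan_out)), the weights of net[0] (fan_in=4, fan_out=8) must lie within about ±0.707. A quick check:
import math
a = math.sqrt(6 / (4 + 8))                   # Xavier-uniform bound, ≈ 0.7071
print(net[0].weight.data.abs().max() <= a)   # tensor(True)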
net[2].apply(init_42)
print(net[2].weight.data)
# Output:
# tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])
# Custom parameter initialization
def my_init(m):
    if type(m) == nn.Linear:
        # Sample the weights from U(-10, 10), then zero out every entry with |w| < 5
        nn.init.uniform_(m.weight, -10, 10)
        m.weight.data *= m.weight.data.abs() >= 5
net.apply(my_init)
print(net[0].weight)
# Output:
# tensor([[ 8.0693, -9.7628, -0.0000, -0.0000],
# [ 5.9148, -9.6592, -0.0000, -0.0000],
# [-5.9411, 0.0000, -9.4628, -9.1080],
# [-0.0000, -0.0000, 5.0042, -5.5144],
# [-0.0000, -0.0000, -0.0000, 0.0000],
# [ 0.0000, -6.3051, 9.3524, 6.3112],
# [ 0.0000, 0.0000, -0.0000, -0.0000],
# [ 7.5141, -0.0000, 8.2071, -0.0000]], requires_grad=True)
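Because the weights are drawn from U(-10, 10) and every entry with |w| < 5 is zeroed, about half of the entries should end up zero. A quick check:
# Fraction of zeroed entries; close to 0.5 in expectation
print((net[0].weight.data == 0).float().mean())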
5.2.4 Modifying Model Parameters
# Add 1 to all weight entries
net[0].weight.data[:] += 1
# Set the weight at index [0, 0] to 42
net[0].weight.data[0, 0] = 42
print(net[0].weight)
# Output:
# tensor([[42.0000, -8.7628, 1.0000, 1.0000],
# [ 6.9148, -8.6592, 1.0000, 1.0000],
# [-4.9411, 1.0000, -8.4628, -8.1080],
# [ 1.0000, 1.0000, 6.0042, -4.5144],
# [ 1.0000, 1.0000, 1.0000, 1.0000],
# [ 1.0000, -5.3051, 10.3524, 7.3112],
# [ 1.0000, 1.0000, 1.0000, 1.0000],
# [ 8.5141, 1.0000, 9.2071, 1.0000]], requires_grad=True)
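Writing through .data bypasses autograd entirely; an equivalent idiom that is often preferred is to update the parameter inside torch.no_grad(). A minimal sketch of the same two updates:
# Equivalent in-place updates without touching .data
with torch.no_grad():
    net[0].weight += 1
    net[0].weight[0, 0] = 42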
5.2.5 Shared Layers
# Define a layer whose parameters will be shared
shared = nn.Linear(8, 8)
# Define a network model that contains the shared layer twice
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared, nn.ReLU(), nn.Linear(8, 1))
Y = net(X)
# net[2] and net[4] are the same shared layer, so their weights are identical
print(net[2].weight.data == net[4].weight.data)
# Output:
# tensor([[True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True]])
# Modify one weight of the shared layer
net[2].weight.data[0, 0] = 100
print(net[2].weight.data == net[4].weight.data)
# Output:
# tensor([[True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True]])
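The two positions hold literally the same Module object, so the parameters are tied rather than merely equal: during backpropagation, gradients from both occurrences accumulate into one tensor. A minimal sketch, again using an arbitrary sum as a stand-in loss:
# net[2] and net[4] are the same object, not equal copies
print(net[2] is net[4])                          # True
net.zero_grad()
net(X).sum().backward()
print(net[2].weight.grad is net[4].weight.grad)  # True: one shared gradient tensor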
The complete code for this section is as follows:
import torch
from torch import nn
# ------------------------------ Creating a network model ------------------------------------
# Create a network model by instantiating nn.Sequential: a fully connected hidden layer with 8 hidden units and a ReLU activation, followed by a fully connected output layer with 1 output unit and no activation
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# Define the input features
X = torch.rand(size=(2, 4))
# Get the model output
Y = net(X)
print(Y)
# Output:
# tensor([[-0.1654],
# [-0.2289]], grad_fn=<AddmmBackward0>)
# ------------------------------ Accessing model parameters (w and b) ------------------------------------
# Access the parameters (w and b) of any layer by indexing
print(net[2].state_dict())
# Output:
# OrderedDict([('weight', tensor([[ 0.2019, -0.0441, -0.2342, 0.0782, 0.2378, 0.1971, -0.0026, 0.2993]])), ('bias', tensor([-0.1186]))])
print(net[2].bias)
# Output:
# Parameter containing:
# tensor([-0.1186], requires_grad=True)
print(net[2].bias.data)
# Output:
# tensor([-0.1186])
print(net[2].weight.grad)
# Output:
# None
# Access the parameters of a single layer, or of all layers at once
print([(name, param.shape) for name, param in net[0].named_parameters()])
# Output:
# [('weight', torch.Size([8, 4])), ('bias', torch.Size([8]))]
print([(name, param.shape) for name, param in net.named_parameters()])
# Output:
# [('0.weight', torch.Size([8, 4])), ('0.bias', torch.Size([8])), ('2.weight', torch.Size([1, 8])), ('2.bias', torch.Size([1]))]
# Create a nested network model and access its parameters
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block {i}', block1())
    return net
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
Y = rgnet(X)
print(Y)
# Output:
# tensor([[0.0706],
# [0.0706]], grad_fn=<AddmmBackward0>)
print(rgnet)
# Output:
# Sequential(
# (0): Sequential(
# (block 0): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 1): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 2): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# (block 3): Sequential(
# (0): Linear(in_features=4, out_features=8, bias=True)
# (1): ReLU()
# (2): Linear(in_features=8, out_features=4, bias=True)
# (3): ReLU()
# )
# )
# (1): Linear(in_features=4, out_features=1, bias=True)
# )
print(rgnet[0][1][0].bias.data)
# Output:
# tensor([ 4.7546e-01, -2.5980e-01,  4.2100e-01, -3.5980e-02,  1.4627e-01, -2.0683e-04, -2.0049e-01, -1.8766e-01])
# ------------------------------ Initializing model parameters ------------------------------------
# Built-in parameter initialization
def init_normal(m):
    # Initialize the weights and biases of fully connected layers
    if type(m) == nn.Linear:
        # Sample the weights from a normal distribution with mean 0 and standard deviation 0.01
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # Initialize the biases to zero
        nn.init.zeros_(m.bias)
net.apply(init_normal)
print(net[0].weight.data)
# Output:
# tensor([[-0.0046, 0.0097, 0.0019, 0.0108],
# [ 0.0063, -0.0096, -0.0096, 0.0097],
# [ 0.0016, -0.0121, 0.0006, 0.0047],
# [ 0.0162, 0.0023, 0.0110, -0.0044],
# [-0.0017, -0.0074, 0.0021, -0.0002],
# [ 0.0084, 0.0022, -0.0017, 0.0205],
# [ 0.0012, 0.0087, 0.0021, 0.0025],
# [ 0.0013, 0.0096, -0.0081, -0.0019]])
print(net[0].bias.data)
# Output:
# tensor([0., 0., 0., 0., 0., 0., 0., 0.])
def init_constant(m):
    # Initialize the weights and biases of fully connected layers
    if type(m) == nn.Linear:
        # Initialize the weights to all ones
        nn.init.constant_(m.weight, 1)
        # Initialize the biases to zero
        nn.init.zeros_(m.bias)
net.apply(init_constant)
print(net[0].weight.data)
# Output:
# tensor([[1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.],
# [1., 1., 1., 1.]])
print(net[0].bias.data)
# Output:
# tensor([0., 0., 0., 0., 0., 0., 0., 0.])
# Xavier initialization
def init_xavier(m):
    # Initialize the weights of fully connected layers
    if type(m) == nn.Linear:
        # Sample from the uniform distribution U(-a, a) with a = sqrt(6 / (fan_in + fan_out)),
        # where fan_in is the number of input units and fan_out the number of output units
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    # Initialize the weights of fully connected layers
    if type(m) == nn.Linear:
        # Initialize the weights to the constant 42
        nn.init.constant_(m.weight, 42)
net[0].apply(init_xavier)
print(net[0].weight.data)
# Output:
# tensor([[ 0.6856, -0.2490, -0.3304, -0.2262],
# [-0.5909, -0.1715, 0.1036, 0.6577],
# [-0.3842, -0.3241, -0.1618, 0.4961],
# [ 0.2342, 0.2458, -0.0167, -0.4840],
# [-0.0285, -0.6719, -0.3516, -0.4315],
# [ 0.1638, -0.0394, -0.5881, 0.5157],
# [-0.3652, 0.5219, 0.4790, 0.0345],
# [ 0.0080, -0.4427, 0.3405, -0.4380]])
net[2].apply(init_42)
print(net[2].weight.data)
# Output:
# tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])
# Custom parameter initialization
def my_init(m):
    if type(m) == nn.Linear:
        # Sample the weights from U(-10, 10), then zero out every entry with |w| < 5
        nn.init.uniform_(m.weight, -10, 10)
        m.weight.data *= m.weight.data.abs() >= 5
net.apply(my_init)
print(net[0].weight)
# Output:
# tensor([[ 8.0693, -9.7628, -0.0000, -0.0000],
# [ 5.9148, -9.6592, -0.0000, -0.0000],
# [-5.9411, 0.0000, -9.4628, -9.1080],
# [-0.0000, -0.0000, 5.0042, -5.5144],
# [-0.0000, -0.0000, -0.0000, 0.0000],
# [ 0.0000, -6.3051, 9.3524, 6.3112],
# [ 0.0000, 0.0000, -0.0000, -0.0000],
# [ 7.5141, -0.0000, 8.2071, -0.0000]], requires_grad=True)
# ------------------------------ Modifying model parameters ------------------------------------
# Add 1 to all weight entries
net[0].weight.data[:] += 1
# Set the weight at index [0, 0] to 42
net[0].weight.data[0, 0] = 42
print(net[0].weight)
# Output:
# tensor([[42.0000, -8.7628, 1.0000, 1.0000],
# [ 6.9148, -8.6592, 1.0000, 1.0000],
# [-4.9411, 1.0000, -8.4628, -8.1080],
# [ 1.0000, 1.0000, 6.0042, -4.5144],
# [ 1.0000, 1.0000, 1.0000, 1.0000],
# [ 1.0000, -5.3051, 10.3524, 7.3112],
# [ 1.0000, 1.0000, 1.0000, 1.0000],
# [ 8.5141, 1.0000, 9.2071, 1.0000]], requires_grad=True)
# ------------------------------ Shared layers ------------------------------------
# Define a layer whose parameters will be shared
shared = nn.Linear(8, 8)
# Define a network model that contains the shared layer twice
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared, nn.ReLU(), nn.Linear(8, 1))
Y = net(X)
# net[2] and net[4] are the same shared layer, so their weights are identical
print(net[2].weight.data == net[4].weight.data)
# Output:
# tensor([[True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True]])
# Modify one weight of the shared layer
net[2].weight.data[0, 0] = 100
print(net[2].weight.data == net[4].weight.data)
# Output:
# tensor([[True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True],
# [True, True, True, True, True, True, True, True]])