import torch
import torch.nn as nn
import torch.optim as optim
# ====================== 1. Configuration (beginner-friendly) ======================
# Device: use the GPU if available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Sequence parameter: values of n for the 0^n 1^n sequences (n from 1 to 5)
n_list = [1, 2, 3, 4, 5]
# Character-to-index mapping (encoding)
char2idx = {'0': 0, '1': 1}
idx2char = {0: '0', 1: '1'}
vocab_size = len(char2idx)  # vocabulary size (just '0' and '1')
# RNN hyperparameters
input_size = vocab_size   # input dimension (one-hot encoding)
hidden_size = 16          # hidden state dimension (kept fixed for simplicity)
num_layers = 1            # number of RNN layers (a single layer is simplest)
learning_rate = 0.01      # learning rate
num_epochs = 1000         # number of training epochs
print_every = 200         # print progress every 200 epochs
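
# Optional reproducibility knob: training on such a tiny dataset is sensitive
# to weight initialization, so results can vary between runs. Fixing the seed
# (42 here is an arbitrary choice) makes runs repeatable.
torch.manual_seed(42)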
# ====================== 2. Generate training data ======================
def generate_data(n_list):
    """Generate sequences of the form 0^n 1^n."""
    data = []
    for n in n_list:
        # Build the sequence: n zeros followed by n ones
        seq = '0' * n + '1' * n
        data.append(seq)
    return data

# Generate the training data
train_data = generate_data(n_list)
print("Training sequences:", train_data)
# ====================== 3. Preprocessing (encode + convert to tensors) ======================
def encode_sequence(seq):
    """Encode a character sequence as tensors: one-hot input plus integer targets."""
    # Step 1: characters -> integer indices
    idx_seq = [char2idx[char] for char in seq]
    # Step 2: indices -> one-hot encoding (what the model consumes)
    one_hot_seq = torch.zeros(len(seq), vocab_size).to(device)
    for i, idx in enumerate(idx_seq):
        one_hot_seq[i, idx] = 1.0
    return one_hot_seq, torch.tensor(idx_seq).to(device)
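
# Shape check (a minimal sketch; the _demo_* names are scratch variables):
# for '0011' we expect a (4, 2) one-hot tensor and the index tensor [0, 0, 1, 1].
_demo_one_hot, _demo_idx = encode_sequence('0011')
assert _demo_one_hot.shape == (4, vocab_size)
assert _demo_idx.tolist() == [0, 0, 1, 1]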
# ====================== 4. Build a simple RNN model ======================
class SimpleRNN(nn.Module):
    def __init__(self, input_size, hidden_size, vocab_size, num_layers):
        super(SimpleRNN, self).__init__()
        self.hidden_size = hidden_size  # hidden state dimension
        self.num_layers = num_layers    # number of RNN layers
        # RNN layer: input dim = vocab size, hidden dim = hidden_size, num_layers layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        # Fully connected layer: map hidden states to vocabulary logits (predict '0' or '1')
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        """
        Forward pass.
        x: input tensor (batch_size, seq_len, input_size)
        hidden: hidden state tensor (num_layers, batch_size, hidden_size)
        """
        # RNN forward pass
        out, hidden = self.rnn(x, hidden)
        # Flatten to (batch_size * seq_len, hidden_size) for the linear layer
        out = out.reshape(out.size(0) * out.size(1), out.size(2))
        # Project to vocabulary logits
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        """Initialize the hidden state with zeros."""
        return torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
# ====================== 5. Initialize model, loss function, optimizer ======================
model = SimpleRNN(input_size, hidden_size, vocab_size, num_layers).to(device)
criterion = nn.CrossEntropyLoss()  # cross-entropy loss (classification task)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # Adam optimizer
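
# Dummy forward pass (a sketch to verify shapes, not part of training):
# with batch_size=1 and seq_len=4, the flattened logits should be (4, vocab_size).
_dummy_x = torch.zeros(1, 4, input_size, device=device)
_dummy_out, _ = model(_dummy_x, model.init_hidden(batch_size=1))
assert _dummy_out.shape == (4, vocab_size)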
# ====================== 6. Train the model ======================
print("\nStarting training...")
for epoch in range(num_epochs):
    total_loss = 0
    # Iterate over all training sequences
    for seq in train_data:
        # Encode the sequence: one-hot input and integer targets
        input_one_hot, target_idx = encode_sequence(seq)
        # Add a batch dimension: (batch_size=1, seq_len, input_size)
        input_one_hot = input_one_hot.unsqueeze(0)
        # Initialize the hidden state
        hidden = model.init_hidden(batch_size=1)
        # Reset gradients (avoid accumulation across sequences)
        optimizer.zero_grad()
        # Forward pass: predict character by character (all but the last one)
        loss = 0
        for i in range(len(seq) - 1):
            # Feed the i-th character, predict the (i+1)-th
            input_char = input_one_hot[:, i:i+1, :]
            output, hidden = model(input_char, hidden)
            # Accumulate loss: prediction vs. the (i+1)-th target
            loss += criterion(output, target_idx[i+1:i+2])
        # Backpropagation + parameter update
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Print training progress
    if (epoch + 1) % print_every == 0:
        avg_loss = total_loss / len(train_data)
        print(f'Epoch [{epoch+1}/{num_epochs}], average loss: {avg_loss:.4f}')
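
# Note: vanilla RNNs can suffer from exploding gradients on longer sequences.
# A common remedy (not applied above) is to clip gradients between
# loss.backward() and optimizer.step():
#     torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)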
# ====================== 7. Test the model: generate 0^n 1^n sequences ======================
print("\nGenerating sequences with the trained model...")
def generate_sequence(n):
    """
    Generate a 0^n 1^n sequence of length 2n.
    n: the number of zeros (and of ones).
    """
    model.eval()
    with torch.no_grad():
        # Initial input: the first character is '0' (one-hot encoded)
        input_one_hot = torch.zeros(1, 1, vocab_size).to(device)
        input_one_hot[0, 0, char2idx['0']] = 1.0
        # Initialize the hidden state
        hidden = model.init_hidden(batch_size=1)
        # Store the generated sequence (the seed '0' is the first character)
        generated_seq = ['0']
        # Run 2n-1 prediction steps, feeding each prediction back in as input
        for _ in range(2 * n - 1):
            output, hidden = model(input_one_hot, hidden)
            # Greedy decoding: pick the most probable next character
            _, predicted_idx = torch.max(output, 1)
            generated_seq.append(idx2char[predicted_idx.item()])
            # The predicted character becomes the next input
            input_one_hot = torch.zeros(1, 1, vocab_size).to(device)
            input_one_hot[0, 0, predicted_idx.item()] = 1.0
    return ''.join(generated_seq)
# Test generation for different values of n
for test_n in [1, 2, 3, 4, 5]:
    generated_seq = generate_sequence(test_n)
    expected_seq = '0' * test_n + '1' * test_n
    print(f'n={test_n}: expected={expected_seq}, generated={generated_seq}, match: {generated_seq == expected_seq}')
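
# Probing generalization (outcome not guaranteed): the model was only trained
# on n = 1..5, so unseen lengths hint at whether it learned the counting rule
# or merely memorized the five training strings.
for test_n in [6, 7]:
    generated_seq = generate_sequence(test_n)
    expected_seq = '0' * test_n + '1' * test_n
    print(f'n={test_n} (unseen): expected={expected_seq}, generated={generated_seq}, match: {generated_seq == expected_seq}')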