Generating simple sequences of the form 0^n 1^n with a recurrent neural network
The task is described in detail below:

Source code
import torch
import torch.nn as nn
import torch.optim as optim
import random
# Data preparation and preprocessing
def generate_data(num_samples, max_n=5):
    data = []
    for _ in range(num_samples):
        n = random.randint(1, max_n)
        seq = '0' * n + '1' * n
        data.append(seq)
    return data

def encode_seq(seq, max_len):
    encoded = [0 if c == '0' else 1 for c in seq]
    padded = encoded + [2] * (max_len - len(encoded))  # 2 is the padding token
    return torch.tensor(padded, dtype=torch.long)

max_n = 5
max_seq_len = 2 * max_n
train_data = generate_data(1000, max_n)
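# Illustrative example (not in the original post): with max_seq_len = 10,
# a sample such as '0011' is encoded and padded as
#   encode_seq('0011', max_seq_len) -> tensor([0, 0, 1, 1, 2, 2, 2, 2, 2, 2])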
# Define the LSTM model
class SeqGenerate(nn.Module):
    def __init__(self, vocab_size=3, embed_dim=8, hidden_dim=16):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):
        # x shape: (batch_size, seq_len)
        x = self.embedding(x)
        x, hidden = self.lstm(x, hidden)
        logits = self.fc(x)
        return logits, hidden
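# Shape walk-through (descriptive note): token ids of shape (batch_size, seq_len)
# pass through the embedding to (batch_size, seq_len, 8), through the LSTM to
# (batch_size, seq_len, 16), and through the linear layer to logits of shape
# (batch_size, seq_len, 3), one score per vocabulary symbol.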
# Set up the model, loss function, and optimizer
model = SeqGenerate()
criterion = nn.CrossEntropyLoss(ignore_index=2)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training loop
epochs = 50
batch_size = 32
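# Next-token training setup (illustrative example, not in the original post):
# for a sample '000111', the input is seq[:-1] = '00011' and the target is
# seq[1:] = '00111', both padded with 2 up to max_seq_len - 1 = 9:
#   inputs -> [0, 0, 0, 1, 1, 2, 2, 2, 2]
#   labels -> [0, 0, 1, 1, 1, 2, 2, 2, 2]
# CrossEntropyLoss(ignore_index=2) skips the padded positions.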
for epoch in range(epochs):
    total_loss = 0.0
    num_batches = 0
    random.shuffle(train_data)
    for i in range(0, len(train_data), batch_size):
        batch = train_data[i:i + batch_size]
        inputs = [encode_seq(seq[:-1], max_seq_len - 1) for seq in batch]
        labels = [encode_seq(seq[1:], max_seq_len - 1) for seq in batch]
        inputs = torch.stack(inputs)
        labels = torch.stack(labels)
        optimizer.zero_grad()
        logits, _ = model(inputs)
        loss = criterion(logits.reshape(-1, 3), labels.reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {total_loss / num_batches:.4f}")
# Generate a 0^n 1^n sequence
def generate_seq(model, start_char='0', target_n=3):
    model.eval()
    seq = [start_char]
    hidden = None
    # Feed '0' for the rest of the 0-block and record the model's predictions
    for _ in range(target_n - 1):
        input_tensor = torch.tensor([[0]], dtype=torch.long)
        logits, hidden = model(input_tensor, hidden)
        next_char = torch.argmax(logits, dim=-1).item()
        seq.append('0' if next_char == 0 else '1')
    # Feed '1' for the 1-block and record the model's predictions
    for _ in range(target_n):
        input_tensor = torch.tensor([[1]], dtype=torch.long)
        logits, hidden = model(input_tensor, hidden)
        next_char = torch.argmax(logits, dim=-1).item()
        seq.append('0' if next_char == 0 else '1')
    return ''.join(seq)

result = generate_seq(model, target_n=3)
print(f"Generated 0^n 1^n sequence: {result}")
Output:

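As a quick sanity check (a sketch added here, not part of the original post), the trained model can be queried for every n it saw during training and the output compared against the expected pattern:

for n in range(1, max_n + 1):
    generated = generate_seq(model, target_n=n)
    expected = '0' * n + '1' * n
    print(f"n={n}: {generated} -> {'match' if generated == expected else 'mismatch'}")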