用循环神经网络生成0^n 1^n形式的简单序列

题目详情如下:

微信图片_20251211231041_46_13

源代码
import torch
import torch.nn as nn
import torch.optim as optim
import random

#数据准备与预处理
def generate_data(num_samples, max_n=5):
    """Build a list of `num_samples` training strings of the form 0^n 1^n.

    For each sample, n is drawn uniformly from [1, max_n], so the longest
    string has length 2 * max_n.
    """
    lengths = (random.randint(1, max_n) for _ in range(num_samples))
    return ['0' * n + '1' * n for n in lengths]

def encode_seq(seq, max_len):
    """Encode a character sequence as a LongTensor of token ids.

    '0' maps to 0, every other character maps to 1, and the result is
    right-padded with the padding token 2 up to `max_len`.
    """
    tokens = [0 if ch == '0' else 1 for ch in seq]
    tokens.extend([2] * (max_len - len(tokens)))
    return torch.tensor(tokens, dtype=torch.long)

# Data-generation hyperparameters: sequences are 0^n 1^n with 1 <= n <= max_n,
# so the longest possible sequence has length 2 * max_n.
max_n = 5
max_seq_len = 2 * max_n
train_data = generate_data(1000, max_n)  # 1000 training strings

#定义LSTM模型
class SeqGenerate(nn.Module):
    """Character-level LSTM language model for 0^n 1^n strings.

    Vocabulary of 3 tokens: 0 -> '0', 1 -> '1', 2 -> padding.
    """

    def __init__(self, vocab_size=3, embed_dim=8, hidden_dim=16):
        super().__init__()
        # Layers are created in a fixed order so seeded weight init is reproducible.
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):
        """Map token ids of shape (batch, seq_len) to per-step logits.

        Returns (logits, hidden): logits has shape (batch, seq_len, vocab_size);
        hidden is the LSTM (h, c) state, usable for incremental decoding.
        """
        embedded = self.embedding(x)
        outputs, hidden = self.lstm(embedded, hidden)
        return self.fc(outputs), hidden

#训练模型
# Train the model with teacher forcing: input is seq[:-1], target is seq[1:].
model = SeqGenerate()
criterion = nn.CrossEntropyLoss(ignore_index=2)  # padding token 2 is excluded from the loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 50
batch_size = 32
# Fixed: total_loss sums per-batch MEAN losses, so the epoch average must divide
# by the number of batches, not len(train_data) (which understated it ~32x).
num_batches = (len(train_data) + batch_size - 1) // batch_size
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    random.shuffle(train_data)

    for i in range(0, len(train_data), batch_size):
        batch = train_data[i:i + batch_size]
        inputs = torch.stack([encode_seq(seq[:-1], max_seq_len - 1) for seq in batch])
        labels = torch.stack([encode_seq(seq[1:], max_seq_len - 1) for seq in batch])

        optimizer.zero_grad()
        logits, _ = model(inputs)
        # Flatten (batch, seq_len, vocab) -> (batch*seq_len, vocab) for CE loss.
        loss = criterion(logits.reshape(-1, 3), labels.reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss:{total_loss / num_batches:.4f}")

#生成 0n 1n序列
def generate_seq(model, start_char='0', target_n=3):
    model.eval()
    seq = [start_char]
    hidden = None

    for _ in range(target_n - 1):
        input_tensor = torch.tensor([[0]], dtype=torch.long)
        logits, hidden = model(input_tensor, hidden)
        next_char = torch.argmax(logits, dim=-1).item()
        seq.append('0' if next_char == 0 else '1')
    
    for _ in range(target_n):
        nput_tensor = torch.tensor([[1]], dtype=torch.long)
        logits, hidden = model(input_tensor, hidden)
        next_char = torch.argmax(logits, dim=-1).item()
        seq.append('0' if next_char == 0 else '1')
    
    return ''.join(seq)

# Fixed: the original wrote `generate_seq = generate_seq(...)`, rebinding the
# function name to its string result and shadowing the function.
generated_seq = generate_seq(model, target_n=3)
print(f"生成的0^n 1^n序列:{generated_seq}")

输出结果:

屏幕截图 2025-12-11 230755

posted @ 2025-12-11 23:14  与尔5  阅读(4)  评论(0)    收藏  举报