The pad_sequence, pack_padded_sequence, and pad_packed_sequence Functions

pad_sequence

torch.nn.utils.rnn.pad_sequence is a utility function for padding variable-length sequences. It is commonly used in natural language processing (NLP) and time-series tasks to batch sequences of different lengths into a single tensor of the same length.

import torch
from torch.nn.utils.rnn import pad_sequence

# Assume 3 sentences (word-ID sequences) with lengths 4, 3, and 5
sequences = [
    torch.tensor([1, 2, 3, 4]),          # length 4
    torch.tensor([5, 6, 7]),             # length 3
    torch.tensor([8, 9, 10, 11, 12])     # length 5
]

# Pad to shape (batch_size, max_len), since batch_first=True
padded = pad_sequence(sequences, batch_first=True, padding_value=0)
print(padded)
# Padded result: sequences are padded up to the longest length in the batch
#tensor([[ 1,  2,  3,  4,  0],
#        [ 5,  6,  7,  0,  0],
#        [ 8,  9, 10, 11, 12]])
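
For comparison, here is a small sketch of the default batch_first=False layout together with a non-zero padding_value (the variable name padded_t is just for illustration); the printed values follow from the same three sequences above.

# With batch_first=False (the default), the result has shape (max_len, batch_size)
padded_t = pad_sequence(sequences, batch_first=False, padding_value=-1)
print(padded_t.shape)   # torch.Size([5, 3])
print(padded_t)
# tensor([[ 1,  5,  8],
#         [ 2,  6,  9],
#         [ 3,  7, 10],
#         [ 4, -1, 11],
#         [-1, -1, 12]])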

pack_padded_sequence and pad_packed_sequence

In PyTorch, pack_padded_sequence and pad_packed_sequence are a pair of functions for handling variable-length sequences efficiently. In recurrent networks such as RNN/LSTM/GRU, they let the computation skip the padded positions, saving memory and compute.

import torch
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# Assume 3 sentences (word-ID sequences) with lengths 4, 3, and 5
sequences = [
    torch.tensor([1, 2, 3, 4]),          # length 4
    torch.tensor([5, 6, 7]),             # length 3
    torch.tensor([8, 9, 10, 11, 12])     # length 5
]

padded = pad_sequence(sequences, batch_first=True, padding_value=0)
print(padded)

packed = pack_padded_sequence(input=padded, lengths=[4, 3, 5], batch_first=True, enforce_sorted=False)
print(packed)

unpacked, lengths = pad_packed_sequence(packed, batch_first=True, padding_value=0)
print(unpacked)
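
A PackedSequence stores the non-padded elements in a flat data tensor plus a batch_sizes tensor that says how many sequences are still active at each time step. With lengths [4, 3, 5], enforce_sorted=False reorders the batch internally to the longest-first order [5, 4, 3], so batch_sizes becomes [3, 3, 3, 2, 1]. A minimal sketch for inspecting the objects created above:

print(packed.data.shape)      # torch.Size([12]) -- 4 + 3 + 5 non-pad elements
print(packed.batch_sizes)     # tensor([3, 3, 3, 2, 1])
print(packed.sorted_indices)  # tensor([2, 0, 1]) -- internal longest-first order
print(lengths)                # tensor([4, 3, 5]) -- original lengths returned by pad_packed_sequence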

How to set this up for normal training:

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# Assume 3 variable-length sequences (word IDs); each index represents a word. An index2word list can be used for the reverse mapping.
sequences = [
    torch.tensor([1, 2, 3]),       # length 3
    torch.tensor([4, 5]),          # length 2
    torch.tensor([6, 7, 8, 9])     # length 4
]

# 1. Pad the raw sequences (batch_first=True); padding goes up to the longest length in the batch
padded_input = pad_sequence(sequences, batch_first=True, padding_value=0)
print("Padded Input (word IDs):\n", padded_input)

# 2. Word embedding (assume vocab size = 10, embedding dim = 16); the vectors at index 0 (padding) stay all-zero
embedding = nn.Embedding(num_embeddings=10, embedding_dim=16, padding_idx=0)
# Shape: [batch_size, max_seq_len, embed_dim] -- 3 sentences, padded to the max length, one vector per word
embedded = embedding(padded_input)
print("Embedded Shape:", embedded.shape)

# 3. Get the actual lengths and sort them in descending order (required by the default enforce_sorted=True; also an RNN efficiency optimization)
lengths = torch.tensor([len(seq) for seq in sequences])
lengths, sort_idx = lengths.sort(descending=True)
embedded = embedded[sort_idx]  # 排序后的嵌入张量

# 4. Pack the sequences (skip the padded positions); the pad values are never fed through the RNN
packed = pack_padded_sequence(embedded, lengths, batch_first=True)
print("Packed Data Shape:", packed.data.shape)

# 5. Feed the packed batch into the RNN (a unidirectional LSTM here)
rnn = nn.LSTM(input_size=16, hidden_size=64, batch_first=True)
output, (hn, cn) = rnn(packed)
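# output is itself a PackedSequence; hn and cn have shape
# [num_layers * num_directions, batch_size, hidden_size] = [1, 3, 64],
# and hn holds the hidden state at each sequence's last *valid* time step (not the padded end)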

# 6. Unpack the output back into a padded tensor
unpacked_output, _ = pad_packed_sequence(output, batch_first=True, padding_value=0)
print("Unpacked Output Shape:", unpacked_output.shape)

# 7. Restore the original batch order (optional)
_, unsort_idx = sort_idx.sort()
unpacked_output = unpacked_output[unsort_idx]
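
As a variant, the manual sort in step 3 and the unsort in step 7 can be left to PyTorch by passing enforce_sorted=False. A minimal sketch under that assumption, reusing sequences, padded_input, embedding, and rnn from above (packed2 / output2 / unpacked2 are just illustrative names):

raw_lengths = torch.tensor([len(seq) for seq in sequences])   # unsorted lengths: [3, 2, 4]
packed2 = pack_padded_sequence(embedding(padded_input), raw_lengths,
                               batch_first=True, enforce_sorted=False)
output2, (hn2, cn2) = rnn(packed2)
unpacked2, _ = pad_packed_sequence(output2, batch_first=True)
print(unpacked2.shape)  # torch.Size([3, 4, 64]), already back in the original batch order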