1 import torch
2 import torch.nn as nn
3 import time
4 import math
5 import sys
6 sys.path.append("/home/kesci/input")
7 import d2l_jay9460 as d2l
# Load the Jay Chou lyrics dataset: flat sequence of token indices, the
# char<->index lookup tables, and the vocabulary size.
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()
# Run on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# BUG FIX: `num_hiddens` was used below without ever being defined, which
# raises NameError. 256 is the hidden size used throughout this tutorial.
num_hiddens = 256
rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=num_hiddens)

# Smoke-test the layer. nn.RNN expects input of shape
# (num_steps, batch_size, input_size); passing state=None means a zero
# initial hidden state.
num_steps, batch_size = 35, 2
X = torch.rand(num_steps, batch_size, vocab_size)
state = None
Y, state_new = rnn_layer(X, state)
# Y: (num_steps, batch_size, num_hiddens); state_new: (1, batch_size, num_hiddens)
print(Y.shape, state_new.shape)
class RNNModel(nn.Module):
    """Wrap a recurrent layer with an output projection to vocabulary logits.

    Parameters
    ----------
    rnn_layer : nn.RNN / nn.GRU / nn.LSTM
        The recurrent layer; its hidden size (doubled if bidirectional)
        determines the input width of the output projection.
    vocab_size : int
        Number of distinct tokens; both the one-hot input width and the
        output logit width.
    """

    def __init__(self, rnn_layer, vocab_size):
        super(RNNModel, self).__init__()
        self.rnn = rnn_layer
        # A bidirectional layer concatenates forward and backward states.
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, vocab_size)

    def forward(self, inputs, state):
        """Run the RNN over a batch of token-index sequences.

        inputs: (batch_size, num_steps) integer token indices.
        state:  initial hidden state (None for zeros).
        Returns (output, state) where output is
        (num_steps * batch_size, vocab_size) logits.
        """
        # BUG FIX: the original read the module-level global `vocab_size`
        # instead of self.vocab_size. Also replaced the external hand-rolled
        # to_onehot + stack with the built-in one_hot, which yields the same
        # (num_steps, batch_size, vocab_size) float tensor.
        X = torch.nn.functional.one_hot(inputs.long(), self.vocab_size).float()
        X = X.transpose(0, 1).contiguous()
        hiddens, state = self.rnn(X, state)
        # Flatten time and batch dims so the projection is applied per step.
        hiddens = hiddens.reshape(-1, hiddens.shape[-1])
        output = self.dense(hiddens)
        return output, state
def predict_rnn_pytorch(prefix, num_chars, model, vocab_size, device, idx_to_char,
                        char_to_idx):
    """Return `prefix` followed by `num_chars` characters predicted by `model`.

    The model is fed one token at a time; while still inside the prefix the
    true next character is used, afterwards the argmax prediction is used.
    """
    state = None
    generated = [char_to_idx[prefix[0]]]  # indices of prefix + predictions
    total_steps = num_chars + len(prefix) - 1
    for step in range(total_steps):
        # Feed the most recent token as a (1, 1) batch.
        inp = torch.tensor([generated[-1]], device=device).view(1, 1)
        Y, state = model(inp, state)  # forward pass carries the state along
        if step + 1 < len(prefix):
            next_idx = char_to_idx[prefix[step + 1]]  # teacher-force the prefix
        else:
            next_idx = Y.argmax(dim=1).item()  # greedy decode
        generated.append(next_idx)
    return ''.join(idx_to_char[i] for i in generated)