TensorFlow 11.5 Recurrent Neural Networks: RNN vs. LSTM, and LSTM in Practice

The Origin of LSTM

When we computed the loss of an RNN earlier, the gradients could easily vanish or explode. The LSTM was introduced largely to reduce the vanishing-gradient problem.
Another important limitation is that a plain RNN can only remember fairly short sequences: for a sentence with many words, by the time it processes the last word it may already have forgotten the earlier ones. The LSTM improves on this and can learn over much longer sequences. The plain RNN provides only short-term memory; adding "long" to it gives long short-term memory, which is exactly the LSTM.

Recall the plain RNN: the same cell is applied at every time step, taking the current input \(x_t\) and the previous hidden state \(h_{t-1}\) to produce the new hidden state \(h_t\). Unrolled along the time dimension, it becomes a chain of identical cells that all share the same weights.

How the LSTM Works

The LSTM cell adds three "gates" that control the flow of information. The "gate" here is only a metaphor: each gate is a sigmoid whose output lies between 0 (fully closed) and 1 (fully open). In the cell diagram, the element-wise multiplication symbol performs information filtering, and the addition symbol performs information fusion.
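A tiny illustration of this (not from the original post; a hedged sketch with made-up numbers): multiplying a signal element-wise by a sigmoid gate keeps, drops, or attenuates each component, while addition merges new information in.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

info = np.array([0.9, -1.2, 0.3])             # information flowing through the cell
gate = sigmoid(np.array([6.0, -6.0, 0.0]))    # gate values: ~1 (open), ~0 (closed), 0.5

filtered = gate * info                        # multiplication = filtering
fused = filtered + np.array([0.5, 0.5, 0.5])  # addition = fusing in new information
print(gate)       # [0.998 0.002 0.5  ]
print(filtered)   # first component kept, second dropped, third halved
print(fused)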

The first gate: the forget gate, which decides how much of the previous cell state \(C_{t-1}\) is kept.
The second gate: the input gate, which decides how much of the new candidate state \(\tilde{C}_t\) is written into the cell.
The third gate: the output gate, which decides how much of the cell state is exposed as the hidden state \(h_t\).

Putting it all together: the three gates combine into the full LSTM cell; the standard equations are listed below for reference.
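The original figures are not reproduced here; for reference, this is the standard LSTM formulation, where \(\sigma\) is the sigmoid, \(\odot\) is element-wise multiplication, and \([h_{t-1}, x_t]\) denotes concatenating the previous hidden state with the current input:

\[
\begin{aligned}
f_t &= \sigma(W_f [h_{t-1}, x_t] + b_f) \\
i_t &= \sigma(W_i [h_{t-1}, x_t] + b_i) \\
\tilde{C}_t &= \tanh(W_C [h_{t-1}, x_t] + b_C) \\
C_t &= f_t \odot C_{t-1} + i_t \odot \tilde{C}_t \\
o_t &= \sigma(W_o [h_{t-1}, x_t] + b_o) \\
h_t &= o_t \odot \tanh(C_t)
\end{aligned}
\]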

The core of the LSTM design is these three gates, which behave somewhat like a logic unit.
Looking at the cell-state update, we can observe the following (a small numeric sketch follows the list):
1. If the input gate = 0 (closed) and the forget gate = 1 (open), the cell remembers the previous state and ignores the new input, i.e. \(C_t = C_{t-1}\).
2. If the input gate = 1 (open) and the forget gate = 1 (open), the new candidate value is added on top of the previous one.
3. If the input gate = 0 (closed) and the forget gate = 0 (closed), then \(C_t = 0 + 0\) and the stored value is cleared.
4. If the input gate = 1 (open) and the forget gate = 0 (closed), the new candidate completely overwrites the old value.
Finally, the output gate lets the cell expose its state selectively; it does not have to output everything.
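A minimal numeric sketch (not part of the original post) of the cell-state update \(C_t = f_t \odot C_{t-1} + i_t \odot \tilde{C}_t\), checking the four gate settings above with made-up values:

import numpy as np

def cell_update(c_prev, c_tilde, input_gate, forget_gate):
    # Core LSTM memory update: C_t = forget_gate * C_{t-1} + input_gate * C~_t
    return forget_gate * c_prev + input_gate * c_tilde

c_prev  = np.array([2.0])   # previous cell state C_{t-1}
c_tilde = np.array([5.0])   # new candidate value C~_t

print(cell_update(c_prev, c_tilde, input_gate=0., forget_gate=1.))  # [2.] keep old, ignore new
print(cell_update(c_prev, c_tilde, input_gate=1., forget_gate=1.))  # [7.] add new onto old
print(cell_update(c_prev, c_tilde, input_gate=0., forget_gate=0.))  # [0.] clear the memory
print(cell_update(c_prev, c_tilde, input_gate=1., forget_gate=0.))  # [5.] overwrite with new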

Why can the LSTM alleviate the vanishing-gradient problem?

In the LSTM, the gradient no longer contains the factor \(W_{hh}^k\): there is no \(k\)-th power of the recurrent weight matrix. Instead, the gradient along the cell state is a sum of several factors that constrain one another. The crucial point is the absence of \(W_{hh}^k\).
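The supporting figure is missing here; as a hedged sketch of the standard argument, differentiating the cell-state update \(C_t = f_t \odot C_{t-1} + i_t \odot \tilde{C}_t\) with respect to \(C_{t-1}\) yields a sum of interacting terms rather than a repeated power of \(W_{hh}\):

\[
\frac{\partial C_t}{\partial C_{t-1}}
= f_t
+ C_{t-1} \odot \frac{\partial f_t}{\partial C_{t-1}}
+ \tilde{C}_t \odot \frac{\partial i_t}{\partial C_{t-1}}
+ i_t \odot \frac{\partial \tilde{C}_t}{\partial C_{t-1}}
\]

The direct path contributes the forget gate \(f_t \in (0, 1)\), which the network learns at every time step, so long-range gradients are not forced through \(W_{hh}^k\) the way they are in the plain RNN.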

LSTM in Practice

LSTM
The model below is adapted from the earlier RNN sentiment-classification example: the SimpleRNN layers are simply replaced by LSTM layers.

GRU

  • simpler
  • lower computation cost
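As a hedged sketch (not part of the original post): the GRU is a drop-in replacement in the models below, obtained by swapping the two LSTM layers for layers.GRU with the same arguments.

from tensorflow import keras
from tensorflow.keras import layers

# Two-layer GRU stack mirroring the LSTM stack used in the models below.
gru_stack = keras.Sequential([
    layers.GRU(64, dropout=0.5, return_sequences=True, unroll=True),
    layers.GRU(64, dropout=0.5, unroll=True),
])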

Custom network

import  os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import  tensorflow as tf
import  numpy as np
from    tensorflow import keras
from    tensorflow.keras import layers


tf.random.set_seed(22)
np.random.seed(22) 
assert tf.__version__.startswith('2.')

batchsz = 128

# the most frequent words
total_words = 10000
max_review_len = 80
embedding_len = 100
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
# x_train:[b, 80]
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)



class MyRNN(keras.Model):

    def __init__(self, units):
        super(MyRNN, self).__init__()


        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)

        # [b, 80, 100] , h_dim: 64
        self.rnn = keras.Sequential([
            # layers.SimpleRNN(units, dropout=0.5, return_sequences=True, unroll=True),
            # layers.SimpleRNN(units, dropout=0.5, unroll=True)

            layers.LSTM(units, dropout=0.5, return_sequences=True, unroll=True),
            layers.LSTM(units, dropout=0.5, unroll=True)
        ])


        # fc, [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """
        net(x) net(x, training=True) :train mode
        net(x, training=False): test
        :param inputs: [b, 80]
        :param training:
        :return:
        """
        # [b, 80]
        x = inputs
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # rnn cell compute
        # x: [b, 80, 100] => [b, 64]
        x = self.rnn(x, training=training)

        # out: [b, 64] => [b, 1]
        x = self.outlayer(x)
        # p(y is pos|x)
        prob = tf.sigmoid(x)

        return prob

def main():
    units = 64
    epochs = 4

    import time

    t0 = time.time()

    model = MyRNN(units)
    model.compile(optimizer = keras.optimizers.Adam(0.001),
                  loss = tf.losses.BinaryCrossentropy(),
                  metrics=['accuracy'])
    model.fit(db_train, epochs=epochs, validation_data=db_test)

    model.evaluate(db_test)


    t1 = time.time()
    # 69.3 seconds, 83%
    print('total time cost:', t1-t0)


if __name__ == '__main__':
    main()

Using the Keras API

import  os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import  tensorflow as tf
import  numpy as np
from    tensorflow import keras
from    tensorflow.keras import layers,Sequential


tf.random.set_seed(22)
np.random.seed(22)
assert tf.__version__.startswith('2.')

batchsz = 128
total_words = 10000
max_review_len = 80
embedding_len = 100
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
# x_train:[b, 80]
# x_test: [b, 80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)


print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('y_train shape:', y_train.shape)
print('x_test shape:', x_test.shape)


model = Sequential([
    layers.Embedding(input_dim=total_words, output_dim=embedding_len, input_length=max_review_len),
    layers.LSTM(64, dropout=0.5, return_sequences=True, unroll=True),
    layers.LSTM(64, dropout=0.5, unroll=True),
    layers.Dense(1,activation='sigmoid')
])


model.build()
model.summary()

model.compile(optimizer="Adamax",
        loss=keras.losses.BinaryCrossentropy(), metrics=["accuracy"])

epochs = 4
history = model.fit(x_train, y_train, epochs=epochs, batch_size=batchsz,
        validation_data=(x_test, y_test))
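After training, the held-out performance can be checked with model.evaluate; a usage sketch using the variables already defined above:

# Evaluate the trained model on the padded test split.
loss, acc = model.evaluate(x_test, y_test, batch_size=batchsz, verbose=0)
print('test loss: %.4f, test accuracy: %.4f' % (loss, acc))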


