# RNNCell, LSTMCell, tf.nn.static_rnn, tf.nn.static_bidirectional_rnn, and tf.nn.bidirectional_dynamic_rnn

@deprecation.deprecated(None,
                        "Please use keras.layers.RNN(cell, unroll=True), "
                        "which is equivalent to this API")
@tf_export(v1=["nn.static_rnn"])
def static_rnn(cell,
               inputs,
               initial_state=None,
               dtype=None,
               sequence_length=None,
               scope=None):
    """Creates a recurrent neural network specified by RNNCell `cell`.

    The simplest form of RNN network generated is:

    ```python
    state = cell.zero_state(...)  # initial cell state
    outputs = []
    for input_ in inputs:
        # Advance the cell one step using input_ and the current state.
        output, state = cell(input_, state)
        outputs.append(output)
    # outputs holds the output of every time step; state is the final
    # cell state.
    return (outputs, state)
    ```

    However, a few other options are available:

    An initial state can be provided.

    If the sequence_length vector is provided, dynamic calculation is
    performed. This method of calculation does not compute the RNN steps
    past the maximum sequence length of the minibatch (thus saving
    computational time), and properly propagates the state at an example's
    sequence length to the final state output.

    The dynamic calculation performed is, at time `t` for batch row `b`,

    ```python
    # Past the end of row b: emit a zero output and keep the state from
    # the row's last valid step.
    (output, state)(b, t) =
      (t >= sequence_length(b))
        ? (zeros(cell.output_size), states(b, sequence_length(b) - 1))
        : cell(input(b, t), state(b, t - 1))
    ```

    Args:
      cell: An instance of RNNCell.
      inputs: A length T list of inputs, each a Tensor of shape
        [batch_size, input_size], or a nested tuple of such elements.
      initial_state: (optional) An initial state for the RNN. If
        cell.state_size is an integer, this must be a Tensor of appropriate
        type and shape [batch_size, cell.state_size]. If cell.state_size is
        a tuple, this should be a tuple of tensors having shapes
        [batch_size, s] for s in cell.state_size.
      dtype: (optional) The data type for the initial state and expected
        output. Required if initial_state is not provided or RNN state has
        a heterogeneous dtype.
      sequence_length: Specifies the length of each sequence in inputs. An
        int32 or int64 vector (tensor) size [batch_size], values in [0, T).
        NOTE(review): a full-length sequence has length T, so the upper
        bound is presumably inclusive -- confirm.
      scope: VariableScope for the created subgraph; defaults to "rnn".

    Returns:
      A pair (outputs, state) where:

      - outputs is a length T list of outputs (one for each input), or a
        nested tuple of such elements.
      - state is the final state.

    Raises:
      TypeError: If cell is not an instance of RNNCell.
      ValueError: If inputs is None or an empty list, or if the input depth
        (column size) cannot be inferred from inputs via shape inference.
    """

@deprecation.deprecated(None, "Please use keras.layers.Bidirectional("
                        "keras.layers.RNN(cell, unroll=True)), which is "
                        "equivalent to this API")
@tf_export(v1=["nn.static_bidirectional_rnn"])
def static_bidirectional_rnn(cell_fw,
                             cell_bw,
                             inputs,
                             initial_state_fw=None,
                             initial_state_bw=None,
                             dtype=None,
                             sequence_length=None,
                             scope=None):
    """Creates a bidirectional recurrent neural network.

    Similar to the unidirectional case (static_rnn) but takes input and
    builds independent forward and backward RNNs with the final forward and
    backward outputs depth-concatenated, such that the output will have the
    format [time][batch][cell_fw.output_size + cell_bw.output_size]. The
    input_size of forward and backward cell must match. The initial state
    for both directions is zero by default (but can be set optionally) and
    no intermediate states are ever returned -- the network is fully
    unrolled for the given (passed in) length(s) of the sequence(s) or
    completely unrolled if length(s) is not given.

    Args:
      cell_fw: An instance of RNNCell, to be used for forward direction.
      cell_bw: An instance of RNNCell, to be used for backward direction.
      inputs: A length T list of inputs, each a tensor of shape
        [batch_size, input_size], or a nested tuple of such elements.
      initial_state_fw: (optional) An initial state for the forward RNN.
        This must be a tensor of appropriate type and shape
        [batch_size, cell_fw.state_size]. If cell_fw.state_size is a tuple,
        this should be a tuple of tensors having shapes [batch_size, s] for
        s in cell_fw.state_size.
      initial_state_bw: (optional) Same as for initial_state_fw, but using
        the corresponding properties of cell_bw.
      dtype: (optional) The data type for the initial state. Required if
        either of the initial states is not provided.
      sequence_length: (optional) An int32/int64 vector, size [batch_size],
        containing the actual lengths for each of the sequences.
      scope: VariableScope for the created subgraph; defaults to
        "bidirectional_rnn".

    Returns:
      A tuple (outputs, output_state_fw, output_state_bw) where:

      - outputs is a length T list of outputs (one for each input), which
        are depth-concatenated forward and backward outputs.
      - output_state_fw is the final state of the forward rnn.
      - output_state_bw is the final state of the backward rnn.

    Raises:
      TypeError: If cell_fw or cell_bw is not an instance of RNNCell.
      ValueError: If inputs is None or an empty list.
    """

@deprecation.deprecated(None, "Please use keras.layers.Bidirectional("
                        "keras.layers.RNN(cell)), which is equivalent to "
                        "this API")
@tf_export(v1=["nn.bidirectional_dynamic_rnn"])
def bidirectional_dynamic_rnn(cell_fw,
                              cell_bw,
                              inputs,
                              sequence_length=None,
                              initial_state_fw=None,
                              initial_state_bw=None,
                              dtype=None,
                              parallel_iterations=None,
                              swap_memory=False,
                              time_major=False,
                              scope=None):
    """Creates a dynamic version of bidirectional recurrent neural network.

    Takes input and builds independent forward and backward RNNs. The
    input_size of forward and backward cell must match. The initial state
    for both directions is zero by default (but can be set optionally) and
    no intermediate states are ever returned -- the network is fully
    unrolled for the given (passed in) length(s) of the sequence(s) or
    completely unrolled if length(s) is not given.

    Args:
      cell_fw: An instance of RNNCell, to be used for forward direction.
      cell_bw: An instance of RNNCell, to be used for backward direction.
      inputs: The RNN inputs.
        If time_major == False (default), this must be a tensor of shape:
          [batch_size, max_time, ...], or a nested tuple of such elements.
        If time_major == True, this must be a tensor of shape:
          [max_time, batch_size, ...], or a nested tuple of such elements.
      sequence_length: (optional) An int32/int64 vector, size [batch_size],
        containing the actual lengths for each of the sequences in the
        batch. If not provided, all batch entries are assumed to be full
        sequences; and time reversal is applied from time 0 to max_time for
        each sequence.
      initial_state_fw: (optional) An initial state for the forward RNN.
        This must be a tensor of appropriate type and shape
        [batch_size, cell_fw.state_size]. If cell_fw.state_size is a tuple,
        this should be a tuple of tensors having shapes [batch_size, s] for
        s in cell_fw.state_size.
      initial_state_bw: (optional) Same as for initial_state_fw, but using
        the corresponding properties of cell_bw.
      dtype: (optional) The data type for the initial states and expected
        output. Required if initial_states are not provided or RNN states
        have a heterogeneous dtype.
      parallel_iterations: (Default: 32). The number of iterations to run
        in parallel. Those operations which do not have any temporal
        dependency and can be run in parallel, will be. This parameter
        trades off time for space. Values >> 1 use more memory but take
        less time, while smaller values use less memory but computations
        take longer.
      swap_memory: Transparently swap the tensors produced in forward
        inference but needed for back prop from GPU to CPU. This allows
        training RNNs which would typically not fit on a single GPU, with
        very minimal (or no) performance penalty.
      time_major: The shape format of the inputs and outputs Tensors. If
        true, these Tensors must be shaped [max_time, batch_size, depth].
        If false, these Tensors must be shaped
        [batch_size, max_time, depth]. Using time_major = True is a bit
        more efficient because it avoids transposes at the beginning and
        end of the RNN calculation. However, most TensorFlow data is
        batch-major, so by default this function accepts input and emits
        output in batch-major form.
      scope: VariableScope for the created subgraph; defaults to
        "bidirectional_rnn".

    Returns:
      A tuple (outputs, output_states) where:

      outputs: A tuple (output_fw, output_bw) containing the forward and
        the backward rnn output Tensor.
        If time_major == False (default),
          output_fw will be a Tensor shaped:
          [batch_size, max_time, cell_fw.output_size]
          and output_bw will be a Tensor shaped:
          [batch_size, max_time, cell_bw.output_size].
        If time_major == True,
          output_fw will be a Tensor shaped:
          [max_time, batch_size, cell_fw.output_size]
          and output_bw will be a Tensor shaped:
          [max_time, batch_size, cell_bw.output_size].
        It returns a tuple instead of a single concatenated Tensor, unlike
        in the bidirectional_rnn. If the concatenated one is preferred,
        the forward and backward outputs can be concatenated as
        tf.concat(outputs, 2).
      output_states: A tuple (output_state_fw, output_state_bw) containing
        the forward and the backward final states of bidirectional rnn.

    Raises:
      TypeError: If cell_fw or cell_bw is not an instance of RNNCell.
    """

@tf_export("nn.rnn_cell.RNNCell")
class RNNCell(base_layer.Layer):
    """Abstract object representing an RNN cell.

    Every RNNCell must have the properties below and implement call with
    the signature (output, next_state) = call(input, state). The optional
    third input argument, scope, is allowed for backwards compatibility
    purposes; but should be left off for new subclasses.

    This definition of cell differs from the definition used in the
    literature. In the literature, 'cell' refers to an object with a single
    scalar output. This definition refers to a horizontal array of such
    units.

    An RNN cell, in the most abstract setting, is anything that has
    a state and performs some operation that takes a matrix of inputs.
    This operation results in an output matrix with self.output_size
    columns. If self.state_size is an integer, this operation also results
    in a new state matrix with self.state_size columns. If self.state_size
    is a (possibly nested tuple of) TensorShape object(s), then it should
    return a matching structure of Tensors having shape
    [batch_size].concatenate(s) for each s in self.state_size.
    """

    def __call__(self, inputs, state, scope=None):
        """Run this RNN cell on inputs, starting from the given state.

        Args:
          inputs: 2-D tensor with shape [batch_size, input_size].
          state: if self.state_size is an integer, this should be a 2-D
            Tensor with shape [batch_size, self.state_size]. Otherwise, if
            self.state_size is a tuple of integers, this should be a tuple
            with shapes [batch_size, s] for s in self.state_size.
          scope: VariableScope for the created subgraph; defaults to class
            name.

        Returns:
          A pair containing:

          - Output: A 2-D tensor with shape [batch_size, self.output_size].
          - New state: Either a single 2-D tensor, or a tuple of tensors
            matching the arity and shapes of state.
        """

class LayerRNNCell(RNNCell):
    """Subclass of RNNCells that act like proper tf.Layer objects.

    For backwards compatibility purposes, most RNNCell instances allow
    their call methods to instantiate variables via tf.get_variable. The
    underlying variable scope thus keeps track of any variables, and
    returns cached versions. This is atypical of tf.layer objects, which
    separate this part of layer building into a build method that is only
    called once.

    Here we provide a subclass for RNNCell objects that act exactly as
    Layer objects do. They must provide a build method and their
    call methods do not access Variables via tf.get_variable.
    """

@tf_export(v1=["nn.rnn_cell.LSTMCell"])
class LSTMCell(LayerRNNCell):
    """Long short-term memory unit (LSTM) recurrent network cell.

    The default non-peephole implementation is based on:

      https://pdfs.semanticscholar.org/1154/0131eae85b2e11d53df7f1360eeb6476e7f4.pdf

    Felix Gers, Jurgen Schmidhuber, and Fred Cummins.
    "Learning to forget: Continual prediction with LSTM." IET, 850-855, 1999.

    The peephole implementation is based on:

    Hasim Sak, Andrew Senior, and Francoise Beaufays.
    "Long short-term memory recurrent neural network architectures for
     large scale acoustic modeling." INTERSPEECH, 2014.

    The class uses optional peep-hole connections, optional cell clipping,
    and an optional projection layer.

    Note that this cell is not optimized for performance. Please use
    tf.contrib.cudnn_rnn.CudnnLSTM for better performance on GPU, or
    tf.contrib.rnn.LSTMBlockCell and tf.contrib.rnn.LSTMBlockFusedCell for
    better performance on CPU.
    """
    @deprecated(None, "This class is equivalent as tf.keras.layers.LSTMCell,"
                " and will be replaced by that in Tensorflow 2.0.")
    def __init__(self, num_units,
                 use_peepholes=False, cell_clip=None,
                 initializer=None, num_proj=None, proj_clip=None,
                 num_unit_shards=None, num_proj_shards=None,
                 forget_bias=1.0, state_is_tuple=True,
                 activation=None, reuse=None, name=None, dtype=None, **kwargs):
        """Initialize the parameters for an LSTM cell.

        Args:
          num_units: int, The number of units in the LSTM cell.
          use_peepholes: bool, set True to enable diagonal/peephole
            connections.
          cell_clip: (optional) A float value, if provided the cell state
            is clipped by this value prior to the cell output activation.
          initializer: (optional) The initializer to use for the weight and
            projection matrices.
          num_proj: (optional) int, The output dimensionality for the
            projection matrices. If None, no projection is performed.
          proj_clip: (optional) A float value. If num_proj > 0 and
            proj_clip is provided, then the projected values are clipped
            elementwise to within [-proj_clip, proj_clip].
          num_unit_shards: Deprecated, will be removed by Jan. 2017.
            Use a variable_scope partitioner instead.
          num_proj_shards: Deprecated, will be removed by Jan. 2017.
            Use a variable_scope partitioner instead.
          forget_bias: Biases of the forget gate are initialized by default
            to 1 in order to reduce the scale of forgetting at the
            beginning of the training. Must set it manually to 0.0 when
            restoring from CudnnLSTM trained checkpoints.
          state_is_tuple: If True, accepted and returned states are
            2-tuples of the c_state and m_state. If False, they are
            concatenated along the column axis. This latter behavior will
            soon be deprecated.
          activation: Activation function of the inner states. Default:
            tanh. It could also be a string that is within Keras activation
            function names.
          reuse: (optional) Python boolean describing whether to reuse
            variables in an existing scope. If not True, and the existing
            scope already has the given variables, an error is raised.
          name: String, the name of the layer. Layers with the same name
            will share weights, but to avoid mistakes we require reuse=True
            in such cases.
          dtype: Default dtype of the layer (default of None means use the
            type of the first input). Required when build is called before
            call.
          **kwargs: Dict, keyword named properties for common layer
            attributes, like trainable etc when constructing the cell from
            configs of get_config().

          When restoring from CudnnLSTM-trained checkpoints, use
          CudnnCompatibleLSTMCell instead.
        """

# Posted 2020-07-31 20:30 by ZH奶酪 (blog footer: 108 views, 0 comments)