Loss functions
https://github.com/keras-team/keras/blob/master/keras/losses.py
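The page below reproduces the source of Keras's built-in loss module. As quick orientation before the listing: a loss can be passed to `model.compile` as a string name, as one of the plain functions defined near the bottom of the file, or as one of the `Loss` classes defined near the top. A minimal sketch (the tiny model here is purely illustrative):

```python
from keras import layers, losses, models

# Hypothetical toy model: 10 input features, one sigmoid output.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10,)),
    layers.Dense(1, activation='sigmoid'),
])

# Three equivalent ways to specify the same loss.
model.compile(optimizer='sgd', loss='binary_crossentropy')        # string name
model.compile(optimizer='sgd', loss=losses.binary_crossentropy)   # function
model.compile(optimizer='sgd', loss=losses.BinaryCrossentropy())  # Loss instance
```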
| """Built-in loss functions. | |
| """ | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import abc | |
| import six | |
| from . import backend as K | |
| from .utils import losses_utils | |
| from .utils.generic_utils import deserialize_keras_object | |
| from .utils.generic_utils import serialize_keras_object | |
| @six.add_metaclass(abc.ABCMeta) | |
| class Loss(object): | |
| """Loss base class. | |
| To be implemented by subclasses: | |
| * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`. | |
| Example subclass implementation: | |
| ```python | |
| class MeanSquaredError(Loss): | |
| def call(self, y_true, y_pred): | |
| y_pred = ops.convert_to_tensor(y_pred) | |
| y_true = math_ops.cast(y_true, y_pred.dtype) | |
| return K.mean(math_ops.square(y_pred - y_true), axis=-1) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss Reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: Optional name for the object. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name=None): | |
| self.reduction = reduction | |
| self.name = name | |
| def __call__(self, y_true, y_pred, sample_weight=None): | |
| """Invokes the `Loss` instance. | |
| # Arguments | |
| y_true: Ground truth values. | |
| y_pred: The predicted values. | |
| sample_weight: Optional `Tensor` whose rank is either 0, or the same rank | |
| as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a | |
| coefficient for the loss. If a scalar is provided, then the loss is | |
| simply scaled by the given value. If `sample_weight` is a tensor of size | |
| `[batch_size]`, then the total loss for each sample of the batch is | |
| rescaled by the corresponding element in the `sample_weight` vector. If | |
| the shape of `sample_weight` matches the shape of `y_pred`, then the | |
| loss of each measurable element of `y_pred` is scaled by the | |
| corresponding value of `sample_weight`. | |
| # Returns | |
| Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same | |
| shape as `y_true`; otherwise, it is scalar. | |
| # Raises | |
| ValueError: If the shape of `sample_weight` is invalid. | |
| """ | |
| # If we are wrapping a lambda function strip '<>' from the name as it is not | |
| # accepted in scope name. | |
| scope_name = 'lambda' if self.name == '<lambda>' else self.name | |
| with K.name_scope(scope_name): | |
| losses = self.call(y_true, y_pred) | |
| return losses_utils.compute_weighted_loss( | |
| losses, sample_weight, reduction=self.reduction) | |
| @classmethod | |
| def from_config(cls, config): | |
| """Instantiates a `Loss` from its config (output of `get_config()`). | |
| # Arguments | |
| config: Output of `get_config()`. | |
| # Returns | |
| A `Loss` instance. | |
| """ | |
| return cls(**config) | |
| def get_config(self): | |
| return {'reduction': self.reduction, 'name': self.name} | |
| @abc.abstractmethod | |
| def call(self, y_true, y_pred): | |
| """Invokes the `Loss` instance. | |
| # Arguments | |
| y_true: Ground truth values, with the same shape as 'y_pred'. | |
| y_pred: The predicted values. | |
| """ | |
| raise NotImplementedError('Must be implemented in subclasses.') | |
| class LossFunctionWrapper(Loss): | |
| """Wraps a loss function in the `Loss` class. | |
| # Arguments | |
| fn: The loss function to wrap, with signature `fn(y_true, y_pred, | |
| **kwargs)`. | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) name for the loss. | |
| **kwargs: The keyword arguments that are passed on to `fn`. | |
| """ | |
| def __init__(self, | |
| fn, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name=None, | |
| **kwargs): | |
| super(LossFunctionWrapper, self).__init__(reduction=reduction, name=name) | |
| self.fn = fn | |
| self._fn_kwargs = kwargs | |
| def call(self, y_true, y_pred): | |
| """Invokes the `LossFunctionWrapper` instance. | |
| # Arguments | |
| y_true: Ground truth values. | |
| y_pred: The predicted values. | |
| # Returns | |
| Loss values per sample. | |
| """ | |
| return self.fn(y_true, y_pred, **self._fn_kwargs) | |
| def get_config(self): | |
| config = {} | |
| for k, v in six.iteritems(self._fn_kwargs): | |
| config[k] = K.eval(v) if K.is_tensor(v) or K.is_variable(v) else v | |
| base_config = super(LossFunctionWrapper, self).get_config() | |
| return dict(list(base_config.items()) + list(config.items())) | |
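`LossFunctionWrapper` is also the natural extension point for custom losses: any callable with signature `fn(y_true, y_pred, **kwargs)` can be wrapped, and the extra keyword arguments are forwarded on every call and included in `get_config()`. A minimal sketch, assuming this module is importable as `keras.losses` (the `scaled_mae` function and its `scale` argument are illustrative, not part of Keras):

```python
from keras import backend as K
from keras.losses import LossFunctionWrapper

def scaled_mae(y_true, y_pred, scale=1.0):
    # Mean absolute error multiplied by a constant factor.
    return scale * K.mean(K.abs(y_pred - y_true), axis=-1)

# `scale` lands in `_fn_kwargs`, so it is passed to `scaled_mae` on every call
# and shows up in the serialized config alongside `reduction` and `name`.
loss = LossFunctionWrapper(scaled_mae, name='scaled_mae', scale=2.0)
```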
class MeanSquaredError(LossFunctionWrapper):
    """Computes the mean of squares of errors between labels and predictions.

    Standalone usage:

    ```python
    mse = keras.losses.MeanSquaredError()
    loss = mse([0., 0., 1., 1.], [1., 1., 1., 0.])
    ```

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.MeanSquaredError())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) name for the loss.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='mean_squared_error'):
        super(MeanSquaredError, self).__init__(
            mean_squared_error, name=name, reduction=reduction)


class MeanAbsoluteError(LossFunctionWrapper):
    """Computes the mean of absolute differences between labels and predictions.

    Standalone usage:

    ```python
    mae = keras.losses.MeanAbsoluteError()
    loss = mae([0., 0., 1., 1.], [1., 1., 1., 0.])
    ```

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.MeanAbsoluteError())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) name for the loss.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='mean_absolute_error'):
        super(MeanAbsoluteError, self).__init__(
            mean_absolute_error, name=name, reduction=reduction)


class MeanAbsolutePercentageError(LossFunctionWrapper):
    """Computes the mean absolute percentage error between `y_true` and `y_pred`.

    Standalone usage:

    ```python
    mape = keras.losses.MeanAbsolutePercentageError()
    loss = mape([0., 0., 1., 1.], [1., 1., 1., 0.])
    ```

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.MeanAbsolutePercentageError())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) name for the loss.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='mean_absolute_percentage_error'):
        super(MeanAbsolutePercentageError, self).__init__(
            mean_absolute_percentage_error, name=name, reduction=reduction)


class MeanSquaredLogarithmicError(LossFunctionWrapper):
    """Computes the mean squared logarithmic error between `y_true` and `y_pred`.

    Standalone usage:

    ```python
    msle = keras.losses.MeanSquaredLogarithmicError()
    loss = msle([0., 0., 1., 1.], [1., 1., 1., 0.])
    ```

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.MeanSquaredLogarithmicError())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) name for the loss.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='mean_squared_logarithmic_error'):
        super(MeanSquaredLogarithmicError, self).__init__(
            mean_squared_logarithmic_error, name=name, reduction=reduction)


class BinaryCrossentropy(LossFunctionWrapper):
    """Computes the cross-entropy loss between true labels and predicted labels.

    Use this cross-entropy loss when there are only two label classes (assumed to
    be 0 and 1). For each example, there should be a single floating-point value
    per prediction.

    In the snippet below, each of the four examples has only a single
    floating-point value, and both `y_pred` and `y_true` have the shape
    `[batch_size]`.

    Standalone usage:

    ```python
    bce = keras.losses.BinaryCrossentropy()
    loss = bce([0., 0., 1., 1.], [1., 1., 1., 0.])
    ```

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.BinaryCrossentropy())
    ```

    # Arguments
        from_logits: Whether to interpret `y_pred` as a tensor of
            [logit](https://en.wikipedia.org/wiki/Logit) values. By default,
            we assume that `y_pred` contains probabilities
            (i.e., values in [0, 1]).
        label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0, we
            compute the loss between the predicted labels and a smoothed version of
            the true labels, where the smoothing squeezes the labels towards 0.5.
            Larger values of `label_smoothing` correspond to heavier smoothing.
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 from_logits=False,
                 label_smoothing=0,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='binary_crossentropy'):
        super(BinaryCrossentropy, self).__init__(
            binary_crossentropy,
            name=name,
            reduction=reduction,
            from_logits=from_logits,
            label_smoothing=label_smoothing)
        self.from_logits = from_logits


class CategoricalCrossentropy(LossFunctionWrapper):
    """Computes the crossentropy loss between the labels and predictions.

    Use this crossentropy loss function when there are two or more label classes.
    We expect labels to be provided in a `one_hot` representation. If you want to
    provide labels as integers, please use `SparseCategoricalCrossentropy` loss.
    There should be `# classes` floating-point values per feature.

    In the snippet below, there are `# classes` floating-point values per
    example. The shape of both `y_pred` and `y_true` is
    `[batch_size, num_classes]`.

    Standalone usage:

    ```python
    cce = keras.losses.CategoricalCrossentropy()
    loss = cce(
        [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]],
        [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]])
    ```

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.CategoricalCrossentropy())
    ```

    # Arguments
        from_logits: Whether to interpret `y_pred` as a tensor of
            [logit](https://en.wikipedia.org/wiki/Logit) values. By default,
            we assume that `y_pred` contains probabilities
            (i.e., values in [0, 1]).
        label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0, we
            compute the loss between the predicted labels and a smoothed version of
            the true labels, where the smoothing squeezes the labels towards 0.5.
            Larger values of `label_smoothing` correspond to heavier smoothing.
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 from_logits=False,
                 label_smoothing=0,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='categorical_crossentropy'):
        super(CategoricalCrossentropy, self).__init__(
            categorical_crossentropy,
            name=name,
            reduction=reduction,
            from_logits=from_logits,
            label_smoothing=label_smoothing)


class SparseCategoricalCrossentropy(LossFunctionWrapper):
    """Computes the crossentropy loss between the labels and predictions.

    Use this crossentropy loss function when there are two or more label classes.
    We expect labels to be provided as integers. If you want to provide labels
    using a `one-hot` representation, please use `CategoricalCrossentropy` loss.
    There should be `# classes` floating-point values per feature for `y_pred`
    and a single floating-point value per feature for `y_true`.

    In the snippet below, there is a single floating-point value per example for
    `y_true` and `# classes` floating-point values per example for `y_pred`.
    The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
    `[batch_size, num_classes]`.

    Standalone usage:

    ```python
    cce = keras.losses.SparseCategoricalCrossentropy()
    loss = cce(
        [0, 1, 2],
        [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]])
    ```

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.SparseCategoricalCrossentropy())
    ```

    # Arguments
        from_logits: Whether to interpret `y_pred` as a tensor of
            [logit](https://en.wikipedia.org/wiki/Logit) values. By default,
            we assume that `y_pred` contains probabilities
            (i.e., values in [0, 1]).
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 from_logits=False,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='sparse_categorical_crossentropy'):
        super(SparseCategoricalCrossentropy, self).__init__(
            sparse_categorical_crossentropy,
            name=name,
            reduction=reduction,
            from_logits=from_logits)
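The two crossentropy classes above differ only in how the labels are encoded: `CategoricalCrossentropy` expects one-hot rows, `SparseCategoricalCrossentropy` expects integer class indices. A hedged standalone comparison (assuming a backend where `K.eval` can materialize the resulting tensors; values are illustrative):

```python
import numpy as np
from keras import backend as K
from keras import losses

y_pred = np.array([[.9, .05, .05], [.05, .9, .05]], dtype='float32')
one_hot_labels = np.array([[1., 0., 0.], [0., 1., 0.]], dtype='float32')
integer_labels = np.array([0, 1])

cce = losses.CategoricalCrossentropy()
scce = losses.SparseCategoricalCrossentropy()

# Both calls should reduce to (approximately) the same scalar.
print(K.eval(cce(one_hot_labels, y_pred)))
print(K.eval(scce(integer_labels, y_pred)))
```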
class Hinge(LossFunctionWrapper):
    """Computes the hinge loss between `y_true` and `y_pred`.

    `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
    provided, we will convert them to -1 or 1.

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.Hinge())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='hinge'):
        super(Hinge, self).__init__(hinge, name=name, reduction=reduction)


class SquaredHinge(LossFunctionWrapper):
    """Computes the squared hinge loss between `y_true` and `y_pred`.

    `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
    provided, we will convert them to -1 or 1.

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.SquaredHinge())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='squared_hinge'):
        super(SquaredHinge, self).__init__(
            squared_hinge, name=name, reduction=reduction)


class CategoricalHinge(LossFunctionWrapper):
    """Computes the categorical hinge loss between `y_true` and `y_pred`.

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.CategoricalHinge())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='categorical_hinge'):
        super(CategoricalHinge, self).__init__(
            categorical_hinge, name=name, reduction=reduction)


class Poisson(LossFunctionWrapper):
    """Computes the Poisson loss between `y_true` and `y_pred`.

    `loss = y_pred - y_true * log(y_pred)`

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.Poisson())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='poisson'):
        super(Poisson, self).__init__(poisson, name=name, reduction=reduction)


class LogCosh(LossFunctionWrapper):
    """Computes the logarithm of the hyperbolic cosine of the prediction error.

    `logcosh = log((exp(x) + exp(-x))/2)`,
    where x is the error `y_pred - y_true`.

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.LogCosh())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='logcosh'):
        super(LogCosh, self).__init__(logcosh, name=name, reduction=reduction)


class KLDivergence(LossFunctionWrapper):
    """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

    `loss = y_true * log(y_true / y_pred)`

    See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.KLDivergence())
    ```

    # Arguments
        reduction: (Optional) Type of loss reduction to apply to loss.
            Default value is `SUM_OVER_BATCH_SIZE`.
        name: (Optional) Name for the object.
    """

    def __init__(self,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='kullback_leibler_divergence'):
        super(KLDivergence, self).__init__(
            kullback_leibler_divergence, name=name, reduction=reduction)


class Huber(LossFunctionWrapper):
    """Computes the Huber loss between `y_true` and `y_pred`.

    Given `x = y_true - y_pred`:

    ```
    loss = 0.5 * x^2                  if |x| <= d
    loss = 0.5 * d^2 + d * (|x| - d)  if |x| > d
    ```

    where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

    Usage with the `compile` API:

    ```python
    model = keras.Model(inputs, outputs)
    model.compile('sgd', loss=keras.losses.Huber())
    ```

    # Arguments
        delta: A float, the point where the Huber loss function changes from
            quadratic to linear.
        reduction: (Optional) Type of reduction to apply to loss.
        name: Optional name for the object.
    """

    def __init__(self,
                 delta=1.0,
                 reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE,
                 name='huber_loss'):
        super(Huber, self).__init__(
            huber_loss, name=name, reduction=reduction, delta=delta)
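All of the classes above route their result through `Loss.__call__`, so each one accepts the optional `sample_weight` argument documented there and applies the configured reduction (`SUM_OVER_BATCH_SIZE` by default). A rough sketch of both behaviours, again assuming a backend where `K.eval` can evaluate the result:

```python
import numpy as np
from keras import backend as K
from keras import losses

y_true = np.array([[0., 1.], [0., 0.]], dtype='float32')
y_pred = np.array([[1., 1.], [1., 0.]], dtype='float32')

mse = losses.MeanSquaredError()

# Default reduction averages the per-sample losses into one scalar.
print(K.eval(mse(y_true, y_pred)))

# A `[batch_size]` weight vector rescales each sample's loss before reduction.
weights = np.array([1.0, 0.5], dtype='float32')
print(K.eval(mse(y_true, y_pred, sample_weight=weights)))
```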
def mean_squared_error(y_true, y_pred):
    if not K.is_tensor(y_pred):
        y_pred = K.constant(y_pred)
    y_true = K.cast(y_true, y_pred.dtype)
    return K.mean(K.square(y_pred - y_true), axis=-1)


def mean_absolute_error(y_true, y_pred):
    if not K.is_tensor(y_pred):
        y_pred = K.constant(y_pred)
    y_true = K.cast(y_true, y_pred.dtype)
    return K.mean(K.abs(y_pred - y_true), axis=-1)


def mean_absolute_percentage_error(y_true, y_pred):
    if not K.is_tensor(y_pred):
        y_pred = K.constant(y_pred)
    y_true = K.cast(y_true, y_pred.dtype)
    diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true),
                                            K.epsilon(),
                                            None))
    return 100. * K.mean(diff, axis=-1)


def mean_squared_logarithmic_error(y_true, y_pred):
    if not K.is_tensor(y_pred):
        y_pred = K.constant(y_pred)
    y_true = K.cast(y_true, y_pred.dtype)
    first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
    second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
    return K.mean(K.square(first_log - second_log), axis=-1)


def squared_hinge(y_true, y_pred):
    y_true = _maybe_convert_labels(y_true)
    return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1)


def hinge(y_true, y_pred):
    y_true = _maybe_convert_labels(y_true)
    return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1)


def categorical_hinge(y_true, y_pred):
    pos = K.sum(y_true * y_pred, axis=-1)
    neg = K.max((1. - y_true) * y_pred, axis=-1)
    return K.maximum(0., neg - pos + 1.)


def logcosh(y_true, y_pred):
    """Logarithm of the hyperbolic cosine of the prediction error.

    `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
    to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
    like the mean squared error, but will not be so strongly affected by the
    occasional wildly incorrect prediction.

    # Arguments
        y_true: tensor of true targets.
        y_pred: tensor of predicted targets.

    # Returns
        Tensor with one scalar loss entry per sample.
    """
    def _logcosh(x):
        return x + K.softplus(-2. * x) - K.log(2.)
    return K.mean(_logcosh(y_pred - y_true), axis=-1)


def huber_loss(y_true, y_pred, delta=1.0):
    error = y_pred - y_true
    abs_error = K.abs(error)
    quadratic = K.minimum(abs_error, delta)
    linear = abs_error - quadratic
    return 0.5 * K.square(quadratic) + delta * linear
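To make the piecewise Huber definition concrete with `delta=1.0`: an absolute error of 0.5 stays in the quadratic branch (0.5 * 0.5^2 = 0.125), while an error of 3.0 falls in the linear branch (0.5 * 1^2 + 1 * (3 - 1) = 2.5). A quick hedged check of the element-wise values (assuming `K.eval` works on constant tensors in the active backend):

```python
import numpy as np
from keras import backend as K
from keras.losses import huber_loss

y_true = K.constant(np.zeros(2, dtype='float32'))
y_pred = K.constant(np.array([0.5, 3.0], dtype='float32'))

# Element-wise Huber values; expect approximately [0.125, 2.5].
print(K.eval(huber_loss(y_true, y_pred, delta=1.0)))
```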
def categorical_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
    y_pred = K.constant(y_pred) if not K.is_tensor(y_pred) else y_pred
    y_true = K.cast(y_true, y_pred.dtype)

    if label_smoothing != 0:
        smoothing = K.cast_to_floatx(label_smoothing)

        def _smooth_labels():
            num_classes = K.cast(K.shape(y_true)[1], y_pred.dtype)
            return y_true * (1.0 - smoothing) + (smoothing / num_classes)

        y_true = K.switch(K.greater(smoothing, 0), _smooth_labels, lambda: y_true)
    return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)


def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
    return K.sparse_categorical_crossentropy(
        y_true, y_pred, from_logits=from_logits, axis=axis)


def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
    y_pred = K.constant(y_pred) if not K.is_tensor(y_pred) else y_pred
    y_true = K.cast(y_true, y_pred.dtype)

    if label_smoothing != 0:
        smoothing = K.cast_to_floatx(label_smoothing)
        y_true = K.switch(K.greater(smoothing, 0),
                          lambda: y_true * (1.0 - smoothing) + 0.5 * smoothing,
                          lambda: y_true)
    return K.mean(
        K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)


def kullback_leibler_divergence(y_true, y_pred):
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    return K.sum(y_true * K.log(y_true / y_pred), axis=-1)


def poisson(y_true, y_pred):
    return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1)


def cosine_proximity(y_true, y_pred, axis=-1):
    y_true = K.l2_normalize(y_true, axis=axis)
    y_pred = K.l2_normalize(y_pred, axis=axis)
    return - K.sum(y_true * y_pred, axis=axis)


def _maybe_convert_labels(y_true):
    """Converts binary labels into -1/1."""
    are_zeros = K.equal(y_true, 0)
    are_ones = K.equal(y_true, 1)
    are_zeros = K.expand_dims(are_zeros, 0)
    are_ones = K.expand_dims(are_ones, 0)
    are_different = K.concatenate([are_zeros, are_ones], axis=0)
    are_different = K.any(are_different, axis=0)
    is_binary = K.all(are_different)

    def _convert_binary_labels():
        # Convert the binary labels to -1 or 1.
        return 2. * y_true - 1.

    updated_y_true = K.switch(is_binary,
                              _convert_binary_labels,
                              lambda: y_true)
    return updated_y_true


# Aliases.

mse = MSE = mean_squared_error
mae = MAE = mean_absolute_error
mape = MAPE = mean_absolute_percentage_error
msle = MSLE = mean_squared_logarithmic_error
kld = KLD = kullback_leibler_divergence
cosine = cosine_similarity = cosine_proximity


def is_categorical_crossentropy(loss):
    return (isinstance(loss, CategoricalCrossentropy) or
            (isinstance(loss, LossFunctionWrapper) and
             loss.fn == categorical_crossentropy) or
            (hasattr(loss, '__name__') and
             loss.__name__ == 'categorical_crossentropy') or
            loss == 'categorical_crossentropy')


def serialize(loss):
    return serialize_keras_object(loss)


def deserialize(name, custom_objects=None):
    return deserialize_keras_object(name,
                                    module_objects=globals(),
                                    custom_objects=custom_objects,
                                    printable_module_name='loss function')


def get(identifier):
    """Get the `identifier` loss function.

    # Arguments
        identifier: None, a str (name of a function defined in this module),
            a dict (serialized config), or a callable.

    # Returns
        The loss function, or None if `identifier` is None.

    # Raises
        ValueError: If the identifier cannot be interpreted.
    """
    if identifier is None:
        return None
    if isinstance(identifier, six.string_types):
        identifier = str(identifier)
        return deserialize(identifier)
    if isinstance(identifier, dict):
        return deserialize(identifier)
    elif callable(identifier):
        return identifier
    else:
        raise ValueError('Could not interpret '
                         'loss function identifier:', identifier)
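Finally, `get` is the hook that `model.compile` relies on: strings resolve to the functions defined in this module (including the short aliases), config dicts go through `deserialize`, and callables pass through untouched, which is why plain custom functions can be used as losses. A short illustration against the module as shown here:

```python
from keras import losses

# String identifiers (and their aliases) resolve to the module-level functions.
assert losses.get('mean_squared_error') is losses.mean_squared_error
assert losses.get('mse') is losses.mean_squared_error

# Callables pass through unchanged, so a custom function can be given to compile().
def my_loss(y_true, y_pred):
    return losses.mean_absolute_error(y_true, y_pred)

assert losses.get(my_loss) is my_loss
```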
| """Built-in loss functions. | |
| """ | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import abc | |
| import six | |
| from . import backend as K | |
| from .utils import losses_utils | |
| from .utils.generic_utils import deserialize_keras_object | |
| from .utils.generic_utils import serialize_keras_object | |
| @six.add_metaclass(abc.ABCMeta) | |
| class Loss(object): | |
| """Loss base class. | |
| To be implemented by subclasses: | |
| * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`. | |
| Example subclass implementation: | |
| ```python | |
| class MeanSquaredError(Loss): | |
| def call(self, y_true, y_pred): | |
| y_pred = ops.convert_to_tensor(y_pred) | |
| y_true = math_ops.cast(y_true, y_pred.dtype) | |
| return K.mean(math_ops.square(y_pred - y_true), axis=-1) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss Reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: Optional name for the object. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name=None): | |
| self.reduction = reduction | |
| self.name = name | |
| def __call__(self, y_true, y_pred, sample_weight=None): | |
| """Invokes the `Loss` instance. | |
| # Arguments | |
| y_true: Ground truth values. | |
| y_pred: The predicted values. | |
| sample_weight: Optional `Tensor` whose rank is either 0, or the same rank | |
| as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a | |
| coefficient for the loss. If a scalar is provided, then the loss is | |
| simply scaled by the given value. If `sample_weight` is a tensor of size | |
| `[batch_size]`, then the total loss for each sample of the batch is | |
| rescaled by the corresponding element in the `sample_weight` vector. If | |
| the shape of `sample_weight` matches the shape of `y_pred`, then the | |
| loss of each measurable element of `y_pred` is scaled by the | |
| corresponding value of `sample_weight`. | |
| # Returns | |
| Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same | |
| shape as `y_true`; otherwise, it is scalar. | |
| # Raises | |
| ValueError: If the shape of `sample_weight` is invalid. | |
| """ | |
| # If we are wrapping a lambda function strip '<>' from the name as it is not | |
| # accepted in scope name. | |
| scope_name = 'lambda' if self.name == '<lambda>' else self.name | |
| with K.name_scope(scope_name): | |
| losses = self.call(y_true, y_pred) | |
| return losses_utils.compute_weighted_loss( | |
| losses, sample_weight, reduction=self.reduction) | |
| @classmethod | |
| def from_config(cls, config): | |
| """Instantiates a `Loss` from its config (output of `get_config()`). | |
| # Arguments | |
| config: Output of `get_config()`. | |
| # Returns | |
| A `Loss` instance. | |
| """ | |
| return cls(**config) | |
| def get_config(self): | |
| return {'reduction': self.reduction, 'name': self.name} | |
| @abc.abstractmethod | |
| def call(self, y_true, y_pred): | |
| """Invokes the `Loss` instance. | |
| # Arguments | |
| y_true: Ground truth values, with the same shape as 'y_pred'. | |
| y_pred: The predicted values. | |
| """ | |
| raise NotImplementedError('Must be implemented in subclasses.') | |
| class LossFunctionWrapper(Loss): | |
| """Wraps a loss function in the `Loss` class. | |
| # Arguments | |
| fn: The loss function to wrap, with signature `fn(y_true, y_pred, | |
| **kwargs)`. | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) name for the loss. | |
| **kwargs: The keyword arguments that are passed on to `fn`. | |
| """ | |
| def __init__(self, | |
| fn, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name=None, | |
| **kwargs): | |
| super(LossFunctionWrapper, self).__init__(reduction=reduction, name=name) | |
| self.fn = fn | |
| self._fn_kwargs = kwargs | |
| def call(self, y_true, y_pred): | |
| """Invokes the `LossFunctionWrapper` instance. | |
| # Arguments | |
| y_true: Ground truth values. | |
| y_pred: The predicted values. | |
| # Returns | |
| Loss values per sample. | |
| """ | |
| return self.fn(y_true, y_pred, **self._fn_kwargs) | |
| def get_config(self): | |
| config = {} | |
| for k, v in six.iteritems(self._fn_kwargs): | |
| config[k] = K.eval(v) if K.is_tensor(v) or K.is_variable(v) else v | |
| base_config = super(LossFunctionWrapper, self).get_config() | |
| return dict(list(base_config.items()) + list(config.items())) | |
| class MeanSquaredError(LossFunctionWrapper): | |
| """Computes the mean of squares of errors between labels and predictions. | |
| Standalone usage: | |
| ```python | |
| mse = keras.losses.MeanSquaredError() | |
| loss = mse([0., 0., 1., 1.], [1., 1., 1., 0.]) | |
| ``` | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.MeanSquaredError()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) name for the loss. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='mean_squared_error'): | |
| super(MeanSquaredError, self).__init__( | |
| mean_squared_error, name=name, reduction=reduction) | |
| class MeanAbsoluteError(LossFunctionWrapper): | |
| """Computes the mean of absolute difference between labels and predictions. | |
| Standalone usage: | |
| ```python | |
| mae = keras.losses.MeanAbsoluteError() | |
| loss = mae([0., 0., 1., 1.], [1., 1., 1., 0.]) | |
| ``` | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.MeanAbsoluteError()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) name for the loss. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='mean_absolute_error'): | |
| super(MeanAbsoluteError, self).__init__( | |
| mean_absolute_error, name=name, reduction=reduction) | |
| class MeanAbsolutePercentageError(LossFunctionWrapper): | |
| """Computes the mean absolute percentage error between `y_true` and `y_pred`. | |
| Standalone usage: | |
| ```python | |
| mape = keras.losses.MeanAbsolutePercentageError() | |
| loss = mape([0., 0., 1., 1.], [1., 1., 1., 0.]) | |
| ``` | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.MeanAbsolutePercentageError()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) name for the loss. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='mean_absolute_percentage_error'): | |
| super(MeanAbsolutePercentageError, self).__init__( | |
| mean_absolute_percentage_error, name=name, reduction=reduction) | |
| class MeanSquaredLogarithmicError(LossFunctionWrapper): | |
| """Computes the mean squared logarithmic error between `y_true` and `y_pred`. | |
| Standalone usage: | |
| ```python | |
| msle = keras.losses.MeanSquaredLogarithmicError() | |
| loss = msle([0., 0., 1., 1.], [1., 1., 1., 0.]) | |
| ``` | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.MeanSquaredLogarithmicError()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) name for the loss. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='mean_squared_logarithmic_error'): | |
| super(MeanSquaredLogarithmicError, self).__init__( | |
| mean_squared_logarithmic_error, name=name, reduction=reduction) | |
| class BinaryCrossentropy(LossFunctionWrapper): | |
| """Computes the cross-entropy loss between true labels and predicted labels. | |
| Use this cross-entropy loss when there are only two label classes (assumed to | |
| be 0 and 1). For each example, there should be a single floating-point value | |
| per prediction. | |
| In the snippet below, each of the four examples has only a single | |
| floating-pointing value, and both `y_pred` and `y_true` have the shape | |
| `[batch_size]`. | |
| Standalone usage: | |
| ```python | |
| bce = keras.losses.BinaryCrossentropy() | |
| loss = bce([0., 0., 1., 1.], [1., 1., 1., 0.]) | |
| ``` | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.BinaryCrossentropy()) | |
| ``` | |
| # Arguments | |
| from_logits: Whether to interpret `y_pred` as a tensor of | |
| [logit](https://en.wikipedia.org/wiki/Logit) values. By default, | |
| we assume that `y_pred` contains probabilities | |
| (i.e., values in [0, 1]). | |
| label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0, we | |
| compute the loss between the predicted labels and a smoothed version of | |
| the true labels, where the smoothing squeezes the labels towards 0.5. | |
| Larger values of `label_smoothing` correspond to heavier smoothing. | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| from_logits=False, | |
| label_smoothing=0, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='binary_crossentropy'): | |
| super(BinaryCrossentropy, self).__init__( | |
| binary_crossentropy, | |
| name=name, | |
| reduction=reduction, | |
| from_logits=from_logits, | |
| label_smoothing=label_smoothing) | |
| self.from_logits = from_logits | |
| class CategoricalCrossentropy(LossFunctionWrapper): | |
| """Computes the crossentropy loss between the labels and predictions. | |
| Use this crossentropy loss function when there are two or more label classes. | |
| We expect labels to be provided in a `one_hot` representation. If you want to | |
| provide labels as integers, please use `SparseCategoricalCrossentropy` loss. | |
| There should be `# classes` floating point values per feature. | |
| In the snippet below, there is `# classes` floating pointing values per | |
| example. The shape of both `y_pred` and `y_true` are | |
| `[batch_size, num_classes]`. | |
| Standalone usage: | |
| ```python | |
| cce = keras.losses.CategoricalCrossentropy() | |
| loss = cce( | |
| [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]], | |
| [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]]) | |
| ``` | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.CategoricalCrossentropy()) | |
| ``` | |
| # Arguments | |
| from_logits: Whether to interpret `y_pred` as a tensor of | |
| [logit](https://en.wikipedia.org/wiki/Logit) values. By default, | |
| we assume that `y_pred` contains probabilities | |
| (i.e., values in [0, 1]). | |
| label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0, we | |
| compute the loss between the predicted labels and a smoothed version of | |
| the true labels, where the smoothing squeezes the labels towards 0.5. | |
| Larger values of `label_smoothing` correspond to heavier smoothing. | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| from_logits=False, | |
| label_smoothing=0, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='categorical_crossentropy'): | |
| super(CategoricalCrossentropy, self).__init__( | |
| categorical_crossentropy, | |
| name=name, | |
| reduction=reduction, | |
| from_logits=from_logits, | |
| label_smoothing=label_smoothing) | |
| class SparseCategoricalCrossentropy(LossFunctionWrapper): | |
| """Computes the crossentropy loss between the labels and predictions. | |
| Use this crossentropy loss function when there are two or more label classes. | |
| We expect labels to be provided as integers. If you want to provide labels | |
| using `one-hot` representation, please use `CategoricalCrossentropy` loss. | |
| There should be `# classes` floating point values per feature for `y_pred` | |
| and a single floating point value per feature for `y_true`. | |
| In the snippet below, there is a single floating point value per example for | |
| `y_true` and `# classes` floating pointing values per example for `y_pred`. | |
| The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is | |
| `[batch_size, num_classes]`. | |
| Standalone usage: | |
| ```python | |
| cce = keras.losses.SparseCategoricalCrossentropy() | |
| loss = cce( | |
| [0, 1, 2], | |
| [[.9, .05, .05], [.5, .89, .6], [.05, .01, .94]]) | |
| ``` | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.SparseCategoricalCrossentropy()) | |
| ``` | |
| # Arguments | |
| from_logits: Whether to interpret `y_pred` as a tensor of | |
| [logit](https://en.wikipedia.org/wiki/Logit) values. By default, | |
| we assume that `y_pred` contains probabilities | |
| (i.e., values in [0, 1]). | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| from_logits=False, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='sparse_categorical_crossentropy'): | |
| super(SparseCategoricalCrossentropy, self).__init__( | |
| sparse_categorical_crossentropy, | |
| name=name, | |
| reduction=reduction, | |
| from_logits=from_logits) | |
| class Hinge(LossFunctionWrapper): | |
| """Computes the hinge loss between `y_true` and `y_pred`. | |
| `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are | |
| provided we will convert them to -1 or 1. | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.Hinge()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='hinge'): | |
| super(Hinge, self).__init__(hinge, name=name, reduction=reduction) | |
| class SquaredHinge(LossFunctionWrapper): | |
| """Computes the squared hinge loss between `y_true` and `y_pred`. | |
| `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are | |
| provided we will convert them to -1 or 1. | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.SquaredHinge()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='squared_hinge'): | |
| super(SquaredHinge, self).__init__( | |
| squared_hinge, name=name, reduction=reduction) | |
| class CategoricalHinge(LossFunctionWrapper): | |
| """Computes the categorical hinge loss between `y_true` and `y_pred`. | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.CategoricalHinge()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='categorical_hinge'): | |
| super(CategoricalHinge, self).__init__( | |
| categorical_hinge, name=name, reduction=reduction) | |
| class Poisson(LossFunctionWrapper): | |
| """Computes the Poisson loss between `y_true` and `y_pred`. | |
| `loss = y_pred - y_true * log(y_pred)` | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.Poisson()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='poisson'): | |
| super(Poisson, self).__init__(poisson, name=name, reduction=reduction) | |
| class LogCosh(LossFunctionWrapper): | |
| """Computes the logarithm of the hyperbolic cosine of the prediction error. | |
| `logcosh = log((exp(x) + exp(-x))/2)`, | |
| where x is the error (y_pred - y_true) | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.LogCosh()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='logcosh'): | |
| super(LogCosh, self).__init__(logcosh, name=name, reduction=reduction) | |
| class KLDivergence(LossFunctionWrapper): | |
| """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`. | |
| `loss = y_true * log(y_true / y_pred)` | |
| See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.KLDivergence()) | |
| ``` | |
| # Arguments | |
| reduction: (Optional) Type of loss reduction to apply to loss. | |
| Default value is `SUM_OVER_BATCH_SIZE`. | |
| name: (Optional) Name for the object. | |
| """ | |
| def __init__(self, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='kullback_leibler_divergence'): | |
| super(KLDivergence, self).__init__( | |
| kullback_leibler_divergence, name=name, reduction=reduction) | |
| class Huber(LossFunctionWrapper): | |
| """Computes the Huber loss between `y_true` and `y_pred`. | |
| Given `x = y_true - y_pred`: | |
| ``` | |
| loss = 0.5 * x^2 if |x| <= d | |
| loss = 0.5 * d^2 + d * (|x| - d) if |x| > d | |
| ``` | |
| where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss | |
| Usage with the `compile` API: | |
| ```python | |
| model = keras.Model(inputs, outputs) | |
| model.compile('sgd', loss=keras.losses.Huber()) | |
| ``` | |
| # Arguments | |
| delta: A float, the point where the Huber loss function changes from a | |
| quadratic to linear. | |
| reduction: (Optional) Type of reduction to apply to loss. | |
| name: Optional name for the object. | |
| """ | |
| def __init__(self, | |
| delta=1.0, | |
| reduction=losses_utils.Reduction.SUM_OVER_BATCH_SIZE, | |
| name='huber_loss'): | |
| super(Huber, self).__init__( | |
| huber_loss, name=name, reduction=reduction, delta=delta) | |
| def mean_squared_error(y_true, y_pred): | |
| if not K.is_tensor(y_pred): | |
| y_pred = K.constant(y_pred) | |
| y_true = K.cast(y_true, y_pred.dtype) | |
| return K.mean(K.square(y_pred - y_true), axis=-1) | |
| def mean_absolute_error(y_true, y_pred): | |
| if not K.is_tensor(y_pred): | |
| y_pred = K.constant(y_pred) | |
| y_true = K.cast(y_true, y_pred.dtype) | |
| return K.mean(K.abs(y_pred - y_true), axis=-1) | |
| def mean_absolute_percentage_error(y_true, y_pred): | |
| if not K.is_tensor(y_pred): | |
| y_pred = K.constant(y_pred) | |
| y_true = K.cast(y_true, y_pred.dtype) | |
| diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), | |
| K.epsilon(), | |
| None)) | |
| return 100. * K.mean(diff, axis=-1) | |
| def mean_squared_logarithmic_error(y_true, y_pred): | |
| if not K.is_tensor(y_pred): | |
| y_pred = K.constant(y_pred) | |
| y_true = K.cast(y_true, y_pred.dtype) | |
| first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.) | |
| second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.) | |
| return K.mean(K.square(first_log - second_log), axis=-1) | |
| def squared_hinge(y_true, y_pred): | |
| y_true = _maybe_convert_labels(y_true) | |
| return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1) | |
| def hinge(y_true, y_pred): | |
| y_true = _maybe_convert_labels(y_true) | |
| return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1) | |
| def categorical_hinge(y_true, y_pred): | |
| pos = K.sum(y_true * y_pred, axis=-1) | |
| neg = K.max((1. - y_true) * y_pred, axis=-1) | |
| return K.maximum(0., neg - pos + 1.) | |
| def logcosh(y_true, y_pred): | |
| """Logarithm of the hyperbolic cosine of the prediction error. | |
| `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and | |
| to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly | |
| like the mean squared error, but will not be so strongly affected by the | |
| occasional wildly incorrect prediction. | |
| # Arguments | |
| y_true: tensor of true targets. | |
| y_pred: tensor of predicted targets. | |
| # Returns | |
| Tensor with one scalar loss entry per sample. | |
| """ | |
| def _logcosh(x): | |
| return x + K.softplus(-2. * x) - K.log(2.) | |
| return K.mean(_logcosh(y_pred - y_true), axis=-1) | |
| def huber_loss(y_true, y_pred, delta=1.0): | |
| error = y_pred - y_true | |
| abs_error = K.abs(error) | |
| quadratic = K.minimum(abs_error, delta) | |
| linear = abs_error - quadratic | |
| return 0.5 * K.square(quadratic) + delta * linear | |
| def categorical_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0): | |
| y_pred = K.constant(y_pred) if not K.is_tensor(y_pred) else y_pred | |
| y_true = K.cast(y_true, y_pred.dtype) | |
| if label_smoothing is not 0: | |
| smoothing = K.cast_to_floatx(label_smoothing) | |
| def _smooth_labels(): | |
| num_classes = K.cast(K.shape(y_true)[1], y_pred.dtype) | |
| return y_true * (1.0 - smoothing) + (smoothing / num_classes) | |
| y_true = K.switch(K.greater(smoothing, 0), _smooth_labels, lambda: y_true) | |
| return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits) | |
| def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1): | |
| return K.sparse_categorical_crossentropy( | |
| y_true, y_pred, from_logits=from_logits, axis=axis) | |
| def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0): | |
| y_pred = K.constant(y_pred) if not K.is_tensor(y_pred) else y_pred | |
| y_true = K.cast(y_true, y_pred.dtype) | |
| if label_smoothing is not 0: | |
| smoothing = K.cast_to_floatx(label_smoothing) | |
| y_true = K.switch(K.greater(smoothing, 0), | |
| lambda: y_true * (1.0 - smoothing) + 0.5 * smoothing, | |
| lambda: y_true) | |
| return K.mean( | |
| K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1) | |
| def kullback_leibler_divergence(y_true, y_pred): | |
| y_true = K.clip(y_true, K.epsilon(), 1) | |
| y_pred = K.clip(y_pred, K.epsilon(), 1) | |
| return K.sum(y_true * K.log(y_true / y_pred), axis=-1) | |
| def poisson(y_true, y_pred): | |
| return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) | |
| def cosine_proximity(y_true, y_pred, axis=-1): | |
| y_true = K.l2_normalize(y_true, axis=axis) | |
| y_pred = K.l2_normalize(y_pred, axis=axis) | |
| return - K.sum(y_true * y_pred, axis=axis) | |
| def _maybe_convert_labels(y_true): | |
| """Converts binary labels into -1/1.""" | |
| are_zeros = K.equal(y_true, 0) | |
| are_ones = K.equal(y_true, 1) | |
| are_zeros = K.expand_dims(are_zeros, 0) | |
| are_ones = K.expand_dims(are_ones, 0) | |
| are_different = K.concatenate([are_zeros, are_ones], axis=0) | |
| are_different = K.any(are_different, axis=0) | |
| is_binary = K.all(are_different) | |
| def _convert_binary_labels(): | |
| # Convert the binary labels to -1 or 1. | |
| return 2. * y_true - 1. | |
| updated_y_true = K.switch(is_binary, | |
| _convert_binary_labels, | |
| lambda: y_true) | |
| return updated_y_true | |
| # Aliases. | |
| mse = MSE = mean_squared_error | |
| mae = MAE = mean_absolute_error | |
| mape = MAPE = mean_absolute_percentage_error | |
| msle = MSLE = mean_squared_logarithmic_error | |
| kld = KLD = kullback_leibler_divergence | |
| cosine = cosine_similarity = cosine_proximity | |
| def is_categorical_crossentropy(loss): | |
| return (isinstance(loss, CategoricalCrossentropy) or | |
| (isinstance(loss, LossFunctionWrapper) and | |
| loss.fn == categorical_crossentropy) or | |
| (hasattr(loss, '__name__') and | |
| loss.__name__ == 'categorical_crossentropy') or | |
| loss == 'categorical_crossentropy') | |
| def serialize(loss): | |
| return serialize_keras_object(loss) | |
| def deserialize(name, custom_objects=None): | |
| return deserialize_keras_object(name, | |
| module_objects=globals(), | |
| custom_objects=custom_objects, | |
| printable_module_name='loss function') | |
| def get(identifier): | |
| """Get the `identifier` loss function. | |
| # Arguments | |
| identifier: None or str, name of the function. | |
| # Returns | |
| The loss function or None if `identifier` is None. | |
| # Raises | |
| ValueError if unknown identifier. | |
| """ | |
| if identifier is None: | |
| return None | |
| if isinstance(identifier, six.string_types): | |
| identifier = str(identifier) | |
| return deserialize(identifier) | |
| if isinstance(identifier, dict): | |
| return deserialize(identifier) | |
| elif callable(identifier): | |
| return identifier | |
| else: | |
| raise ValueError('Could not interpret ' | |
| 'loss function identifier:', identifier) |
浙公网安备 33010602011771号