|
|
|
@ -77,6 +77,7 @@ __all__ = [
|
|
|
|
|
'lod_reset',
|
|
|
|
|
'lrn',
|
|
|
|
|
'pad',
|
|
|
|
|
'label_smooth',
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -3678,3 +3679,68 @@ def pad(x, paddings, pad_value=0., name=None):
|
|
|
|
|
attrs={'paddings': paddings,
|
|
|
|
|
'pad_value': float(pad_value)})
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def label_smooth(label,
                 prior_dist=None,
                 epsilon=0.1,
                 dtype="float32",
                 name=None):
    r"""
    Label smoothing is a mechanism to regularize the classifier layer and is
    called label-smoothing regularization (LSR).

    Label smoothing is proposed to encourage the model to be less confident,
    since optimizing the log-likelihood of the correct label directly may
    cause overfitting and reduce the ability of the model to adapt. Label
    smoothing replaces the ground-truth label :math:`y` with the weighted sum
    of itself and some fixed distribution :math:`\mu`. For class :math:`k`,
    i.e.

    .. math::

        \tilde{y}_k = (1 - \epsilon) * y_k + \epsilon * \mu_k,

    where :math:`1 - \epsilon` and :math:`\epsilon` are the weights
    respectively, and :math:`\tilde{y}_k` is the smoothed label. Usually
    uniform distribution is used for :math:`\mu`.

    See more details about label smoothing in https://arxiv.org/abs/1512.00567.

    Args:
        label(Variable): The input variable containing the label data. The
                          label data should use one-hot representation.
                          Note that this layer sets
                          :attr:`label.stop_gradient` to True.
        prior_dist(Variable|None): The prior distribution to be used to
                                   smooth labels. If not provided (None), a
                                   uniform distribution is used. The shape of
                                   :attr:`prior_dist` should be
                                   :math:`(1, class\_num)`.
        epsilon(float): The weight used to mix up the original ground-truth
                        distribution and the fixed distribution. Must lie in
                        the closed interval [0, 1].
        dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32,
                                                  float64, int etc.
        name(str|None): A name for this layer(optional). If set None, the layer
                        will be named automatically.

    Returns:
        Variable: The tensor variable containing the smoothed labels.

    Raises:
        ValueError: If :attr:`epsilon` is greater than 1 or less than 0.

    Examples:
        .. code-block:: python

            label = layers.data(name="label", shape=[1], dtype="float32")
            one_hot_label = layers.one_hot(input=label, depth=10)
            smooth_label = layers.label_smooth(
                label=one_hot_label, epsilon=0.1, dtype="float32")
    """
    if epsilon > 1. or epsilon < 0.:
        raise ValueError("The value of epsilon must be between 0 and 1.")

    # Every local name is forwarded to LayerHelper as a keyword argument, so
    # the helper must be built before any further local is introduced.
    helper = LayerHelper("label_smooth", **locals())

    # The labels are treated as constants: no gradient flows back into them.
    label.stop_gradient = True
    smooth_label = helper.create_tmp_variable(dtype)

    # Compare against None explicitly so that "was a prior supplied?" does not
    # depend on how the Variable type happens to define its truth value.
    op_inputs = {"X": label}
    if prior_dist is not None:
        op_inputs["PriorDist"] = prior_dist

    helper.append_op(
        type="label_smooth",
        inputs=op_inputs,
        outputs={"Out": smooth_label},
        attrs={"epsilon": float(epsilon)})
    return smooth_label
|
|
|
|
|