Merge pull request #3560 from guoshengCS/add-ShiftLayer

Add ScaleShiftLayer
8 years ago · b7a6cc9cc3
parent 9871c6db6c f6dc56aabd
commit b7a6cc9cc3
8 changed files with 265 additions and 1 deletions
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@ -362,6 +362,11 @@ trans
 ..  autoclass:: paddle.v2.layer.trans
    :noindex:
 scale_shift
 -----------
 ..  autoclass:: paddle.v2.layer.scale_shift
    :noindex:
 Sampling Layers
 ===============
--- a/paddle/gserver/layers/ScaleShiftLayer.cpp
+++ b/paddle/gserver/layers/ScaleShiftLayer.cpp
@ -0,0 +1,107 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "Layer.h"
 namespace paddle {
 /**
  * @brief Element-wise affine layer with a single trainable scale and an
  * optional single trainable bias.
  *
  * Every element x of the input matrix is mapped to
  *
  * \f[
  *    y = w x + b
  * \f]
  *
  * where w (the scale) and b (the bias) are both 1x1 trainable parameters
  * shared by all elements. When the layer is configured without a bias
  * parameter, only the scaling is applied.
  */
 class ScaleShiftLayer : public Layer {
 public:
  explicit ScaleShiftLayer(const LayerConfig& config) : Layer(config) {}

  /// Binds the scale weight and, if configured, the bias weight.
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  /// Writes w * x (+ b) into the output value.
  void forward(PassType passType) override;

  /// Accumulates gradients for w, b and the input.
  void backward(const UpdateCallback& callback = nullptr) override;

 protected:
  /// 1x1 weight holding the scale w.
  std::unique_ptr<Weight> scale_;
  /// 1x1 weight holding the bias b; stays empty when no bias is configured.
  std::unique_ptr<Weight> offset_;
 };

 REGISTER_LAYER(scale_shift, ScaleShiftLayer);
 /**
  * Initializes the base layer, then wraps the layer's parameters as weights.
  *
  * The layer must have exactly one input. The input's parameter becomes the
  * 1x1 scale weight; if a bias parameter is configured it becomes the 1x1
  * offset weight, otherwise offset_ is left empty and no shift is applied.
  *
  * @param layerMap      forwarded to Layer::init.
  * @param parameterMap  forwarded to Layer::init.
  * @return false if the base-class initialization fails, true otherwise.
  */
 bool ScaleShiftLayer::init(const LayerMap& layerMap,
                            const ParameterMap& parameterMap) {
  // Do not touch inputLayers_/parameters_ if the base class failed to set
  // them up; report the failure to the caller instead of ignoring it.
  if (!Layer::init(layerMap, parameterMap)) {
    return false;
  }
  CHECK_EQ(inputLayers_.size(), 1U);
  scale_.reset(new Weight(1, 1, parameters_[0]));
  if (biasParameter_.get() != nullptr) {
    offset_.reset(new Weight(1, 1, biasParameter_));
  }
  return true;
 }
 /**
  * Forward pass: output = scale * input, plus the offset when a bias weight
  * exists. The output is resized to the input's height and width.
  */
 void ScaleShiftLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr input = getInputValue(0);
  resetOutput(input->getHeight(), input->getWidth());
  MatrixPtr output = getOutputValue();

  // Scaling step: read the scalar w from the 1x1 weight and apply it.
  const real w = scale_->getW()->getElement(0, 0);
  output->mulScalar(*input, w);

  // Shifting step is skipped entirely when the layer has no bias.
  if (!offset_) {
    return;
  }
  const real b = offset_->getW()->getElement(0, 0);
  output->add(b);
 }
 /**
  * Backward pass for y = w * x + b.
  *
  *  - dL/dw is the sum over all elements of x * dL/dy,
  *  - dL/db is the sum over all elements of dL/dy,
  *  - dL/dx accumulates w * dL/dy.
  *
  * Parameter gradients are accumulated into the existing gradient buffers
  * (scaleDest = 1) and each parameter is then passed to incUpdate with the
  * given callback. A gradient is only computed when its buffer exists.
  *
  * @param callback  update callback forwarded to Parameter::incUpdate.
  */
 void ScaleShiftLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inV = getInputValue(0);
  MatrixPtr inG = getInputGrad(0);
  MatrixPtr outG = getOutputGrad();

  /* Calculate the parameter gradient for the current layer */
  if (scale_->getWGrad()) {
    // Two-step reduction to a scalar: per-row sums first, then sum the rows.
    MatrixPtr rowSumMtx;
    Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_);
    // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
    rowSumMtx->sumOfProducts(
        /* b= */ *inV, /* c= */ *outG, /* scaleSum= */ 1, /* scaleDest= */ 0.);
    // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji}
    scale_->getWGrad()->sumCols(
        /* b= */ *rowSumMtx, /* scaleSum= */ 1., /* scaleDest= */ 1.);
    scale_->getParameterPtr()->incUpdate(callback);
  }

  if (offset_ && offset_->getWGrad()) {
    // Same two-step reduction, applied to the output gradient alone.
    MatrixPtr rowSumMtx;
    Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_);
    rowSumMtx->sumRows(*outG, 1., 0.);
    offset_->getWGrad()->sumCols(*rowSumMtx, 1., 1.);
    offset_->getParameterPtr()->incUpdate(callback);
  }

  /* Calculate the input layers error */
  if (inG) {
    // inG += scale * outG
    real scaleValue = scale_->getW()->getElement(0, 0);
    inG->add(*outG, scaleValue);
  }
 }
 }  // namespace paddle
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@ -2007,6 +2007,21 @@ TEST(Layer, RowL2NormLayer) {
  }
 }
 // Gradient check for the scale_shift layer, with both the scale parameter
 // and the bias parameter enabled (each of size 1), on CPU and on GPU.
 TEST(Layer, ScaleShiftLayer) {
  constexpr size_t kBatchSize = 16;
  constexpr size_t kLayerSize = 32;

  TestConfig config;
  config.biasSize = 1;
  config.layerConfig.set_type("scale_shift");
  config.layerConfig.set_size(kLayerSize);
  config.layerConfig.add_inputs();
  // One dense input whose width equals the layer size; its parameter is the
  // single scale value.
  config.inputDefs.push_back(
      {INPUT_DATA, "input", /* dim= */ kLayerSize, /* paraSize= */ 1});

  for (const bool useGpu : {false, true}) {
    testLayerGrad(config, "scale_shift", kBatchSize, false, useGpu, false);
  }
 }
 int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@ -2232,6 +2232,20 @@ class ClipLayer(LayerBase):
        self.config.inputs[0].clip_conf.max = max
@config_layer('scale_shift')
class ScaleShiftLayer(LayerBase):
    """Config for the 'scale_shift' layer: one input, one scalar scale
    parameter and an optional scalar bias parameter."""

    def __init__(self, name, inputs, bias=True, **xargs):
        super(ScaleShiftLayer, self).__init__(
            name, 'scale_shift', 0, inputs=inputs, **xargs)

        input_count = len(self.inputs)
        config_assert(input_count == 1,
                      'ScaleShiftLayer must have one and only one input.')

        # The output has the same size as the single input.
        self.set_layer_size(self.get_input_layer(0).size)

        # Scale: a parameter of size 1 with dims [1, 1] attached to input 0.
        self.create_input_parameter(0, 1, [1, 1])
        # Shift: a bias parameter of size 1 (whether it is created depends
        # on `bias`).
        self.create_bias_parameter(bias, 1)
 # key: cost type
 # value: cost class
 g_cost_map = {}
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@ -133,6 +133,7 @@ __all__ = [
    'clip_layer',
    'slice_projection',
    'kmax_sequence_score_layer',
    'scale_shift_layer',
 ]
@ -230,6 +231,7 @@ class LayerType(object):
    CLIP_LAYER = 'clip'
    KMAX_SEQ_SCORE = 'kmax_seq_score'
    SCALE_SHIFT_LAYER = 'scale_shift'
    @staticmethod
    def is_layer_type(type_name):
@ -6210,3 +6212,43 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
    return LayerOutput(
        name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size)
@wrap_name_default("scale_shift")
@wrap_param_attr_default()
@wrap_bias_attr_default()
def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
    """
    Apply the same linear transformation to every element of the input
    matrix: each element is first multiplied by a scale and then a bias is
    added to it.

    This layer is very similar to the SlopeInterceptLayer, except that the
    scale and the bias are trainable.

    .. math::

        y = w * x + b

    The example usage is:

    .. code-block:: python

        scale_shift = scale_shift_layer(input=input_layer, bias_attr=False)

    :param name: The name of this layer.
    :type name: basestring
    :param input: The input layer.
    :type input: LayerOutput
    :param param_attr: The parameter attribute of the scale.
    :type param_attr: ParameterAttribute
    :param bias_attr: The parameter attribute of the bias (the shift).
    :type bias_attr: ParameterAttribute
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    layer_type = LayerType.SCALE_SHIFT_LAYER
    scaled_input = Input(input.name, **param_attr.attr)

    # Register the layer in the config being built.
    Layer(
        name=name,
        type=layer_type,
        inputs=scaled_input,
        bias=ParamAttr.to_bias(bias_attr))

    # The output keeps the size of the input.
    return LayerOutput(name, layer_type, parents=[input], size=input.size)
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
 test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
 test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
 test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
-test_kmax_seq_socre_layer test_seq_select_layers)
+test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer)
 export whole_configs=(test_split_datasource)
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr
@ -0,0 +1,72 @@
 type: "nn"
 layers {
  name: "data"
  type: "data"
  size: 100
  active_type: ""
 }
 layers {
  name: "__scale_shift_0__"
  type: "scale_shift"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "data"
    input_parameter_name: "___scale_shift_0__.w0"
  }
 }
 layers {
  name: "__scale_shift_1__"
  type: "scale_shift"
  size: 100
  active_type: ""
  inputs {
    input_layer_name: "data"
    input_parameter_name: "___scale_shift_1__.w0"
  }
  bias_parameter_name: "___scale_shift_1__.wbias"
 }
 parameters {
  name: "___scale_shift_0__.w0"
  size: 1
  initial_mean: 0.0
  initial_std: 1.0
  dims: 1
  dims: 1
  initial_strategy: 0
  initial_smart: true
 }
 parameters {
  name: "___scale_shift_1__.w0"
  size: 1
  initial_mean: 0.0
  initial_std: 1.0
  dims: 1
  dims: 1
  initial_strategy: 0
  initial_smart: true
 }
 parameters {
  name: "___scale_shift_1__.wbias"
  size: 1
  initial_mean: 0.0
  initial_std: 0.0
  dims: 1
  dims: 1
  initial_strategy: 0
  initial_smart: false
 }
 input_layer_names: "data"
 output_layer_names: "__scale_shift_0__"
 output_layer_names: "__scale_shift_1__"
 sub_models {
  name: "root"
  layer_names: "data"
  layer_names: "__scale_shift_0__"
  layer_names: "__scale_shift_1__"
  input_layer_names: "data"
  output_layer_names: "__scale_shift_0__"
  output_layer_names: "__scale_shift_1__"
  is_recurrent_layer_group: false
 }
--- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py
@ -0,0 +1,9 @@
 from paddle.trainer_config_helpers import *
 # Config exercising scale_shift_layer both without and with a bias.
 # The two layers are created in this order on purpose: the auto-generated
 # layer names in the expected protostr depend on it.
 source = data_layer(name='data', size=100)
 scale_only = scale_shift_layer(input=source, bias_attr=False)
 scale_and_shift = scale_shift_layer(input=source)
 outputs(scale_only, scale_and_shift)