Add elementwise math operations (#343)
* Add elementwise math operations This allows us to use expressions like: y=log(1+exp(x)) Also added unittests for ActivationFunction * Enforce keyword arguments for non-positional arguments * Add LogActivation to docavx_docs
parent
568d9cff1d
commit
6c3a678c9a
@ -0,0 +1,66 @@
|
|||||||
|
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include "paddle/gserver/layers/DataLayer.h"
|
||||||
|
#include "ModelConfig.pb.h"
|
||||||
|
#include "paddle/trainer/Trainer.h"
|
||||||
|
|
||||||
|
#include "TestUtil.h"
|
||||||
|
#include "LayerGradUtil.h"
|
||||||
|
|
||||||
|
using namespace paddle; // NOLINT
|
||||||
|
using namespace std; // NOLINT
|
||||||
|
|
||||||
|
P_DECLARE_bool(use_gpu);
|
||||||
|
P_DECLARE_bool(thread_local_rand_use_global_seed);
|
||||||
|
|
||||||
|
void testActivation(const string& act) {
|
||||||
|
LOG(INFO) << "test activation: " << act;
|
||||||
|
size_t size = 10;
|
||||||
|
TestConfig config;
|
||||||
|
config.biasSize = 0;
|
||||||
|
config.layerConfig.set_type("addto");
|
||||||
|
config.layerConfig.set_size(size);
|
||||||
|
config.layerConfig.set_active_type(act);
|
||||||
|
config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0});
|
||||||
|
config.layerConfig.add_inputs();
|
||||||
|
for (auto useGpu : {false, true}) {
|
||||||
|
testLayerGrad(config,
|
||||||
|
act + "_activation",
|
||||||
|
100,
|
||||||
|
/* trans= */false,
|
||||||
|
useGpu,
|
||||||
|
/* useWeight */true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs the gradient check for every registered activation type, skipping
// the ones listed in `excluded` that do not fit this generic test setup.
TEST(Activation, activation) {
  const auto types = ActivationFunction::getAllRegisteredTypes();
  const std::set<string> excluded{"sequence_softmax"};
  for (const auto& type : types) {
    if (excluded.count(type) == 0) {
      testActivation(type);
    }
  }
}
|
||||||
|
|
||||||
|
// Test driver: initialize gtest and Paddle, pin random seeds so the
// gradient checks are reproducible, then run all registered tests.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
  // Share one global seed across threads so runs are deterministic.
  FLAGS_thread_local_rand_use_global_seed = true;
  srand(1);
  return RUN_ALL_TESTS();
}
|
@ -0,0 +1,64 @@
|
|||||||
|
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from .layers import LayerOutput, mixed_layer, identity_projection, \
|
||||||
|
slope_intercept_layer
|
||||||
|
from .attrs import is_compatible_with
|
||||||
|
from .default_decorators import *
|
||||||
|
import activations as act
|
||||||
|
|
||||||
|
__all__ = []
|
||||||
|
|
||||||
|
def register_unary_math_op(op_name, act):
    """Expose an elementwise unary math operation as a module-level function.

    Defines a function ``op_name(input, name=None)`` that applies the given
    activation to ``input`` through an identity projection inside a
    mixed_layer, installs it into this module's namespace, and records it
    in ``__all__``.
    """
    def op(input, name=None):
        proj = identity_projection(input=input)
        return mixed_layer(input=[proj], name=name, act=act)

    op = wrap_name_default(op_name)(op)
    # Reuse the activation class docstring as the new op's docstring.
    op.__doc__ = type(act).__doc__
    globals()[op_name] = op
    __all__.append(op_name)
|
||||||
|
|
||||||
|
# Register every elementwise unary math helper, one per activation type.
for _op_name, _act_fn in [('exp', act.ExpActivation()),
                          ('log', act.LogActivation()),
                          ('abs', act.AbsActivation()),
                          ('sigmoid', act.SigmoidActivation()),
                          ('tanh', act.TanhActivation()),
                          ('square', act.SquareActivation())]:
    register_unary_math_op(_op_name, _act_fn)
# Drop loop temporaries so the module namespace stays clean.
del _op_name, _act_fn
|
||||||
|
|
||||||
|
def add(layeroutput, other):
    """Elementwise addition for LayerOutput.

    A numeric ``other`` is folded in via slope_intercept_layer
    (y = x + other); another LayerOutput is summed through identity
    projections in a mixed_layer.
    """
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, intercept=other)
    assert isinstance(other, LayerOutput)
    projections = [identity_projection(input=layeroutput),
                   identity_projection(input=other)]
    return mixed_layer(input=projections)

# Addition is commutative, so the same function serves both sides.
LayerOutput.__radd__ = add
LayerOutput.__add__ = add
|
||||||
|
|
||||||
|
def sub(layeroutput, other):
    """Elementwise subtraction: ``layeroutput - other``.

    ``other`` may be a plain number (handled with slope_intercept_layer)
    or another LayerOutput (negated, then summed via identity projections
    in a mixed_layer).
    """
    if is_compatible_with(other, float):
        # y = x - c, i.e. slope 1 and intercept -c.  (Passing
        # intercept=other would compute x + c instead — that was a bug.)
        return slope_intercept_layer(input=layeroutput, intercept=-other)
    assert isinstance(other, LayerOutput)
    # Negate the subtrahend, then add the two layers elementwise.
    neg = slope_intercept_layer(input=other, slope=-1.0)
    return mixed_layer(input=[identity_projection(input=layeroutput),
                              identity_projection(input=neg)])

LayerOutput.__sub__ = sub
|
||||||
|
|
||||||
|
def rsub(layeroutput, other):
    """Reflected subtraction: ``other - layeroutput``.

    Negates ``layeroutput`` via slope_intercept_layer, then delegates to
    ``add`` (which handles both scalar and LayerOutput ``other``).
    """
    negated = slope_intercept_layer(input=layeroutput, slope=-1.0)
    return add(negated, other)

LayerOutput.__rsub__ = rsub
|
@ -0,0 +1,24 @@
|
|||||||
|
from paddle.trainer_config_helpers import *
|
||||||
|
from paddle.trainer_config_helpers import math
|
||||||
|
|
||||||
|
# Minimal network config exercising the elementwise math helpers:
# chains every registered unary op, then the +/- operator overloads.
settings(
    batch_size=1000,
    learning_rate=1e-5
)

x = data_layer(name='data', size=100)
# Apply each registered unary math op in turn.
x = math.exp(x)
x = math.log(x)
x = math.abs(x)
x = math.sigmoid(x)
x = math.square(x)
x = math.square(x)  # NOTE(review): square applied twice — confirm intentional
# Exercise __radd__/__add__/__sub__/__rsub__ with scalars and layers.
y = 1 + x
y = y + 1
y = x + y
y = y - x
y = y - 2
y = 2 - y

outputs(y)
|
||||||
|
|
Loading…
Reference in new issue