Merge pull request #7269 from emailweixu/calc_gradient

Calculating gradients for partial graph
Committed by fengjiayi via GitHub, commit df92776850

@@ -87,7 +87,11 @@ class GradOpDescMakerBase {
     auto onames = this->Output(name);
     ret_val.reserve(onames.size());
     std::transform(onames.begin(), onames.end(), std::back_inserter(ret_val),
-                   GradVarName);
+                   [this](const std::string& fwd_var_name) -> std::string {
+                     auto g_name = GradVarName(fwd_var_name);
+                     (*this->grad_to_var_)[g_name] = fwd_var_name;
+                     return g_name;
+                   });
     return ret_val;
   }
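Note: the plain GradVarName transform is replaced by a lambda that also records, in grad_to_var_, which forward variable each generated gradient name belongs to; the partial-graph gradient pass introduced by this PR uses that map to resolve gradient targets. A rough Python sketch of the bookkeeping (hypothetical helper names; the real logic lives in the C++ above, and the "@GRAD" suffix is an assumption about Paddle's naming scheme):

    # Hypothetical sketch of the grad-name bookkeeping performed by the new lambda.
    GRAD_SUFFIX = "@GRAD"

    def grad_var_name(fwd_var_name):
        return fwd_var_name + GRAD_SUFFIX

    def output_grad_names(onames, grad_to_var):
        ret_val = []
        for fwd_var_name in onames:
            g_name = grad_var_name(fwd_var_name)
            grad_to_var[g_name] = fwd_var_name  # remember which forward var this grad differentiates
            ret_val.append(g_name)
        return ret_val

    grad_to_var = {}
    print(output_grad_names(["mul_out"], grad_to_var))  # ['mul_out@GRAD']
    print(grad_to_var)                                  # {'mul_out@GRAD': 'mul_out'}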

@@ -129,7 +129,7 @@ class OpDesc {
   }
   proto::OpDesc desc_;
-  // input arg name => output variable names
+  // input arg name => input variable names
   VariableNameMap inputs_;
   // output arg name => output variable names
   VariableNameMap outputs_;

@@ -39,7 +39,7 @@ class NormOpMaker : public framework::OpProtoAndCheckerMaker {
              "M = C * H * W");
     AddComment(R"DOC(
  "Input shape: $(N, C, H, W)$
-        Sclae shape: $(C, 1)$
+        Scale shape: $(C, 1)$
         Output shape: $(N, C, H, W)$
         Where
         forward

@@ -66,7 +66,7 @@ class NormKernel : public framework::OpKernel<T> {
                             context.GetPlace());
     auto tmp = framework::EigenVector<T, Eigen::RowMajor,
                                       Eigen::DenseIndex>::Flatten(tmp_tensor);
     // get colsum and sqrt , inverse
     auto dim = Eigen::array<int, 1>({{0}});
     tmp.device(*place) = x_square_batch_eigen.sum(dim);
     tmp.device(*place) = (tmp + epsilon).sqrt().inverse();
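Note: the colsum/rsqrt step above reduces the squared input over dimension 0 and then takes 1/sqrt(. + epsilon). A rough numpy equivalent for orientation (an illustration only, not the Eigen kernel; the 2-D view and axis choice are assumptions):

    # Assumed layout: rows ~ channels, columns ~ spatial positions of one sample.
    import numpy as np

    epsilon = 1e-10
    x_square = np.random.rand(3, 12).astype("float32")

    tmp = x_square.sum(axis=0)            # x_square_batch_eigen.sum(dim) with dim = {0}
    tmp = 1.0 / np.sqrt(tmp + epsilon)    # (tmp + epsilon).sqrt().inverse()
    print(tmp.shape)                      # (12,): one scaling factor per position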

(File diff suppressed because it is too large.)

@@ -1,7 +1,17 @@
 from ..registry import register_layer
 __activations__ = [
-    'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round'
+    'abs',
+    'ceil',
+    'exp',
+    'floor',
+    'log',
+    'relu',
+    'round',
+    'sigmoid',
+    'sqrt',
+    'square',
+    'tanh',
 ]
 __all__ = [
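Note: the activation list is re-sorted one name per line and gains 'exp' and 'square', which register_layer turns into layer functions. A minimal usage sketch, assuming the paddle.v2.fluid layer API of this release:

    import paddle.v2.fluid.layers as layers

    x = layers.data(name="x", shape=[4], dtype="float32")
    y = layers.square(x=x)  # newly listed activation
    z = layers.exp(x=x)     # newly listed activation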

@@ -1,7 +1,8 @@
 from ..layer_helper import LayerHelper
+from ..param_attr import ParamAttr
 __all__ = [
-    'create_tensor', 'cast', 'concat', 'sums', 'assign',
+    'create_tensor', 'create_parameter', 'cast', 'concat', 'sums', 'assign',
     'fill_constant_batch_size_like', 'fill_constant', 'ones', 'zeros'
 ]
@@ -11,6 +12,33 @@ def create_tensor(dtype, name=None):
     return helper.create_variable(name=helper.name, dtype=dtype)
+
+
+def create_parameter(shape,
+                     dtype,
+                     attr=None,
+                     is_bias=False,
+                     default_initializer=None):
+    """
+    Create a parameter
+    Args:
+        shape(list[int]): shape of the parameter
+        dtype(string): element type of the parameter
+        attr(ParamAttr): attributes of the parameter
+        is_bias(bool): This can affect which default initializer is chosen
+                       when default_initializer is None. If is_bias,
+                       initializer.Constant(0.0) will be used. Otherwise,
+                       Xavier() will be used.
+        default_initializer(Initializer): initializer for the parameter
+    Returns:
+        Parameter: the created parameter
+    """
+    helper = LayerHelper("create_parameter")
+    if attr is None:
+        attr = ParamAttr()
+    return helper.create_parameter(attr, shape, dtype, is_bias,
+                                   default_initializer)
+
 def cast(x, dtype):
     """
     This function takes in the input with input_dtype
@@ -180,7 +208,8 @@ def fill_constant_batch_size_like(input,
     Examples:
         .. code-block:: python
-            data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64')
+            data = fluid.layers.fill_constant_batch_size_like(
+                input=like, shape=[1], value=0, dtype='int64')
     """
     helper = LayerHelper("fill_constant_batch_size_like", **locals())
     out = helper.create_tmp_variable(dtype=dtype)
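Note: create_parameter lets user code declare standalone learnable parameters (the new test below depends on it), and the docstring example for fill_constant_batch_size_like now actually calls that layer. A hedged usage sketch, assuming the paddle.v2.fluid API shown in this diff; `like` is just an illustrative input variable:

    import paddle.v2.fluid.layers as layers

    # Learnable [10, 8] parameter; Xavier initializer by default since is_bias=False.
    w = layers.create_parameter(shape=[10, 8], dtype="float32")

    # Bias-like parameter, initialized with Constant(0.0) because is_bias=True.
    b = layers.create_parameter(shape=[8], dtype="float32", is_bias=True)

    # Constant tensor whose batch dimension is copied from `like`.
    like = layers.data(name="like", shape=[8], dtype="float32")
    zeros = layers.fill_constant_batch_size_like(
        input=like, shape=[1], value=0, dtype='int64')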

@@ -0,0 +1,25 @@
+import unittest
+
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.layers as layers
+import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.optimizer as optimizer
+from paddle.v2.fluid.backward import calc_gradient
+
+
+class TestCalcGradient(unittest.TestCase):
+    def test_calc_gradient(self):
+        x = layers.create_parameter(dtype="float32", shape=[5, 10])
+        y = layers.create_parameter(dtype="float32", shape=[10, 8])
+        mul_out = layers.mul(x=x, y=y)
+        mean_out = layers.mean(x=mul_out)
+        a = calc_gradient(mean_out, mul_out)
+        b = calc_gradient(mean_out, x)
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        exe.run(fluid.default_startup_program())
+        exe.run(fluid.default_main_program(), feed={}, fetch_list=[a, b])
+
+
+if __name__ == "__main__":
+    unittest.main()
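Note: calc_gradient builds gradient ops only for the sub-graph connecting the target to the requested variables, here d(mean_out)/d(mul_out) and d(mean_out)/dx. For reference, a numpy cross-check of the second gradient for the shapes used in the test (an illustration, not part of the PR):

    import numpy as np

    X = np.random.rand(5, 10).astype("float32")
    Y = np.random.rand(10, 8).astype("float32")
    Z = X @ Y                              # what layers.mul computes

    dZ = np.full_like(Z, 1.0 / Z.size)     # gradient of mean() w.r.t. each element of Z
    dX = dZ @ Y.T                          # chain rule through the matmul

    assert dX.shape == X.shape             # (5, 10), same shape calc_gradient(mean_out, x) yields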