@@ -327,19 +327,19 @@ def grad(outputs,
     This API computes the sum of gradients of `outputs` with respect to each `inputs` .

     Parameters:
-        outputs (Variable|list(Variable)|tuple(Variable)): the output Variable or
-            Variable list/tuple of the graph to compute gradients.
-        inputs (Variable|list(Variable)|tuple(Variable)): the input Variable or
-            Variable list/tuple of the graph to compute gradients. The returned
+        outputs (Tensor|list(Tensor)|tuple(Tensor)): the output Tensor or
+            Tensor list/tuple of the graph to compute gradients.
+        inputs (Tensor|list(Tensor)|tuple(Tensor)): the input Tensor or
+            Tensor list/tuple of the graph to compute gradients. The returned
             values of this API are the gradients of `inputs` .
-        grad_outputs (Variable|list(Variable|None)|tuple(Variable|None), optional):
+        grad_outputs (Tensor|list(Tensor|None)|tuple(Tensor|None), optional):
             initial gradient values of `outputs` . If `grad_outputs` is None,
             the initial gradient values of `outputs` would be Tensors filled with 1;
             if `grad_outputs` is not None, it must have the same length as `outputs` ,
             and in this case, the initial gradient value of the i-th `outputs` would
             be: (1) a Tensor filled with 1 when the i-th element of `grad_outputs`
             is None; (2) the i-th element of `grad_outputs` when the i-th element of
-            `grad_outputs` is a Variable. Default None.
+            `grad_outputs` is a Tensor. Default None.
         retain_graph (bool, optional): whether to retain the forward graph which
             is used to calculate the gradient. When it is True, the graph would
             be retained, in which way users can calculate backward twice for the
@@ -351,21 +351,21 @@ def grad(outputs,
             computing process would be discarded. Default False.
         only_inputs (bool, optional): whether to only compute the gradients of
             `inputs` . If it is False, the gradients of all remaining leaf
-            Variables in the graph would be also computed and accumulated.
+            Tensors in the graph would be also computed and accumulated.
             If it is True, only the gradients of `inputs` would be computed.
             Default True. only_inputs=False is under development, and it is
             not supported yet.
         allow_unused (bool, optional): whether to raise error or return None if some
-            Variables of `inputs` are unreachable in the graph. If some Variables of
+            Tensors of `inputs` are unreachable in the graph. If some Tensors of
             `inputs` are unreachable in the graph (i.e., their gradients are None),
             error would be raised if allow_unused=False, or None would be returned as
             their gradients if allow_unused=True. Default False.
-        no_grad_vars (Variable|list(Variable)|tuple(Variable)|set(Variable), optional):
-            the Variables whose gradients are not needed to compute. Default None.
+        no_grad_vars (Tensor|list(Tensor)|tuple(Tensor)|set(Tensor), optional):
+            the Tensors whose gradients are not needed to compute. Default None.

     Returns:
-        tuple: a tuple of Variables, whose length is the same as the Variable number
-        inside `inputs`, and the i-th returned Variable is the sum of gradients of
+        tuple: a tuple of Tensors, whose length is the same as the Tensor number
+        inside `inputs`, and the i-th returned Tensor is the sum of gradients of
         `outputs` with respect to the i-th `inputs`.

     Examples 1:
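For reference, a minimal usage sketch of the `grad` API whose docstring is edited above. It is not part of the diff: it assumes Paddle 2.x with dynamic-graph mode enabled by default and the top-level `paddle.grad` / `paddle.to_tensor` entry points; only the parameters it passes (`outputs`, `inputs`, `create_graph`, `allow_unused`) come from the documented signature.

```python
# Minimal sketch (assumption: Paddle 2.x, dynamic-graph mode, top-level paddle.grad).
import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)
y = x * x  # y_i = x_i ** 2

# grad_outputs=None -> the initial gradient of `y` is a Tensor filled with 1,
# so the returned value is d(sum(y))/dx = 2 * x.
dx = paddle.grad(outputs=[y], inputs=[x], create_graph=False)[0]
print(dx.numpy())  # [2. 4. 6.]

# With allow_unused=True, an input that is unreachable from `outputs`
# yields None for its gradient instead of raising an error.
z = paddle.to_tensor([5.0], stop_gradient=False)
dy_dx, dy_dz = paddle.grad(outputs=[y], inputs=[x, z], allow_unused=True)
print(dy_dz)  # None
```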