Merge branch 'develop' into quantize_transpiler_update

6 years ago · 479ad4bb92
parent bd0a9fb7aa 4e81e22827
commit 479ad4bb92
10 changed files with 595 additions and 10 deletions
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -302,6 +302,7 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
 paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
 paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.contrib.op_freq_statistic ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@ -257,6 +257,22 @@ std::unique_ptr<ir::Graph> AttentionLSTMFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PDPattern external_pattern, subblock_pattern;

+  // Use the following variables to tell whether this model is RNN1.
+  // This fuse can only work on the RNN1 model.
+  std::unordered_set<std::string> specified_vars({"data_lod_attention",
+                                                  "cell_init", "hidden_init",
+                                                  "data", "week", "minute"});
+  int count = 0;
+  for (auto* node : graph->Nodes()) {
+    if (node->IsVar() && specified_vars.count(node->Name())) {
+      ++count;
+    }
+  }
+  if (count < specified_vars.size()) {
+    return graph;
+  }
+
+  // Continue to fuse.
  FindWhileOp(graph.get());
  return graph;
 }
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@ -212,10 +212,11 @@ struct AnalysisConfig : public NativeConfig {
    kExclude   // Specify the disabled passes in `ir_passes`.
  };

+  // Determine whether to perform graph optimization.
  bool enable_ir_optim = true;
+  // Manually determine the IR passes to run.
  IrPassMode ir_mode{IrPassMode::kExclude};
-  // attention lstm fuse works only on some specific models, disable as default.
-  std::vector<std::string> ir_passes{"attention_lstm_fuse_pass"};
+  std::vector<std::string> ir_passes;

  // NOTE this is just for internal development, please not use it.
  bool _use_mkldnn{false};
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@ -30,7 +30,13 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
 polygon_box_transform_op.cu)
 detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
 detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc)
-detection_library(generate_proposals_op SRCS generate_proposals_op.cc)
+
+if(WITH_GPU)
+  detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub)
+else()
+  detection_library(generate_proposals_op SRCS generate_proposals_op.cc)
+endif()
+
 detection_library(roi_perspective_transform_op SRCS roi_perspective_transform_op.cc roi_perspective_transform_op.cu)
 #Export local libraries to parent
 set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
--- a/paddle/fluid/operators/detection/generate_proposals_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cc
@ -15,6 +15,7 @@ limitations under the License. */
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/operators/gather.h"
 #include "paddle/fluid/operators/math/math_function.h"

@ -69,7 +70,7 @@ class GenerateProposalsOp : public framework::OperatorWithKernel {
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<Tensor>("Anchors")->type()),
-        platform::CPUPlace());
+        ctx.device_context());
  }
 };

@ -162,7 +163,7 @@ void FilterBoxes(const platform::DeviceContext &ctx, Tensor *boxes,
  const T *im_info_data = im_info.data<T>();
  T *boxes_data = boxes->mutable_data<T>(ctx.GetPlace());
  T im_scale = im_info_data[2];
-  keep->Resize({boxes->dims()[0], 1});
+  keep->Resize({boxes->dims()[0]});
  min_size = std::max(min_size, 1.0f);
  int *keep_data = keep->mutable_data<int>(ctx.GetPlace());

@ -463,7 +464,7 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<int>("post_nms_topN", "post_nms_topN");
    AddAttr<float>("nms_thresh", "nms_thres");
    AddAttr<float>("min_size", "min size");
-    AddAttr<float>("eta", "eta");
+    AddAttr<float>("eta", "The parameter for adaptive NMS.");
    AddComment(R"DOC(
 Generate Proposals OP

--- a/paddle/fluid/operators/detection/generate_proposals_op.cu
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cu
--- a/python/paddle/fluid/contrib/init.py
+++ b/python/paddle/fluid/contrib/init.py
@ -18,8 +18,13 @@ from . import decoder
 from .decoder import *
 from . import memory_usage_calc
 from .memory_usage_calc import *
+from . import op_frequence
+from .op_frequence import *
 from . import quantize
 from .quantize import *

-__all__ = decoder.__all__ + memory_usage_calc.__all__
-__all__ += quantize.__all__
+__all__ = []
+__all__ += decoder.__all__
+__all__ += memory_usage_calc.__all__
+__all__ += op_frequence.__all__
+__all__ += quantize.__all__
--- a/python/paddle/fluid/contrib/op_frequence.py
+++ b/python/paddle/fluid/contrib/op_frequence.py
@ -0,0 +1,104 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+from collections import OrderedDict
+
+from ..framework import Program
+
+__all__ = ['op_freq_statistic']
+
+
+def op_freq_statistic(program):
+    """
+    Statistics of Op frequency.
+
+    Args:
+        program(Program): The current Program.
+
+    Returns:
+        uni_op_freq(dict): the single op frequency.
+        adj_2_op_freq(dict): the two adjacent ops frequency.
+
+    Examples:
+
+        >>> import paddle.fluid as fluid
+        >>> uni_op_freq, adj_2_op_freq = fluid.contrib.op_freq_statistic(
+        >>>        fluid.default_main_program())
+        >>> for op_type, op_num in uni_op_freq:
+        >>>     print("%s  \t  %d" % (op_type, op_num))
+        >>> for op_type, op_num in adj_2_op_freq:
+        >>>     print("%s  \t  %d" % (op_type, op_num))
+
+    """
+
+    if not isinstance(program, Program):
+        raise TypeError("The input type should be Porgram."
+                        "But you passed in %s" % (type(program)))
+
+    uni_op_freq = OrderedDict()
+    adj_2_op_freq = OrderedDict()
+    op_in_ops = OrderedDict()
+
+    parameters = [p.name for p in program.blocks[0].all_parameters()]
+
+    # get uni_op_freq
+    for op in program.global_block().ops:
+        had_recorded = False
+        for var_name in op.output_arg_names:
+            if var_name in parameters:
+                continue
+            if not had_recorded and uni_op_freq.has_key(op.type):
+                uni_op_freq[op.type] += 1
+                had_recorded = True
+            elif not had_recorded:
+                uni_op_freq[op.type] = 1
+                had_recorded = True
+
+    # get adj_2_op_freq
+    var_gen_op = {}
+    for op in program.global_block().ops:
+        for var_name in op.input_arg_names:
+            if var_name in parameters:
+                continue
+            if var_gen_op.has_key(var_name):
+                assert len(var_gen_op[var_name]) > 0
+                if op_in_ops.has_key(op.type):
+                    op_in_ops[op.type].append(var_gen_op[var_name][-1])
+                else:
+                    op_in_ops[op.type] = [var_gen_op[var_name][-1]]
+            else:
+                print("Var's generate op is not found,%s, %s" %
+                      (var_name, op.type))
+
+        for var_name in op.output_arg_names:
+            if var_gen_op.has_key(var_name):
+                var_gen_op[var_name].append(op.type)
+            else:
+                var_gen_op[var_name] = [op.type]
+
+    for op, in_ops in op_in_ops.iteritems():
+        for in_op in in_ops:
+            op_op = in_op + "->" + op
+            if adj_2_op_freq.has_key(op_op):
+                adj_2_op_freq[op_op] += 1
+            else:
+                adj_2_op_freq[op_op] = 1
+
+    uni_op_freq = sorted(
+        uni_op_freq.items(), key=lambda item: item[1], reverse=True)
+    adj_2_op_freq = sorted(
+        adj_2_op_freq.items(), key=lambda item: item[1], reverse=True)
+
+    return uni_op_freq, adj_2_op_freq
--- a/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
+++ b/python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
@ -277,7 +277,6 @@ class TestGenerateProposalsOp(OpTest):
            'eta': self.eta
        }

-        print("lod = ", self.lod)
        self.outputs = {
            'RpnRois': (self.rpn_rois[0], [self.lod]),
            'RpnRoiProbs': (self.rpn_roi_probs[0], [self.lod])
@ -295,7 +294,7 @@ class TestGenerateProposalsOp(OpTest):
        self.post_nms_topN = 5000  # train 6000, test 1000
        self.nms_thresh = 0.7
        self.min_size = 3.0
-        self.eta = 0.8
+        self.eta = 1.

    def init_test_input(self):
        batch_size = 1
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@ -470,7 +470,10 @@ class DistributeTranspiler(object):
        """
        # remove optimize ops and add a send op to main_program
        # FIXME(typhoonzero): Also ops like clip_gradient, lrn_decay?
+        lr_ops = self._get_lr_ops()
        delete_ops(self.origin_program.global_block(), self.optimize_ops)
+        delete_ops(self.origin_program.global_block(), lr_ops)
+
        self.origin_program.__str__()

        if wait_port: