merge conflicts

8 years ago · 477d92bcd2
parent dd64349a92 4fbc03d351
commit 477d92bcd2
21 changed files with 535 additions and 66 deletions
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@ -123,6 +123,15 @@ OperatorBase::OperatorBase(const std::string& type,
  CheckAllInputOutputSet();
 }
 std::vector<std::string> OperatorBase::InputVars() const {
  std::vector<std::string> ret_val;
  for (auto& o : outputs_) {
    ret_val.reserve(ret_val.size() + o.second.size());
    ret_val.insert(ret_val.end(), o.second.begin(), o.second.end());
  }
  return ret_val;
 }
 std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
  std::vector<std::string> ret_val;
  if (has_intermediate) {
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@ -94,11 +94,14 @@ class OperatorBase {
  const VariableNameMap& Inputs() const { return inputs_; }
  const VariableNameMap& Outputs() const { return outputs_; }
  //! Get a input with argument's name described in `op_proto`
  std::string Input(const std::string& name) const;
  //! Get a input which has multiple variables.
  const std::vector<std::string>& Inputs(const std::string& name) const;
  std::vector<std::string> InputVars() const;
  //! Get a output with argument's name described in `op_proto`
  std::string Output(const std::string& name) const;
  //! Get an output which has multiple variables.
@ -311,9 +314,9 @@ class InferShapeContext {
  }
  template <typename T>
-  std::vector<const T*> MultiOutput(const std::string& name) const {
+  std::vector<T*> MultiOutput(const std::string& name) const {
    auto names = op_.Outputs(name);
-    std::vector<const T*> res;
+    std::vector<T*> res;
    res.reserve(names.size());
    std::transform(names.begin(), names.end(), std::back_inserter(res),
                   [&](const std::string& sub_name) {
--- a/paddle/gserver/layers/DetectionOutputLayer.cpp
+++ b/paddle/gserver/layers/DetectionOutputLayer.cpp
@ -139,7 +139,13 @@ void DetectionOutputLayer::forward(PassType passType) {
                                       allDecodedBBoxes,
                                       &allIndices);
-  resetOutput(numKept, 7);
+  if (numKept > 0) {
    resetOutput(numKept, 7);
  } else {
    MatrixPtr outV = getOutputValue();
    outV = NULL;
    return;
  }
  MatrixPtr outV = getOutputValue();
  getDetectionOutput(confBuffer_->getData(),
                     numKept,
--- a/paddle/gserver/layers/DetectionUtil.cpp
+++ b/paddle/gserver/layers/DetectionUtil.cpp
@ -469,7 +469,7 @@ size_t getDetectionIndices(
    const size_t numClasses,
    const size_t backgroundId,
    const size_t batchSize,
-    const size_t confThreshold,
+    const real confThreshold,
    const size_t nmsTopK,
    const real nmsThreshold,
    const size_t keepTopK,
--- a/paddle/gserver/layers/DetectionUtil.h
+++ b/paddle/gserver/layers/DetectionUtil.h
@ -275,7 +275,7 @@ size_t getDetectionIndices(
    const size_t numClasses,
    const size_t backgroundId,
    const size_t batchSize,
-    const size_t confThreshold,
+    const real confThreshold,
    const size_t nmsTopK,
    const real nmsThreshold,
    const size_t keepTopK,
--- a/paddle/operators/sum_op.cc
+++ b/paddle/operators/sum_op.cc
@ -0,0 +1,73 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/operators/sum_op.h"
 #include <vector>
 namespace paddle {
 namespace operators {
 using framework::Tensor;
 // Shape inference for the `sum` operator: takes the duplicable input "X"
 // (several tensors) and produces the single output "Out".
 class SumOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
  // Checks that more than one input tensor is bound to "X" and that all of
  // them share the same dims, then resizes "Out" to those dims.
  // Fails through PADDLE_ENFORCE* when either condition does not hold.
  void InferShape(const framework::InferShapeContext &ctx) const override {
    auto ins = ctx.MultiInput<framework::Tensor>("X");
    auto *out = ctx.Output<framework::Tensor>("Out");
    int N = ins.size();
    // Validate the count before touching ins[0]; with no inputs bound the
    // element access below would index an empty vector.
    PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1.");
    auto in_dim = ins[0]->dims();
    for (int i = 1; i < N; i++) {
      auto dim = ins[i]->dims();
      PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape");
    }
    out->Resize(in_dim);
  }
 };
 // Declares the proto description of the sum operator: one input slot "X",
 // one output slot "Out", and a short comment string.
 class SumOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Forwards `proto` and `op_checker` to the base maker, then registers the
  // operator's inputs, outputs and documentation.
  SumOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // "X" is marked duplicable; SumOp::InferShape reads it with MultiInput,
    // i.e. as a list of tensors.
    AddInput("X", "the input tensors of sum operator.").AsDuplicable();
    AddOutput("Out", "the output tensor of sum operator.");
    AddComment(R"DOC(
            Sum the input tensors.
        )DOC");
  }
 };
 // Shape inference for the gradient of the sum operator: every gradient
 // tensor bound to GradVarName("X") is resized to the dims of the incoming
 // gradient GradVarName("Out").
 class SumGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    auto x_grads = ctx.MultiOutput<Tensor>(framework::GradVarName("X"));
    auto out_grad_dims =
        ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
    for (size_t idx = 0; idx < x_grads.size(); ++idx) {
      x_grads[idx]->Resize(out_grad_dims);
    }
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 // Register the forward op type `sum` (SumOp + SumOpMaker) together with its
 // gradient op type `sum_grad` (SumGradOp).
 REGISTER_OP(sum, ops::SumOp, ops::SumOpMaker, sum_grad, ops::SumGradOp);
 // CPU kernels are instantiated for `float` only.
 REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel<paddle::platform::CPUPlace, float>);
 REGISTER_OP_CPU_KERNEL(sum_grad,
                       ops::SumGradKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/sum_op.cu
+++ b/paddle/operators/sum_op.cu
@ -0,0 +1,18 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #define EIGEN_USE_GPU
 #include "paddle/operators/sum_op.h"
 namespace ops = paddle::operators;
 // GPU counterparts of the sum / sum_grad kernels, instantiated for `float`
 // only, using the same kernel templates from sum_op.h with GPUPlace.
 REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel<paddle::platform::GPUPlace, float>);
 REGISTER_OP_GPU_KERNEL(sum_grad,
                       ops::SumGradKernel<paddle::platform::GPUPlace, float>);
--- a/paddle/operators/sum_op.h
+++ b/paddle/operators/sum_op.h
@ -0,0 +1,65 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
 namespace paddle {
 namespace operators {
 using Tensor = framework::Tensor;
 template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
 using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
 // Forward kernel of the sum operator: writes into "Out" the element-wise
 // sum of all tensors bound to "X", evaluated on the Eigen device of `Place`.
 template <typename Place, typename T>
 class SumKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto inputs = context.MultiInput<Tensor>("X");
    auto* output = context.Output<Tensor>("Out");
    // Allocate the output buffer on the place this kernel runs on.
    output->mutable_data<T>(context.GetPlace());

    auto device = context.GetEigenDevice<Place>();
    auto accumulated = EigenVector<T>::Flatten(*output);
    const int input_count = inputs.size();

    // Seed the accumulator with the first input ...
    auto first = EigenVector<T>::Flatten(*(inputs[0]));
    accumulated.device(device) = first;
    // ... then fold the remaining inputs into it one at a time.
    for (int idx = 1; idx < input_count; ++idx) {
      auto addend = EigenVector<T>::Flatten(*(inputs[idx]));
      accumulated.device(device) = accumulated + addend;
    }
  }
 };
 // Backward kernel of the sum operator: copies the incoming gradient
 // GradVarName("Out") into every gradient tensor bound to GradVarName("X").
 template <typename Place, typename T>
 class SumGradKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
    auto x_grads = context.MultiOutput<Tensor>(framework::GradVarName("X"));
    const size_t grad_count = x_grads.size();

    // First pass: allocate every gradient buffer.
    for (size_t idx = 0; idx < grad_count; ++idx) {
      x_grads[idx]->mutable_data<T>(context.GetPlace());
    }

    auto device = context.GetEigenDevice<Place>();
    auto source = EigenVector<T>::Flatten(*out_grad);
    // Second pass: copy the flattened incoming gradient into each buffer.
    for (size_t idx = 0; idx < grad_count; ++idx) {
      auto target = EigenVector<T>::Flatten(*x_grads[idx]);
      target.device(device) = source;
    }
  }
 };
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/platform/enforce.h
+++ b/paddle/platform/enforce.h
@ -25,10 +25,6 @@ limitations under the License. */
 #include "paddle/string/printf.h"
 #include "paddle/string/to_string.h"
 #ifdef __GNUC__
 #include <cxxabi.h>  // for __cxa_demangle
 #endif
 #ifndef PADDLE_ONLY_CPU
 #include "paddle/platform/dynload/cublas.h"
@ -46,19 +42,6 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
 namespace {
 #ifdef __GNUC__
 // Turns a mangled C++ symbol name into its human-readable form using
 // abi::__cxa_demangle. When demangling does not succeed (non-zero status)
 // the input is returned unchanged.
 inline std::string demangle(std::string name) {
  int status = -4;  // some arbitrary value to eliminate the compiler warning
  char* raw = abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status);
  // __cxa_demangle hands back a malloc'ed buffer; release it with std::free
  // on every exit path.
  std::unique_ptr<char, void (*)(void*)> buffer{raw, std::free};
  if (status != 0) {
    return name;
  }
  return buffer.get();
 }
 #else
 // Without the GNU ABI helpers the name is passed through untouched.
 inline std::string demangle(std::string name) { return name; }
 #endif
 }
 struct EnforceNotMet : public std::exception {
  std::exception_ptr exp_;
  std::string err_str_;
@ -79,7 +62,7 @@ struct EnforceNotMet : public std::exception {
      Dl_info info;
      for (int i = 0; i < size; ++i) {
        if (dladdr(call_stack[i], &info)) {
-          auto demangled = demangle(info.dli_sname);
+          auto demangled = info.dli_sname;
          auto addr_offset = static_cast<char*>(call_stack[i]) -
                             static_cast<char*>(info.dli_saddr);
          sout << string::Sprintf("%-3d %*0p %s + %zd\n", i,
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@ -51,6 +51,7 @@ USE_CPU_ONLY_OP(gather);
 USE_CPU_ONLY_OP(scatter);
 USE_OP(top_k);
 USE_OP(squared_l2_distance);
 USE_OP(sum);
 USE_OP(reshape);
 namespace paddle {
@ -217,7 +218,10 @@ All parameter, weight, gradient are variables in Paddle.
               -> std::map<std::string, std::vector<std::string>> {
                 return op.Outputs();
               })
      .def("output_vars",
           [](const OperatorBase &op) { return op.OutputVars(true); })
      .def("inputs", [](const OperatorBase &op) { return op.Inputs(); })
      .def("input_vars", [](const OperatorBase &op) { return op.InputVars(); })
      .def("__str__", &OperatorBase::DebugString)
      .def("no_intermediate_outputs",
           [](const OperatorBase &op) { return op.OutputVars(false); })
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@ -30,6 +30,8 @@ Configuring cmake in /paddle/build ...
      -DCMAKE_BUILD_TYPE=Release
      -DWITH_DOC=OFF
      -DWITH_GPU=${WITH_GPU:-OFF}
      -DWITH_MKLDNN=${WITH_MKLDNN:-ON}
      -DWITH_MKLML=${WITH_MKLML:-ON}
      -DWITH_AVX=${WITH_AVX:-OFF}
      -DWITH_GOLANG=${WITH_GOLANG:-ON}
      -DWITH_SWIG_PY=ON
@ -50,6 +52,8 @@ cmake .. \
      -DCMAKE_BUILD_TYPE=Release \
      -DWITH_DOC=OFF \
      -DWITH_GPU=${WITH_GPU:-OFF} \
      -DWITH_MKLDNN=${WITH_MKLDNN:-ON} \
      -DWITH_MKLML=${WITH_MKLML:-ON} \
      -DWITH_AVX=${WITH_AVX:-OFF} \
      -DWITH_GOLANG=${WITH_GOLANG:-ON} \
      -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@ -3748,8 +3748,8 @@ class SwitchOrderLayer(LayerBase):
    def __init__(self, name, inputs, reshape, **xargs):
        super(SwitchOrderLayer, self).__init__(
            name, 'switch_order', 0, inputs=inputs, **xargs)
-        self.config.reshape_conf.heightAxis.extend(reshape['height'])
+        self.config.reshape_conf.height_axis.extend(reshape['height'])
-        self.config.reshape_conf.widthAxis.extend(reshape['width'])
+        self.config.reshape_conf.width_axis.extend(reshape['width'])
 # Deprecated, use a new layer specific class instead
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@ -1223,7 +1223,8 @@ def detection_output_layer(input_loc,
                           name=None):
    """
    Apply the NMS to the output of network and compute the predict bounding
-    box location.
+    box location. The output of this layer could be None if there is no valid
    bounding box.
    :param name: The Layer Name.
    :type name: basestring
@ -6460,6 +6461,7 @@ def switch_order_layer(input,
    return LayerOutput(
        name=name,
        layer_type=LayerType.SWITCH_ORDER_LAYER,
        activation=act,
        parents=input,
        size=l.config.size)
--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
@ -53,10 +53,13 @@ class BeginPass(object):
 class EndPass(WithMetric):
    """
    Event On One Pass Training Complete.
    To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')"
    in your event_handler call back
    """
-    def __init__(self, pass_id, evaluator):
+    def __init__(self, pass_id, evaluator, gm):
        self.pass_id = pass_id
        self.gm = gm
        WithMetric.__init__(self, evaluator)
@ -73,10 +76,13 @@ class BeginIteration(object):
 class EndIteration(WithMetric):
    """
    Event On One Batch Training Complete.
    To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')"
    in your event_handler call back
    """
-    def __init__(self, pass_id, batch_id, cost, evaluator):
+    def __init__(self, pass_id, batch_id, cost, evaluator, gm):
        self.pass_id = pass_id
        self.batch_id = batch_id
        self.cost = cost
        self.gm = gm
        WithMetric.__init__(self, evaluator)
--- a/python/paddle/v2/framework/op.py
+++ b/python/paddle/v2/framework/op.py
@ -142,8 +142,8 @@ def create_op_creation_method(op_proto):
    return OpInfo(
        method=__impl__,
        name=op_proto.type,
-        inputs=[var.name for var in op_proto.inputs],
+        inputs=[(var.name, var.duplicable) for var in op_proto.inputs],
-        outputs=[var.name for var in op_proto.outputs],
+        outputs=[(var.name, var.duplicable) for var in op_proto.outputs],
        attrs=[attr.name for attr in op_proto.attrs])
@ -180,9 +180,15 @@ class OperatorFactory(object):
        return self.op_methods.get(t)
    def get_op_input_names(self, type):
        """Return only the names of the input slots of operator `type`.

        Each entry of the op info's `inputs` is indexed with [0] to obtain
        the name.
        """

        def _name_of(entry):
            return entry[0]

        op_info = self.get_op_info(type)
        return map(_name_of, op_info.inputs)
    def get_op_inputs(self, type):
        """Return the `inputs` attribute of the op info registered for `type`."""
        op_info = self.get_op_info(type)
        return op_info.inputs
    def get_op_output_names(self, type):
        """Return only the names of the output slots of operator `type`.

        Each entry of the op info's `outputs` is indexed with [0] to obtain
        the name.
        """

        def _name_of(entry):
            return entry[0]

        op_info = self.get_op_info(type)
        return map(_name_of, op_info.outputs)
    def get_op_outputs(self, type):
        """Return the `outputs` attribute of the op info registered for `type`."""
        op_info = self.get_op_info(type)
        return op_info.outputs
    def get_op_attr_names(self, type):
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@ -33,6 +33,7 @@ py_test(test_sgd_op SRCS test_sgd_op.py)
 py_test(test_gradient_checker SRCS test_gradient_checker.py)
 py_test(test_lookup_table SRCS test_lookup_table.py)
 py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py)
 py_test(test_sum_op SRCS test_sum_op.py)
 py_test(mnist SRCS mnist.py)
 py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py)
 py_test(test_reshape_op SRCS test_reshape_op.py)
--- a/python/paddle/v2/framework/tests/op_test.py
+++ b/python/paddle/v2/framework/tests/op_test.py
--- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@ -1,36 +1,27 @@
 import unittest
 import numpy
-from op_test_util import OpTestMeta
+from op_test import OpTest
 from gradient_checker import GradientChecker, create_op
-class TestCrossEntropy(unittest.TestCase):
+class TestCrossEntropy(OpTest):
    __metaclass__ = OpTestMeta
    def setUp(self):
-        self.type = "onehot_cross_entropy"
+        self.op_type = "onehot_cross_entropy"
        batch_size = 30
        class_num = 10
-        X = numpy.random.random((batch_size, class_num)).astype("float32")
+        X = numpy.random.uniform(0.1, 1.0,
-        label = 5 * numpy.ones(batch_size).astype("int32")
+                                 [batch_size, class_num]).astype("float32")
        label = (class_num / 2) * numpy.ones(batch_size).astype("int32")
        self.inputs = {'X': X, 'label': label}
        Y = []
        for i in range(0, batch_size):
            Y.append(-numpy.log(X[i][label[i]]))
        self.outputs = {'Y': numpy.array(Y).astype("float32")}
    def test_check_output(self):
        self.check_output()
 class CrossEntropyGradOpTest(GradientChecker):
    def test_check_grad(self):
-        op = create_op("onehot_cross_entropy")
+        self.check_grad(["X"], "Y")
        batch_size = 30
        class_num = 10
        inputs = {
            "X": numpy.random.uniform(
                0.1, 1.0, [batch_size, class_num]).astype("float32"),
            "label": (class_num / 2) * numpy.ones(batch_size).astype("int32")
        }
        self.check_grad(op, inputs, set("X"), "Y")
 if __name__ == "__main__":
--- a/python/paddle/v2/framework/tests/test_sigmoid_op.py
+++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py
@ -1,27 +1,21 @@
 import unittest
 import numpy as np
-from op_test_util import OpTestMeta
+from op_test import OpTest
 from gradient_checker import GradientChecker, create_op
-class TestSigmoidOp(unittest.TestCase):
+class TestSigmoid(OpTest):
    __metaclass__ = OpTestMeta
    def setUp(self):
-        self.type = "sigmoid"
+        self.op_type = "sigmoid"
-        self.inputs = {'X': np.random.random((15, 31)).astype("float32")}
+        self.inputs = {
            'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
        }
        self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))}
    def test_check_output(self):
        self.check_output()
-class TestSigmoidGradOp(GradientChecker):
+    def test_check_grad(self):
-    def test_grad(self):
+        self.check_grad(["X"], "Y", max_relative_error=0.007)
        op = create_op("sigmoid")
        inputs = {"X": np.random.uniform(0.1, 1, [11, 17]).astype("float32")}
        # compare gpu and cpu results for backward op.
        # this test will be skiped if only compiling CPU version.
        self.compare_grad(op, inputs)
        # check gradients 
        self.check_grad(op, inputs, set("X"), "Y", max_relative_error=0.007)
 if __name__ == '__main__':
--- a/python/paddle/v2/framework/tests/test_sum_op.py
+++ b/python/paddle/v2/framework/tests/test_sum_op.py
@ -0,0 +1,24 @@
 import unittest
 import numpy as np
 from op_test import OpTest
 class TestSumOp(OpTest):
    # Exercises the "sum" operator with three random float32 tensors of shape
    # (3, 4) bound to the input slot "X"; the expected "Out" is their sum.
    def setUp(self):
        self.op_type = "sum"
        shape = (3, 4)
        # Draw the three addends in order x0, x1, x2.
        first, second, third = [
            np.random.random(shape).astype('float32') for _ in range(3)
        ]
        self.inputs = {"X": {"x0": first, "x1": second, "x2": third}}
        expected = first + second + third
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        # Only the gradient with respect to x0 is checked.
        self.check_grad(["x0"], "Out")
 if __name__ == '__main__':
    unittest.main()
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@ -174,13 +174,18 @@ class SGD(object):
                        pass_id=pass_id,
                        batch_id=batch_id,
                        cost=cost,
-                        evaluator=batch_evaluator))
+                        evaluator=batch_evaluator,
                        gm=self.__gradient_machine__))
                self.__parameter_updater__.finishBatch(cost)
                batch_evaluator.finish()
            self.__parameter_updater__.finishPass()
            pass_evaluator.finish()
-            event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
+            event_handler(
                v2_event.EndPass(
                    pass_id,
                    evaluator=pass_evaluator,
                    gm=self.__gradient_machine__))
        self.__gradient_machine__.finish()
    def test(self, reader, feeding=None):