Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into cross_entropy

revert-3824-remove_grad_op_type
dangqingqing 8 years ago
commit 70285cce32

@ -0,0 +1,15 @@
#!/bin/bash
# Pre-commit wrapper around clang-format.
#
# Checks that the clang-format found on PATH reports a version string
# containing $VERSION, then forwards every command-line argument to it
# unchanged. Exits non-zero (without formatting anything) on a mismatch.
set -e

readonly VERSION="3.8"

# Full version banner printed by the installed clang-format.
version=$(clang-format -version)

if ! [[ $version == *"$VERSION"* ]]; then
    echo "clang-format version check failed."
    echo "a version contains '$VERSION' is needed, but get '$version'"
    echo "you can install the right version, and make an soft-link to '\$PATH' env"
    # Exit statuses are limited to 0-255; use a plain failure code rather
    # than the out-of-range `-1`.
    exit 1
fi

# "$@" must be quoted so that file names containing spaces or glob
# characters reach clang-format as single, unmodified arguments.
clang-format "$@"

@ -19,10 +19,10 @@
- id: end-of-file-fixer - id: end-of-file-fixer
- repo: local - repo: local
hooks: hooks:
- id: clang-format - id: clang-format-with-version-check
name: clang-format name: clang-format
description: Format files with ClangFormat. description: Format files with ClangFormat.
entry: clang-format -i entry: ./.clang_format.hook -i
language: system language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
- repo: https://github.com/PaddlePaddle/pre-commit-golang - repo: https://github.com/PaddlePaddle/pre-commit-golang

@ -137,9 +137,9 @@ set(EXTERNAL_LIBS
) )
if(WITH_GPU) if(WITH_GPU)
list(APPEND EXTERNAL_LIB ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO) if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIB ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY}) list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(NOT WITH_DSO) endif(NOT WITH_DSO)
endif(WITH_GPU) endif(WITH_GPU)

@ -71,20 +71,6 @@ RUN pip install -r /root/requirements.txt
RUN apt-get install -y libssl-dev libffi-dev RUN apt-get install -y libssl-dev libffi-dev
RUN pip install certifi urllib3[secure] RUN pip install certifi urllib3[secure]
# TODO(qijun) The template library Eigen doesn't work well with GCC 5
# coming with the default Docker image, so we switch to use GCC 4.8
# by default. And I will check Eigen library later.
RUN ln -sf gcc-4.8 /usr/bin/gcc && \
ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \
ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \
ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \
ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \
ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \
ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \
ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \
ln -sf g++-4.8 /usr/bin/g++ && \
ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++
# Install woboq_codebrowser to /woboq # Install woboq_codebrowser to /woboq
RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \ RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \

@ -9,13 +9,6 @@ function(CheckCompilerCXX11Flag)
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.") message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
endif() endif()
if(NOT ANDROID)
# TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem.
# Use Debug mode instead for now.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
endif()
endif()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang"
# Apple Clang is a different compiler than upstream Clang which has different version numbers. # Apple Clang is a different compiler than upstream Clang which has different version numbers.
@ -160,7 +153,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here. # So, don't set these flags here.
LIST(APPEND CUDA_NVCC_FLAGS -std=c++11 --default-stream per-thread) LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
LIST(APPEND CUDA_NVCC_FLAGS --use_fast_math) LIST(APPEND CUDA_NVCC_FLAGS --use_fast_math)
if(CMAKE_BUILD_TYPE STREQUAL "Debug") if(CMAKE_BUILD_TYPE STREQUAL "Debug")

@ -101,6 +101,7 @@ if use_mkldnn
5. 在**Argument**里添加两个`MkldnnMatrixPtr`,取名为`mkldnnValue`和`mkldnnGrad`,用于存放`MkldnnLayer`会用到的memory buffer。 并且添加函数cvt(会修改为一个更加合适的函数名),用于处理"CPU device"和"MKL-DNN device"之间memory的相互转化。 5. 在**Argument**里添加两个`MkldnnMatrixPtr`,取名为`mkldnnValue`和`mkldnnGrad`,用于存放`MkldnnLayer`会用到的memory buffer。 并且添加函数cvt(会修改为一个更加合适的函数名),用于处理"CPU device"和"MKL-DNN device"之间memory的相互转化。
6. 在父类`Layer`中的`getOutput`函数中添加一段逻辑,用于判断`deviceId`并针对device在MKL-DNN和CPU之间不统一的情况做一个前期转换。 也就是调用`Argument`的cvt函数把output统一到需要的device上。 6. 在父类`Layer`中的`getOutput`函数中添加一段逻辑,用于判断`deviceId`并针对device在MKL-DNN和CPU之间不统一的情况做一个前期转换。 也就是调用`Argument`的cvt函数把output统一到需要的device上。
7. 在原来的`FLAGS`中添加一个`use_mkldnn`的flag用于选择是否使用MKL-DNN的相关功能。 7. 在原来的`FLAGS`中添加一个`use_mkldnn`的flag用于选择是否使用MKL-DNN的相关功能。
8. 关于MKLDNN参数的保存。由于MKLDNN参数的格式与PaddlePaddle原有的格式存在不一样的情况所以需要在保存参数时同时保存该格式信息。目前准备扩展[Header](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/parameter/Parameter.h#L247)里面的`int32_t version`。这个值不管是在v1还是在v2里面一直保存的是0所以可以充分利用这个信息定义一个枚举处理所有MKLDNN的参数格式从而`MKLDNNLayer`就可以从输入的参数中获取需要的格式信息。
## References ## References

@ -68,7 +68,7 @@ As a simple example, consider the following:
1. **BLAS Dependencies(optional)** 1. **BLAS Dependencies(optional)**
CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically. CMake will search BLAS libraries from the system. If not found, OpenBLAS will be downloaded, built and installed automatically.
To utilize preinstalled BLAS you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`. To utilize preinstalled BLAS you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
```bash ```bash
@ -131,9 +131,9 @@ As a simple example, consider the following:
To build GPU version, you will need the following installed: To build GPU version, you will need the following installed:
1. a CUDA-capable GPU 1. a CUDA-capable GPU
2. A supported version of Linux with a gcc compiler and toolchain 2. A supported version of Linux with a GCC compiler and toolchain
3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn) 4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
The CUDA development environment relies on tight integration with the host development environment, The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on including the host compiler and C runtime libraries, and is therefore only supported on
@ -172,6 +172,7 @@ export PATH=<path to install>/bin:$PATH
# install PaddlePaddle Python modules. # install PaddlePaddle Python modules.
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
``` ```
## <span id="centos">Build on Centos 7</span> ## <span id="centos">Build on Centos 7</span>
### Install Dependencies ### Install Dependencies
@ -192,9 +193,9 @@ sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
To build GPU version, you will need the following installed: To build GPU version, you will need the following installed:
1. a CUDA-capable GPU 1. a CUDA-capable GPU
2. A supported version of Linux with a gcc compiler and toolchain 2. A supported version of Linux with a GCC compiler and toolchain
3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn) 4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
The CUDA development environment relies on tight integration with the host development environment, The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on including the host compiler and C runtime libraries, and is therefore only supported on
@ -222,7 +223,7 @@ mkdir build && cd build
``` ```
Finally, you can build and install PaddlePaddle: Finally, you can build and install PaddlePaddle:
```bash ```bash
# you can add build option here, such as: # you can add build option here, such as:
cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install> cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install>

@ -146,3 +146,19 @@ paddle_error paddle_gradient_machine_randomize_param(
m->machine->randParameters(); m->machine->randParameters();
return kPD_NO_ERROR; return kPD_NO_ERROR;
} }
/**
 * Appends the output of the layer called `layerName` to `args`.
 *
 * Returns kPD_NULLPTR when the machine handle, the layer name, the argument
 * handle or the wrapped gradient machine is null; kPD_NO_ERROR otherwise.
 */
paddle_error paddle_gradient_machine_get_layer_output(
    paddle_gradient_machine machine,
    const char* layerName,
    paddle_arguments args) {
  auto gm = cast(machine);
  auto dst = paddle::capi::cast<paddle::capi::CArguments>(args);

  // Guard clauses, checked in the same order as the original combined test.
  if (gm == nullptr) return kPD_NULLPTR;
  if (layerName == nullptr) return kPD_NULLPTR;
  if (dst == nullptr) return kPD_NULLPTR;
  if (gm->machine == nullptr) return kPD_NULLPTR;

  const auto& produced = gm->machine->getLayerOutput(layerName);
  dst->args.push_back(produced);
  return kPD_NO_ERROR;
}

@ -39,7 +39,11 @@ PD_API paddle_error paddle_gradient_machine_create_for_inference(
/** /**
* @brief Create a gradient machine used for model inference, using config with * @brief Create a gradient machine used for model inference, using config with
* parameters which is generated by `paddle merge_model`. * parameters which is generated by `paddle merge_model`.
* @param [out] machine that used for model inference. * Example:
* paddle merge_model \
* --model_dir="pass-00000" \
* --model_file="merged_model.paddle"
* @param [out] machine that used for model inference
* @param [in] mergedModel * @param [in] mergedModel
* @param [in] size * @param [in] size
* @return paddle_error * @return paddle_error
@ -97,6 +101,18 @@ paddle_gradient_machine_randomize_param(paddle_gradient_machine machine);
PD_API paddle_error PD_API paddle_error
paddle_gradient_machine_destroy(paddle_gradient_machine machine); paddle_gradient_machine_destroy(paddle_gradient_machine machine);
/**
* @brief Get the output of the layer named `layerName`.
* @param [in] machine gradient machine that has already run an inference
* @param [in] layerName name of the specified layer
* @param [out] args arguments object that receives the output of the
*                   specified layer
* @return paddle_error
*/
PD_API paddle_error
paddle_gradient_machine_get_layer_output(paddle_gradient_machine machine,
const char* layerName,
paddle_arguments args);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

@ -38,7 +38,7 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
if(WITH_PYTHON) if(WITH_PYTHON)
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED

@ -15,8 +15,11 @@
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include <list> #include <list>
#include <memory>
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
@ -42,11 +45,11 @@ static bool AllInSet(
return all_in_set; return all_in_set;
} }
static std::shared_ptr<OperatorBase> NOP() { static std::unique_ptr<OperatorBase> NOP() {
auto net_op = std::make_shared<operators::NetOp>(); auto net_op = new operators::NetOp();
net_op->SetType("@NOP@"); net_op->SetType("@NOP@");
net_op->CompleteAddOp(); net_op->CompleteAddOp();
return net_op; return std::unique_ptr<OperatorBase>(net_op);
} }
// Get backward operator from a forward operator, a recursive implementation. // Get backward operator from a forward operator, a recursive implementation.
@ -61,11 +64,7 @@ static std::shared_ptr<OperatorBase> NOP() {
// operator, in a complex situation, it maybe a NetOp. // operator, in a complex situation, it maybe a NetOp.
// //
// See Backward.h for details // See Backward.h for details
static std::shared_ptr<OperatorBase> BackwardRecursive( static std::unique_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id);
std::shared_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) { std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) {
// If all input gradients of forwarding operator do not need to calculate, // If all input gradients of forwarding operator do not need to calculate,
@ -90,7 +89,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
} }
// Returned gradient network // Returned gradient network
auto net = std::make_shared<operators::NetOp>(); auto net = std::unique_ptr<operators::NetOp>(new operators::NetOp());
if (forwardOp.IsNetOp()) { if (forwardOp.IsNetOp()) {
// Because forwardOp is a net op, it can static_cast. // Because forwardOp is a net op, it can static_cast.
@ -104,14 +103,14 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
// reversely travel forwardNet and collect all duplicate outputs. // reversely travel forwardNet and collect all duplicate outputs.
for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
++it, ++local_op_id) { ++it, ++local_op_id) {
auto fwd = *it; auto& fwd = *it;
auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id);
net->AddOp(bwd);
ForEachVarName(bwd->Outputs(), ForEachVarName(bwd->Outputs(),
[&dup_output_ops, local_op_id](const std::string& out) { [&dup_output_ops, local_op_id](const std::string& out) {
dup_output_ops[out].emplace_back(local_op_id); dup_output_ops[out].emplace_back(local_op_id);
return false; return false;
}); });
net->AddOp(std::move(bwd));
} }
// Get unique ID for this method. // Get unique ID for this method.
auto uid = uniq_id++; auto uid = uniq_id++;
@ -121,7 +120,7 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
// to handle this case. For each duplicate output, rename it to an alias // to handle this case. For each duplicate output, rename it to an alias
// (original name with a offset), append an `add` op for its operator, // (original name with a offset), append an `add` op for its operator,
// and finally sum all the alias variable to the final output variable y. // and finally sum all the alias variable to the final output variable y.
using Pos = std::pair<size_t, std::shared_ptr<OperatorBase>>; using Pos = std::pair<size_t, std::unique_ptr<OperatorBase>>;
std::list<Pos> insert_position; std::list<Pos> insert_position;
for (auto& dup_output_op : dup_output_ops) { for (auto& dup_output_op : dup_output_ops) {
const std::string& name = dup_output_op.first; const std::string& name = dup_output_op.first;
@ -149,13 +148,13 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
[](const Pos& l, const Pos& r) { return l.first > r.first; }); [](const Pos& l, const Pos& r) { return l.first > r.first; });
for (auto& pos : insert_position) { for (auto& pos : insert_position) {
net->InsertOp(pos.first + 1, pos.second); net->InsertOp(pos.first + 1, std::move(pos.second));
} }
} else { } else {
std::shared_ptr<OperatorBase> grad_op = OpRegistry::CreateGradOp(forwardOp); std::unique_ptr<OperatorBase> grad_op(OpRegistry::CreateGradOp(forwardOp));
ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
grad_op](const std::string& grad_input) { const std::string& grad_input) {
if (no_grad_names.count(grad_input)) { if (no_grad_names.count(grad_input)) {
// +1 for \0 // +1 for \0
std::string prefix = grad_input.substr( std::string prefix = grad_input.substr(
@ -178,18 +177,34 @@ std::shared_ptr<OperatorBase> BackwardRecursive(
return false; return false;
}); });
// process recurrent gradient op as a special operator.
if (forwardOp.Type() == "recurrent_op") {
// NOTE clean up cycle call somewhere (RNN's stepnet contains itself), or
// this will result in infinite loop.
const auto& rnnop =
*static_cast<const operators::RecurrentOp*>(&forwardOp);
auto rnn_grad_op =
static_cast<operators::RecurrentGradientOp*>(grad_op.get());
const auto& stepnet_op =
*static_cast<const OperatorBase*>(&rnnop.stepnet());
// create stepnet's gradient op
rnn_grad_op->set_stepnet(
BackwardRecursive(stepnet_op, no_grad_names, uniq_id));
}
if (net->ops_.empty()) { // Current no aux op is added to network if (net->ops_.empty()) { // Current no aux op is added to network
return grad_op; return grad_op;
} }
net->AddOp(grad_op); net->AddOp(std::move(grad_op));
} }
net->SetType("@GENERATED_BACKWARD@"); net->SetType("@GENERATED_BACKWARD@");
net->CompleteAddOp(); net->CompleteAddOp();
return net; return std::unique_ptr<OperatorBase>(
} // namespace framework static_cast<OperatorBase*>(net.release()));
}
// See header for comments // See header for comments
std::shared_ptr<OperatorBase> Backward( std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars) { const std::unordered_set<std::string>& no_grad_vars) {
std::unordered_set<std::string> no_grad_names; std::unordered_set<std::string> no_grad_names;

@ -20,7 +20,7 @@ namespace framework {
// Create the backward operator from a forward operator. // Create the backward operator from a forward operator.
// TODO(yuyang18): Add more API reference comment. // TODO(yuyang18): Add more API reference comment.
extern std::shared_ptr<OperatorBase> Backward( extern std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars); const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework } // namespace framework

@ -32,9 +32,9 @@ class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
public: public:
RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input X of Add").AsNoGradient(); AddInput("X", "Input X of Add").NotInGradient();
AddInput("b", "Bias of Add").AsNoGradient(); AddInput("b", "Bias of Add").NotInGradient();
AddOutput("Out", "Out of Add").AsNoGradient(); AddOutput("Out", "Out of Add").NotInGradient();
AddComment("Add Op"); AddComment("Add Op");
} }
}; };
@ -180,8 +180,7 @@ TEST(Backward, simple_op_not_need_grad) {
auto no_input_gop = f::Backward(*fwd, {"x", "b"}); auto no_input_gop = f::Backward(*fwd, {"x", "b"});
ASSERT_NE(no_input_gop, nullptr); ASSERT_NE(no_input_gop, nullptr);
ASSERT_TRUE(no_input_gop->IsNetOp()); ASSERT_TRUE(no_input_gop->IsNetOp());
ASSERT_EQ(0UL, ASSERT_EQ(0UL, static_cast<ops::NetOp *>(no_input_gop.get())->ops_.size());
std::static_pointer_cast<ops::NetOp>(no_input_gop)->ops_.size());
} }
TEST(Backward, net_fc_backward_normal) { TEST(Backward, net_fc_backward_normal) {

@ -60,7 +60,7 @@ message OpProto {
optional bool duplicable = 3 [ default = false ]; optional bool duplicable = 3 [ default = false ];
optional bool intermediate = 4 [ default = false ]; optional bool intermediate = 4 [ default = false ];
optional bool no_gradient = 5 [ default = false ]; optional bool not_in_gradient = 5 [ default = false ];
} }
// AttrProto describes the C++ type Attribute. // AttrProto describes the C++ type Attribute.

@ -28,7 +28,7 @@ static void TransOpArg(const OperatorBase* src_op, const OpArgType& src_type,
const auto& src_arg_list = const auto& src_arg_list =
src_type == OpArgType::IN ? proto->inputs() : proto->outputs(); src_type == OpArgType::IN ? proto->inputs() : proto->outputs();
for (const auto& arg : src_arg_list) { for (const auto& arg : src_arg_list) {
if (arg.no_gradient() && !is_grad) continue; if (arg.not_in_gradient() && !is_grad) continue;
const std::string src_name = arg.name(); const std::string src_name = arg.name();
std::string dst_name = is_grad ? GradVarName(src_name) : src_name; std::string dst_name = is_grad ? GradVarName(src_name) : src_name;
dst_inout[dst_name].reserve(src_inout.at(src_name).size()); dst_inout[dst_name].reserve(src_inout.at(src_name).size());

@ -26,10 +26,10 @@ class IOIgnoredOpMaker : public OpProtoAndCheckerMaker {
IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker) IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("In1", "a single input"); AddInput("In1", "a single input");
AddInput("In2_mult", "a multiple input").AsDuplicable().AsNoGradient(); AddInput("In2_mult", "a multiple input").AsDuplicable().NotInGradient();
AddInput("In3_mult", "another multiple input").AsDuplicable(); AddInput("In3_mult", "another multiple input").AsDuplicable();
AddOutput("Out1_mult", "a multiple output").AsDuplicable(); AddOutput("Out1_mult", "a multiple output").AsDuplicable();
AddOutput("Out2", "a single output").AsNoGradient(); AddOutput("Out2", "a single output").NotInGradient();
AddComment("op with inputs and outputs ignored in gradient calculating"); AddComment("op with inputs and outputs ignored in gradient calculating");
} }
}; };

@ -17,5 +17,48 @@ limitations under the License. */
#include <vector> #include <vector>
namespace paddle { namespace paddle {
namespace framework {} // namespace framework namespace framework {
// Builds the operator registered under `type`.
//
// Fails via PADDLE_ENFORCE when `type` is not in the registry. Otherwise the
// registered attribute checker is run on `attrs`, and the registered creator
// is invoked; the raw pointer it returns is handed to the caller as a
// unique_ptr.
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const std::string& type,
                                                   const VarNameMap& inputs,
                                                   const VarNameMap& outputs,
                                                   AttributeMap attrs) {
  auto found = op_info_map().find(type);
  PADDLE_ENFORCE(found != op_info_map().end(),
                 "Operator '%s' has not been registered.", type);
  const auto& info = found->second;
  info.checker_->Check(attrs);
  return std::unique_ptr<OperatorBase>(
      info.creator_(type, inputs, outputs, attrs));
}
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
attrs[attr.name()] = GetAttrValue(attr);
}
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
// Converts the repeated OpDesc::Var field of an op description into a
// VarNameMap: for every entry, its argument names are appended, in order, to
// the list stored under the entry's parameter name.
OperatorBase::VarNameMap OpRegistry::ConvertOpDescVarsToVarNameMap(
    const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars) {
  VarNameMap name_map;
  for (auto& desc_var : op_desc_vars) {
    auto& names = name_map[desc_var.parameter()];
    auto& arguments = desc_var.arguments();
    names.reserve(static_cast<size_t>(arguments.size()));
    for (auto arg_it = arguments.begin(); arg_it != arguments.end();
         ++arg_it) {
      names.push_back(*arg_it);
    }
  }
  return name_map;
}
// Creates the gradient operator of a single (non-net) forward operator.
// Net operators are rejected via PADDLE_ENFORCE; the message directs the
// caller to framework::Backward instead.
std::unique_ptr<OperatorBase> OpRegistry::CreateGradOp(const OperatorBase& op) {
  PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops");
  std::unique_ptr<OperatorBase> grad_op(BuildGradOp(&op));
  return grad_op;
}
} // namespace framework
} // namespace paddle } // namespace paddle

@ -29,103 +29,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
// Helper used to describe an operator: it fills in the operator's OpProto
// (inputs, outputs, attributes, comment) and, for every attribute added,
// registers a typed checker on the given OpAttrChecker.
class OpProtoAndCheckerMaker {
public:
// Records the proto and the attribute checker that the Add* methods below
// write into.
OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: proto_(proto), op_checker_(op_checker) {}
// Enforces that Validate() has been called before the maker is destroyed.
~OpProtoAndCheckerMaker() {
PADDLE_ENFORCE(validated_, "should call Validate after build");
}
// Marks the maker as validated, then checks that no attribute, input or
// output name is used twice.
void Validate() {
validated_ = true;
CheckNoDuplicatedInOutAttrs();
}
protected:
// Thin wrapper around one input/output entry of the proto. Each setter
// flips one flag on the entry and returns *this so calls can be chained.
struct VariableBuilder {
OpProto::Var* var_;
// Sets the `duplicable` flag of the variable.
VariableBuilder& AsDuplicable() {
var_->set_duplicable(true);
return *this;
}
// Sets the `intermediate` flag of the variable.
VariableBuilder& AsIntermediate() {
var_->set_intermediate(true);
return *this;
}
// TODO(FengJiayi, yuyang18): `AsNoGradient` is a very bad name, because it
// means that input/output is not needed when calculate gradient. It does
// not mean no gradient when backward. It should be changed soon.
VariableBuilder& AsNoGradient() {
var_->set_no_gradient(true);
return *this;
}
};
// Appends a new input named `name` with description `comment` to the proto
// and returns a builder for setting further flags on it.
VariableBuilder AddInput(const std::string& name,
const std::string& comment) {
auto* input = proto_->add_inputs();
input->set_name(name);
input->set_comment(comment);
return VariableBuilder{input};
}
// Appends a new output named `name` with description `comment` to the proto
// and returns a builder for setting further flags on it.
VariableBuilder AddOutput(const std::string& name,
const std::string& comment) {
auto* output = proto_->add_outputs();
output->set_name(name);
output->set_comment(comment);
return VariableBuilder{output};
}
// Appends a new attribute of C++ type T to the proto (name, comment,
// `generated` flag and the type id of T), registers a checker for it on
// op_checker_ and returns that checker.
template <typename T>
TypedAttrChecker<T>& AddAttr(const std::string& name,
const std::string& comment,
bool generated = false) {
auto* attr = proto_->add_attrs();
attr->set_name(name);
attr->set_comment(comment);
attr->set_generated(generated);
attr->set_type(AttrTypeID<T>());
return op_checker_->AddAttrChecker<T>(name);
}
// Sets the comment of the operator proto itself.
void AddComment(const std::string& comment) { proto_->set_comment(comment); }
private:
// Walks all attribute, input and output names of the proto (in that order)
// and enforces that each name occurs only once across the three groups.
void CheckNoDuplicatedInOutAttrs() {
std::unordered_set<std::string> names;
auto checker = [&](const std::string& name) {
PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name);
names.insert(name);
};
for (auto& attr : proto_->attrs()) {
checker(attr.name());
}
for (auto& input : proto_->inputs()) {
checker(input.name());
}
for (auto& output : proto_->outputs()) {
checker(output.name());
}
}
// Proto being filled in.
OpProto* proto_;
// Receives one checker per attribute added through AddAttr.
OpAttrChecker* op_checker_;
// Set by Validate(); inspected by the destructor.
bool validated_{false};
};
// Maker that adds no inputs, outputs, attributes or comment of its own; its
// constructor only forwards the proto and checker to OpProtoAndCheckerMaker.
class NOPMaker : public OpProtoAndCheckerMaker {
public:
NOPMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {}
};
class OpRegistry { class OpRegistry {
using VarNameMap = OperatorBase::VarNameMap; using VarNameMap = OperatorBase::VarNameMap;
using OpCreator = std::function<OperatorBase*( using OpCreator = std::function<OperatorBase*(
@ -174,48 +77,17 @@ class OpRegistry {
} }
} }
static std::shared_ptr<OperatorBase> CreateOp(const std::string& type, static std::unique_ptr<OperatorBase> CreateOp(const std::string& type,
const VarNameMap& inputs, const VarNameMap& inputs,
const VarNameMap& outputs, const VarNameMap& outputs,
AttributeMap attrs) { AttributeMap attrs);
auto it = op_info_map().find(type);
PADDLE_ENFORCE(it != op_info_map().end(),
"Operator '%s' has not been registered.", type);
it->second.checker_->Check(attrs);
auto op = it->second.creator_(type, inputs, outputs, attrs);
return std::shared_ptr<OperatorBase>(op);
}
static VarNameMap ConvertOpDescVarsToVarNameMap(
const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars) {
VarNameMap ret_val;
for (auto& var : op_desc_vars) {
auto& var_names = ret_val[var.parameter()];
auto& var_names_in_proto = var.arguments();
var_names.reserve(static_cast<size_t>(var_names_in_proto.size()));
std::copy(var_names_in_proto.begin(), var_names_in_proto.end(),
std::back_inserter(var_names));
}
return ret_val;
}
static std::shared_ptr<OperatorBase> CreateOp(const OpDesc& op_desc) { static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
VarNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VarNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
attrs[attr.name()] = GetAttrValue(attr);
}
return CreateOp(op_desc.type(), inputs, outputs, attrs); static VarNameMap ConvertOpDescVarsToVarNameMap(
} const google::protobuf::RepeatedPtrField<OpDesc::Var>& op_desc_vars);
static std::shared_ptr<OperatorBase> CreateGradOp(const OperatorBase& op) { static std::unique_ptr<OperatorBase> CreateGradOp(const OperatorBase& op);
PADDLE_ENFORCE(!op.IsNetOp(),
"Use framework::Backward to get backward ops");
std::shared_ptr<OperatorBase> grad_op(BuildGradOp(&op));
return grad_op;
}
static std::unordered_map<std::string, const OpInfo>& op_info_map() { static std::unordered_map<std::string, const OpInfo>& op_info_map() {
static std::unordered_map<std::string, const OpInfo> op_info_map_; static std::unordered_map<std::string, const OpInfo> op_info_map_;
@ -272,8 +144,18 @@ class OpKernelRegistrar : public Registrar {
grad_op_class) \ grad_op_class) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \ __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \
static ::paddle::framework::OpRegistrar<op_class, op_maker_class, \ class _OpClass_##op_type##_ : public op_class { \
grad_op_class> \ public: \
DEFINE_OP_CLONE_METHOD(_OpClass_##op_type##_); \
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_, op_class); \
}; \
class _OpGradClass_##op_type##_ : public grad_op_class { \
public: \
DEFINE_OP_CLONE_METHOD(_OpGradClass_##op_type##_); \
DEFINE_OP_CONSTRUCTOR(_OpGradClass_##op_type##_, grad_op_class); \
}; \
static ::paddle::framework::OpRegistrar< \
_OpClass_##op_type##_, op_maker_class, _OpGradClass_##op_type##_> \
__op_registrar_##op_type##__(#op_type, #grad_op_type); \ __op_registrar_##op_type##__(#op_type, #grad_op_type); \
int TouchOpRegistrar_##op_type() { \ int TouchOpRegistrar_##op_type() { \
__op_registrar_##op_type##__.Touch(); \ __op_registrar_##op_type##__.Touch(); \
@ -304,7 +186,8 @@ class OpKernelRegistrar : public Registrar {
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
/** /**
* Macro to mark what Operator and Kernel we will use and tell the compiler to * Macro to mark what Operator and Kernel
* we will use and tell the compiler to
* link them into target. * link them into target.
*/ */
#define USE_OP_ITSELF(op_type) \ #define USE_OP_ITSELF(op_type) \
@ -324,7 +207,8 @@ class OpKernelRegistrar : public Registrar {
__attribute__((unused)) = \ __attribute__((unused)) = \
TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE()
// TODO(fengjiayi): The following macros seems ugly, do we have better method? // TODO(fengjiayi): The following macros
// seems ugly, do we have better method?
#ifdef PADDLE_ONLY_CPU #ifdef PADDLE_ONLY_CPU
#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU) #define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)

@ -76,8 +76,7 @@ TEST(OpRegistry, CreateOp) {
attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(scale); attr->set_f(scale);
std::shared_ptr<paddle::framework::OperatorBase> op = auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::Scope scope; paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx; paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx); op->Run(scope, dev_ctx);
@ -118,8 +117,7 @@ TEST(OpRegistry, DefaultValue) {
ASSERT_TRUE(op_desc.IsInitialized()); ASSERT_TRUE(op_desc.IsInitialized());
std::shared_ptr<paddle::framework::OperatorBase> op = auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::OpRegistry::CreateOp(op_desc);
paddle::framework::Scope scope; paddle::framework::Scope scope;
paddle::platform::CPUDeviceContext dev_ctx; paddle::platform::CPUDeviceContext dev_ctx;
op->Run(scope, dev_ctx); op->Run(scope, dev_ctx);

@ -164,5 +164,43 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
return ret_val; return ret_val;
} }
// Marks this maker as validated and then checks that no name is shared
// between the proto's attributes, inputs and outputs.
void OpProtoAndCheckerMaker::Validate() {
// The flag is raised before the check runs, so it is already set if the
// duplicate-name check fails.
// NOTE(review): presumably this keeps the destructor's "should call Validate"
// enforce from firing a second time after a failed check -- confirm.
validated_ = true;
CheckNoDuplicatedInOutAttrs();
}
// Appends a new input entry to the operator proto, fills in its name and
// comment, and returns a builder wrapping that entry so the caller can set
// further flags on it.
OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddInput(
    const std::string& name, const std::string& comment) {
  auto* var = proto_->add_inputs();
  var->set_name(name);
  var->set_comment(comment);
  VariableBuilder builder{var};
  return builder;
}
// Appends a new output entry to the operator proto, fills in its name and
// comment, and returns a builder wrapping that entry so the caller can set
// further flags on it.
OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput(
    const std::string& name, const std::string& comment) {
  auto* var = proto_->add_outputs();
  var->set_name(name);
  var->set_comment(comment);
  VariableBuilder builder{var};
  return builder;
}
// Walks every attribute, input and output declared on the proto (in that
// order) and enforces that each name occurs only once across all three
// groups. The first repeated name triggers PADDLE_ENFORCE.
void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
  std::unordered_set<std::string> seen;

  // Enforces that `name` has not been recorded yet, then records it.
  auto ensure_unique = [&seen](const std::string& name) {
    PADDLE_ENFORCE(seen.find(name) == seen.end(), "[%s] is duplicated", name);
    seen.insert(name);
  };

  for (const auto& attr : proto_->attrs()) {
    ensure_unique(attr.name());
  }
  for (const auto& in : proto_->inputs()) {
    ensure_unique(in.name());
  }
  for (const auto& out : proto_->outputs()) {
    ensure_unique(out.name());
  }
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

@ -67,10 +67,6 @@ class OperatorBase {
OperatorBase(const std::string& type, const VarNameMap& inputs, OperatorBase(const std::string& type, const VarNameMap& inputs,
const VarNameMap& outputs, const AttributeMap& attrs); const VarNameMap& outputs, const AttributeMap& attrs);
OperatorBase(const OperatorBase& o) = delete;
OperatorBase& operator=(const OperatorBase& o) = delete;
OperatorBase(OperatorBase&& o) = delete;
virtual ~OperatorBase() {} virtual ~OperatorBase() {}
template <typename T> template <typename T>
@ -116,10 +112,14 @@ class OperatorBase {
void SetType(const std::string& type) { type_ = type; } void SetType(const std::string& type) { type_ = type; }
const AttributeMap& Attrs() const { return attrs_; } const AttributeMap& Attrs() const { return attrs_; }
// Return a new operator instance that is the same as this one.
// A unique_ptr is returned so that the caller cannot forget to delete it.
virtual std::unique_ptr<OperatorBase> Clone() const = 0;
protected: protected:
std::string type_; std::string type_;
// NOTE: in case of OpGrad, inputs_ contains: // NOTE: in case of OpGrad, inputs_ contains:
// I (Inputs) // I (Inputs)
// O (Outputs) // O (Outputs)
// OG (Output Gradients) // OG (Output Gradients)
VarNameMap inputs_; VarNameMap inputs_;
@ -130,12 +130,97 @@ class OperatorBase {
AttributeMap attrs_; AttributeMap attrs_;
}; };
// Macro that defines the `Clone` override for the operator class CLS.
// The generated method copy-constructs a new CLS from `*this` and returns it
// wrapped in a std::unique_ptr<OperatorBase>; it is declared `final`.
// If you are writing a kernel operator, `Clone` is defined for you when you
// register it, i.e. you do not need to define `Clone` yourself.
#define DEFINE_OP_CLONE_METHOD(CLS) \
std::unique_ptr<OperatorBase> Clone() const final { \
return std::unique_ptr<OperatorBase>(new CLS(*this)); \
}
// Macro that defines the standard four-argument constructor for the operator
// class CLS: it takes (type, inputs, outputs, attrs) and forwards all of them
// unchanged to the PARENT_CLS constructor.
// Alternatively, you can write
// using PARENT_CLASS::PARENT_CLASS;
// to inherit the parent's constructors.
#define DEFINE_OP_CONSTRUCTOR(CLS, PARENT_CLS) \
CLS(const std::string& type, const VarNameMap& inputs, \
const VarNameMap& outputs, const paddle::framework::AttributeMap& attrs) \
: PARENT_CLS(type, inputs, outputs, attrs) {}
class NOP : public OperatorBase { class NOP : public OperatorBase {
public: public:
using OperatorBase::OperatorBase; using OperatorBase::OperatorBase;
void InferShape(const Scope& scope) const override {} void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope, void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {} const platform::DeviceContext& dev_ctx) const override {}
std::unique_ptr<OperatorBase> Clone() const override {
return std::unique_ptr<OperatorBase>(new NOP(*this));
}
};
// Helper used while describing an operator: it fills in the operator's
// OpProto (inputs, outputs, attributes, comment) and, for every attribute,
// also registers a typed checker on the accompanying OpAttrChecker.
class OpProtoAndCheckerMaker {
 public:
  // Stores the given pointers as-is; nothing in this class deletes them.
  OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
      : proto_(proto), op_checker_(op_checker) {}

  // Enforces that Validate() was called before the maker is destroyed.
  // NOTE(review): PADDLE_ENFORCE appears to report failure by throwing; if so,
  // a failure here is raised from a destructor -- confirm this is intended.
  ~OpProtoAndCheckerMaker() {
    PADDLE_ENFORCE(validated_, "should call Validate after build");
  }

  // Marks the maker as validated and checks for duplicated names.
  void Validate();

 protected:
  // Thin wrapper around a proto variable entry that lets callers chain
  // optional flags, e.g. AddInput(...).AsDuplicable().NotInGradient().
  // Kept as a plain aggregate so it can be brace-initialised from a pointer.
  struct VariableBuilder {
    OpProto::Var* var_;

    // Sets the `duplicable` flag on the wrapped variable.
    VariableBuilder& AsDuplicable() {
      var_->set_duplicable(true);
      return *this;
    }

    // Sets the `intermediate` flag on the wrapped variable.
    VariableBuilder& AsIntermediate() {
      var_->set_intermediate(true);
      return *this;
    }

    // Sets the `not_in_gradient` flag on the wrapped variable.
    VariableBuilder& NotInGradient() {
      var_->set_not_in_gradient(true);
      return *this;
    }
  };

  // Declares a new input on the proto and returns a builder for it.
  VariableBuilder AddInput(const std::string& name, const std::string& comment);

  // Declares a new output on the proto and returns a builder for it.
  VariableBuilder AddOutput(const std::string& name,
                            const std::string& comment);

  // Declares a new attribute of type T on the proto (name, comment,
  // `generated` flag and type id) and returns the typed checker registered
  // for it on the OpAttrChecker.
  template <typename T>
  TypedAttrChecker<T>& AddAttr(const std::string& name,
                               const std::string& comment,
                               bool generated = false) {
    auto* attr_proto = proto_->add_attrs();
    attr_proto->set_name(name);
    attr_proto->set_comment(comment);
    attr_proto->set_generated(generated);
    attr_proto->set_type(AttrTypeID<T>());
    return op_checker_->AddAttrChecker<T>(name);
  }

  // Sets the operator-level comment on the proto.
  void AddComment(const std::string& comment) { proto_->set_comment(comment); }

 private:
  // Enforces that attribute, input and output names are all distinct.
  void CheckNoDuplicatedInOutAttrs();

  OpProto* proto_;
  OpAttrChecker* op_checker_;
  // Becomes true once Validate() has been called; checked in the destructor.
  bool validated_ = false;
};
class NOPMaker : public OpProtoAndCheckerMaker {
public:
NOPMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {}
}; };
class InferShapeContext { class InferShapeContext {

@ -245,3 +245,21 @@ TEST(OpKernel, multi_inputs) {
auto op = paddle::framework::OpRegistry::CreateOp(op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cpu_device_context); op->Run(scope, cpu_device_context);
} }
// Minimal concrete operator used only to exercise OperatorBase::Clone():
// its InferShape and Run do nothing, and Clone is supplied by
// DEFINE_OP_CLONE_METHOD.
class OperatorClone : public paddle::framework::OperatorBase {
 public:
  DEFINE_OP_CLONE_METHOD(OperatorClone)

  // Forwards every argument unchanged to the OperatorBase constructor.
  OperatorClone(const std::string& type, const VarNameMap& inputs,
                const VarNameMap& outputs,
                const paddle::framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  // Intentionally empty: this test operator infers no shapes.
  void InferShape(const paddle::framework::Scope&) const override {}

  // Intentionally empty: this test operator performs no computation.
  void Run(const paddle::framework::Scope&,
           const paddle::platform::DeviceContext&) const override {}
};
// A cloned operator must report the same type string as its source.
TEST(Operator, Clone) {
  OperatorClone original("ABC", {}, {}, {});
  auto cloned = original.Clone();
  ASSERT_EQ(original.Type(), cloned->Type());
}

@ -48,29 +48,6 @@ namespace framework {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
template <typename ClassType>
void ExposeOperator(ClassType &m) {
m.def("infer_shape", &ClassType::type::InferShape)
.def("run", &ClassType::type::Run)
.def("type",
[](const typename ClassType::type &op) -> std::string {
return op.Type();
})
.def("outputs",
[](const typename ClassType::type &op)
-> std::map<std::string, std::vector<std::string>> {
return op.Outputs();
})
.def("inputs",
[](const typename ClassType::type &op) { return op.Inputs(); })
.def("__str__", &ClassType::type::DebugString)
.def("no_intermediate_outputs",
[](const typename ClassType::type &op) {
return op.OutputVars(false);
})
.def("support_gpu", &ClassType::type::SupportGPU);
}
static size_t UniqueIntegerGenerator() { static size_t UniqueIntegerGenerator() {
static std::atomic<size_t> generator; static std::atomic<size_t> generator;
return generator.fetch_add(1); return generator.fetch_add(1);
@ -207,75 +184,69 @@ All parameter, weight, gradient are variables in Paddle.
.def(py::init<>()) .def(py::init<>())
.def("__str__", string::to_string<const platform::CPUPlace &>); .def("__str__", string::to_string<const platform::CPUPlace &>);
py::class_<OperatorBase, std::shared_ptr<OperatorBase>> operator_base( py::class_<OperatorBase>(m, "Operator")
m, "Operator"); .def_static("create",
[](py::bytes protobin) {
operator_base.def_static("create", [](py::bytes protobin) { OpDesc desc;
OpDesc desc; PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), "Cannot parse user input to OpDesc");
"Cannot parse user input to OpDesc"); PADDLE_ENFORCE(desc.IsInitialized(),
PADDLE_ENFORCE(desc.IsInitialized(), "User OpDesc is not initialized, reason %s",
"User OpDesc is not initialized, reason %s", desc.InitializationErrorString());
desc.InitializationErrorString()); return OpRegistry::CreateOp(desc);
return OpRegistry::CreateOp(desc); })
}); .def("backward",
[](const OperatorBase &forwardOp,
operator_base.def("backward", const std::unordered_set<std::string> &no_grad_vars) {
[](const OperatorBase &forwardOp, return Backward(forwardOp, no_grad_vars).release();
const std::unordered_set<std::string> &no_grad_vars) {
return Backward(forwardOp, no_grad_vars);
});
ExposeOperator(operator_base);
py::class_<operators::NetOp, std::shared_ptr<operators::NetOp>> net(m, "Net");
net.def_static("create",
[]() -> std::shared_ptr<operators::NetOp> {
auto retv = std::make_shared<operators::NetOp>();
retv->SetType("plain_net");
return retv;
})
.def("add_op", &operators::NetOp::AddOp)
.def("add_op",
[](operators::NetOp &self,
const std::shared_ptr<operators::NetOp> &net) -> void {
self.AddOp(std::static_pointer_cast<OperatorBase>(net));
})
.def("add_op",
[](operators::NetOp &self,
const std::shared_ptr<operators::RecurrentOp> &rnn) -> void {
self.AddOp(std::static_pointer_cast<OperatorBase>(rnn));
}) })
.def("infer_shape", &OperatorBase::InferShape)
.def("run", &OperatorBase::Run)
.def("type",
[](const OperatorBase &op) -> std::string { return op.Type(); })
.def("outputs",
[](const OperatorBase &op)
-> std::map<std::string, std::vector<std::string>> {
return op.Outputs();
})
.def("inputs", [](const OperatorBase &op) { return op.Inputs(); })
.def("__str__", &OperatorBase::DebugString)
.def("no_intermediate_outputs",
[](const OperatorBase &op) { return op.OutputVars(false); })
.def("support_gpu", &OperatorBase::SupportGPU);
py::class_<operators::NetOp, OperatorBase>(m, "Net")
.def_static("create",
[]() -> operators::NetOp * {
auto *retv = new operators::NetOp;
retv->SetType("plain_net");
return retv;
})
.def("add_op", [](operators::NetOp &self,
const OperatorBase &op) { self.AddOp(op); })
.def("complete_add_op", &operators::NetOp::CompleteAddOp) .def("complete_add_op", &operators::NetOp::CompleteAddOp)
.def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) { .def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
self->CompleteAddOp(); self->CompleteAddOp();
}); });
ExposeOperator(net);
// recurrent_op // recurrent_op
py::class_<operators::RecurrentOp, std::shared_ptr<operators::RecurrentOp>> py::class_<operators::RecurrentOp, OperatorBase>(m, "RecurrentOp")
rnn(m, "RecurrentOp"); .def_static(
"create",
rnn.def_static( [](py::bytes protobin) -> operators::RecurrentOp * {
"create", OpDesc desc;
[](py::bytes protobin) -> std::shared_ptr<operators::RecurrentOp> { PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
OpDesc desc; "Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), PADDLE_ENFORCE(desc.IsInitialized(),
"Cannot parse user input to OpDesc"); "User OpDesc is not initialized, reason %s",
PADDLE_ENFORCE(desc.IsInitialized(), desc.InitializationErrorString());
"User OpDesc is not initialized, reason %s", auto rnn_op = OpRegistry::CreateOp(desc);
desc.InitializationErrorString()); return static_cast<operators::RecurrentOp *>(rnn_op.release());
auto rnn_op = OpRegistry::CreateOp(desc); })
return std::dynamic_pointer_cast<operators::RecurrentOp>(rnn_op); .def("set_stepnet", [](operators::RecurrentOp &self,
}) const operators::NetOp &net) -> void {
.def("set_stepnet", self.set_stepnet(net.Clone());
[](operators::RecurrentOp &self, });
const std::shared_ptr<operators::NetOp> &net) -> void {
self.set_stepnet(net);
});
ExposeOperator(rnn);
m.def("unique_integer", UniqueIntegerGenerator); m.def("unique_integer", UniqueIntegerGenerator);

@ -57,11 +57,14 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap,
} }
void MKLDNNFcLayer::convertWeightsFromPaddle() { void MKLDNNFcLayer::convertWeightsFromPaddle() {
if (FLAGS_use_mkldnn_wgt) { if (hasInitedWgt_) {
return; return;
} }
if (hasInitedWgt_) { // TODO(TJ): dst format should get from wgtVal_
int dstFmt = PARAM_FORMAT_MKLDNN_OI;
int srcFmt = weight_->getParameterPtr()->getHeaderFormat();
if (srcFmt == dstFmt) {
return; return;
} }
@ -78,6 +81,7 @@ void MKLDNNFcLayer::convertWeightsFromPaddle() {
MatrixPtr paddleWgtT; MatrixPtr paddleWgtT;
paddleWgt->transpose(paddleWgtT, true); paddleWgt->transpose(paddleWgtT, true);
weight_->getW()->copyFrom(*paddleWgtT); weight_->getW()->copyFrom(*paddleWgtT);
weight_->getParameterPtr()->setHeaderFormat(dstFmt);
hasInitedWgt_ = true; hasInitedWgt_ = true;
} }

@ -330,9 +330,7 @@ void MKLDNNTester::run(const TestConfig& dnn,
log_ = log; log_ = log;
lvl_ = level; lvl_ = level;
// Firstly test FLAGS_use_mkldnn_wgt = false // Firstly test mkldnn init from PARAM_FORMAT_ORIGINAL weight
FLAGS_use_mkldnn_wgt = false;
// reset and run once
reset(dnn, ref, batchSize); reset(dnn, ref, batchSize);
randomWgtDatas(); randomWgtDatas();
clearWgtDiffs(); clearWgtDiffs();
@ -342,17 +340,32 @@ void MKLDNNTester::run(const TestConfig& dnn,
runOnce(); runOnce();
} }
// Then test FLAGS_use_mkldnn_wgt = true if (parameters_[DNN].empty()) {
FLAGS_use_mkldnn_wgt = true; // has no parameters
// after run once the mkldnn weight has been stored in dnnlayer return;
}
// After run some iterations, the mkldnn weight has been stored in dnnLayer
// and we can also get the mkldnn weight parameter header format.
// Weight parameter should always be index 0 (and bias index 1).
// TODO(TJ): should also consider mean and var format when batchnorm ready
int dnnWgtFmt = parameters_[DNN][0]->getHeaderFormat();
int refWgtFmt = parameters_[REF][0]->getHeaderFormat();
if (dnnWgtFmt == refWgtFmt) {
// weight format are equal, so no need check more
return;
}
// then save the weights and restart again // then save the weights and restart again
vector<VectorPtr> dnnWgts, refWgts; vector<VectorPtr> dnnWgts, refWgts;
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
saveWgt(parameters_[DNN], dnnWgts); saveWgt(parameters_[DNN], dnnWgts);
saveWgt(parameters_[REF], refWgts); saveWgt(parameters_[REF], refWgts);
// restart again with flag true // restart again with dnn weight format
reset(dnn, ref, batchSize); reset(dnn, ref, batchSize);
// TODO(TJ): should also consider mean and var format when batchnorm ready
parameters_[DNN][0]->setHeaderFormat(dnnWgtFmt);
// restore wgt // restore wgt
restoreWgt(dnnWgts, parameters_[DNN]); restoreWgt(dnnWgts, parameters_[DNN]);

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save