From def959a8909c6425ca96c1deec7b00e08ae0df81 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 4 Aug 2017 13:33:50 +0800 Subject: [PATCH 01/76] add mkldnn fc files --- paddle/gserver/CMakeLists.txt | 11 ++++++ paddle/gserver/layers/MkldnnFcLayer.cpp | 30 +++++++++++++++++ paddle/gserver/layers/MkldnnFcLayer.h | 42 +++++++++++++++++++++++ paddle/gserver/layers/MkldnnLayer.h | 45 +++++++++++++++++++++++++ 4 files changed, 128 insertions(+) create mode 100644 paddle/gserver/layers/MkldnnFcLayer.cpp create mode 100644 paddle/gserver/layers/MkldnnFcLayer.h create mode 100644 paddle/gserver/layers/MkldnnLayer.h diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 0012636b8f..1305d5438a 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -23,6 +23,17 @@ endmacro() filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) + +if(NOT WITH_MKLDNN) + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "Mkldnn*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "Mkldnn*.cpp") + list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER}) + list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES}) + message(STATUS "Skip compiling with Mkldnnlayers and MkldnnActivations") +else() + message(STATUS "Compile with Mkldnnlayers and MkldnnActivations") +endif() + if(NOT WITH_GPU) list(REMOVE_ITEM GSERVER_HEADER layers/CudnnConvBaseLayer.h diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp new file mode 100644 index 0000000000..f8220a2553 --- /dev/null +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -0,0 +1,30 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MkldnnFcLayer.h" + +namespace paddle { + +REGISTER_LAYER(mkldnn_fc, MkldnnFcLayer); + +bool MkldnnFcLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + return MkldnnLayer::init(layerMap, parameterMap); +} + +void MkldnnFcLayer::forward(PassType passType) {} + +void MkldnnFcLayer::backward(const UpdateCallback& callback) {} + +} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h new file mode 100644 index 0000000000..430567949d --- /dev/null +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "MkldnnLayer.h" +#include "mkldnn.hpp" + +namespace paddle { + +/** + * @brief A subclass of MkldnnLayer fc layer. + * + * The config file api is mkldnn_fc + */ +class MkldnnFcLayer : public MkldnnLayer { +protected: +public: + explicit MkldnnFcLayer(const LayerConfig& config) : MkldnnLayer(config) {} + + ~MkldnnFcLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + + void backward(const UpdateCallback& callback) override; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h new file mode 100644 index 0000000000..e9bab68b07 --- /dev/null +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "Layer.h" +#include "mkldnn.hpp" + +namespace paddle { + +class MkldnnLayer; +typedef std::shared_ptr MkldnnLayerPtr; + +/** + * @brief Base class of Mkldnnlayer. + * + */ +class MkldnnLayer : public Layer { +public: + explicit MkldnnLayer(const LayerConfig& config) : Layer(config) {} + + ~MkldnnLayer() {} + + virtual bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + return Layer::init(layerMap, parameterMap); + // TODO(TJ): deivecId + } + + void resetOutput(size_t height, size_t width) { ; } +}; + +} // namespace paddle From 3c3a11a0dc780498a7c890be90b9df922b426d90 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 4 Aug 2017 13:50:41 +0800 Subject: [PATCH 02/76] add use_mkldnn flag --- paddle/gserver/layers/MkldnnLayer.h | 4 +++- paddle/trainer/TrainerConfigHelper.cpp | 2 ++ paddle/utils/Flags.cpp | 7 +++++++ paddle/utils/Flags.h | 1 + python/paddle/trainer/config_parser.py | 24 +++++++++++++++++++++--- 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index e9bab68b07..7e6d88b273 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -35,8 +35,10 @@ public: virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - return Layer::init(layerMap, parameterMap); + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
+                            << "Please set WITH_MKLDNN=ON";
     // TODO(TJ): deivecId
+    return Layer::init(layerMap, parameterMap);
   }
 
   void resetOutput(size_t height, size_t width) { ; }
diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp
index 133e2be104..a0a365aa0b 100644
--- a/paddle/trainer/TrainerConfigHelper.cpp
+++ b/paddle/trainer/TrainerConfigHelper.cpp
@@ -28,6 +28,7 @@ DECLARE_bool(with_cost);
 DECLARE_bool(with_gpu);
 DECLARE_bool(parallel_nn);
 DECLARE_string(config_args);
+DECLARE_bool(use_mkldnn);
 
 const char *kConfigParserModuleName = "paddle.trainer.config_parser";
 const char *kConfigParserFuncName = "parse_config_and_serialize";
@@ -44,6 +45,7 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath)
   configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local
              << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu
              << ",parallel_nn=" << FLAGS_parallel_nn
+             << ",use_mkldnn=" << FLAGS_use_mkldnn
              << ",cudnn_version=" << hl_get_cudnn_lib_version();
   if (!FLAGS_config_args.empty()) {
     configArgs << "," << FLAGS_config_args;
diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp
index 320f671ed9..ab1c181c62 100644
--- a/paddle/utils/Flags.cpp
+++ b/paddle/utils/Flags.cpp
@@ -20,6 +20,13 @@ DEFINE_bool(use_gpu, false, "Only support CPU training");
 DEFINE_bool(use_gpu, true, "Whether to use GPU for training");
 #endif
 
+#ifdef PADDLE_USE_MKLDNN
+// TODO(TJ): change to true when MKLDNN layers support multi-inputs
+DEFINE_bool(use_mkldnn, false, "Still use CPU training by default");
+#else
+DEFINE_bool(use_mkldnn, false, "Only support CPU training");
+#endif
+
 DEFINE_bool(parallel_nn,
             false,
             "Whether to use multi-threads to calculate one neural network."
diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h
index dc4faef833..1832bb515e 100644
--- a/paddle/utils/Flags.h
+++ b/paddle/utils/Flags.h
@@ -40,3 +40,4 @@ DECLARE_bool(show_layer_stat);
 DECLARE_string(predict_file);
 DECLARE_bool(prev_batch_state);
 DECLARE_string(init_model_path);
+DECLARE_bool(use_mkldnn);
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 9ea69fc5e5..ae39abc081 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase):
 
 @config_layer('fc')
 class FCLayer(LayerBase):
+    layer_type = 'fc'
+
     def __init__(self,
                  name,
                  size,
@@ -1611,14 +1613,25 @@ class FCLayer(LayerBase):
                  bias=True,
                  error_clipping_threshold=None,
                  **xargs):
-        super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs)
+        use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
+        if use_mkldnn:
+            self.layer_type = 'mkldnn_fc'
+            config_assert(
+                len(inputs) == 1,
+                "MkldnnFCLayer supports one and only one input!")
+        super(FCLayer, self).__init__(
+            name, self.layer_type, size, inputs=inputs, **xargs)
         for input_index in xrange(len(self.inputs)):
            input_layer = self.get_input_layer(input_index)
            psize = self.config.size * input_layer.size
-           dims = [input_layer.size, self.config.size]
            format = self.inputs[input_index].format
            sparse = format == "csr" or format == "csc"
-
+           if use_mkldnn:
+               dims = [self.config.size, input_layer.size]
+               config_assert(not sparse,
+                             "MkldnnFCLayer does not support sparse format yet")
+           else:
+               dims = [input_layer.size, self.config.size]
            if sparse:
                psize = self.inputs[input_index].nnz
            else:
@@ -1631,6 +1644,11 @@ class FCLayer(LayerBase):
        self.config.error_clipping_threshold =
error_clipping_threshold +@config_layer('mkldnn_fc') +class MkldnnFcLayer(FCLayer): + layer_type = 'mkldnn_fc' + + @config_layer('selective_fc') class SelectiveFCLayer(LayerBase): def __init__(self, From 94b172a7e8a0abb93129ec6b85758779c8dc7596 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sun, 6 Aug 2017 18:08:17 +0800 Subject: [PATCH 03/76] fix mkldnn lib bug, and mkldnnbase --- CMakeLists.txt | 2 +- paddle/gserver/layers/MkldnnBase.h | 99 +++++++++++++++++++++++++++++ paddle/gserver/layers/MkldnnLayer.h | 1 + 3 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 paddle/gserver/layers/MkldnnBase.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b174831109..db9ff86baf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,7 @@ if(WITH_GPU) endif(WITH_GPU) if(WITH_MKLDNN) - list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB}) endif() if(USE_NNPACK) diff --git a/paddle/gserver/layers/MkldnnBase.h b/paddle/gserver/layers/MkldnnBase.h new file mode 100644 index 0000000000..eba72e58e5 --- /dev/null +++ b/paddle/gserver/layers/MkldnnBase.h @@ -0,0 +1,99 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "mkldnn.hpp" + +namespace paddle { + +typedef enum { + DNN_BASE = 1, + DNN_TESTS = 1, + DNN_SIZES, + DNN_FMTS, + DNN_TESTS_DETAILS, + DNN_TESTS_MORE, + DNN_ALL, +} DNN_LOG_LEVEL; + +/** + * @brief MKLDNN CPU engine. + * + */ +class CpuEngine { +public: + static CpuEngine& Instance() { + // Thread-safe in C++11. + static CpuEngine myInstance; + return myInstance; + } + + // Disallow copy or move + CpuEngine(const CpuEngine&) = delete; // Copy constructor + CpuEngine(CpuEngine&&) = delete; // Move constructor + CpuEngine& operator=(const CpuEngine&) = delete; // Copy assignment + CpuEngine& operator=(CpuEngine&&) = delete; // Move assignment + + mkldnn::engine& getEngine() { return cpuEngine_; } + +protected: + CpuEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} + // CpuEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} + ~CpuEngine() {} + +private: + mkldnn::engine cpuEngine_; +}; + +/** + * @brief MKLDNN Stream. 
+ * + */ +class MkldnnStream { +public: + MkldnnStream() : ready_(false) { resetState(); } + + virtual ~MkldnnStream() {} + + /** + * @brief Submit stream + * @param prims The primitives vector + * block Waiting for the stream to complete + */ + void submit(std::vector& prims, bool block = true) { + resetState(); + stream_->submit(prims).wait(block); + ready_ = false; + } + + /** + * @brief Reset the mkldnn stream + */ + void resetState() { + if (ready_) { + return; + } + // TODO(TJ): change me when mkldnn have method to reset this state + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy)); + ready_ = true; + } + +private: + bool ready_; + std::shared_ptr stream_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index 7e6d88b273..e69c9d6a1a 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include "Layer.h" +#include "MkldnnBase.h" #include "mkldnn.hpp" namespace paddle { From 90d5be74176bd7b69ce9494ebffae38f7323d639 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sun, 6 Aug 2017 22:14:26 +0800 Subject: [PATCH 04/76] add mkldnn fc forward --- paddle/gserver/layers/MkldnnFcLayer.cpp | 78 +++++++++++++++++++- paddle/gserver/layers/MkldnnFcLayer.h | 9 +++ paddle/gserver/layers/MkldnnLayer.cpp | 98 +++++++++++++++++++++++++ paddle/gserver/layers/MkldnnLayer.h | 63 +++++++++++++--- 4 files changed, 236 insertions(+), 12 deletions(-) create mode 100644 paddle/gserver/layers/MkldnnLayer.cpp diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index f8220a2553..5584b43ff1 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "MkldnnFcLayer.h" +#include "paddle/utils/Stat.h" namespace paddle { @@ -20,11 +21,82 @@ REGISTER_LAYER(mkldnn_fc, MkldnnFcLayer); bool MkldnnFcLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - return MkldnnLayer::init(layerMap, parameterMap); + if (!MkldnnLayer::init(layerMap, parameterMap)) { + return false; + } + + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet!"; + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet"; + + // output size, cat not be changed + oc_ = getSize(); + oh_ = 1; + ow_ = 1; + + // input size can not change in FC + iLayerSize_ = inputLayers_[0]->getSize(); + CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_); + + // create weight + weight_ = + std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); + + // create biases + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + return true; +} + +void MkldnnFcLayer::reshape() { + const Argument& input = getInput(0); + int batchSize = input.getBatchSize(); + if (bs_ == batchSize) { + return; + } + bs_ = batchSize; + ih_ = input.getFrameHeight(); + iw_ = input.getFrameWidth(); + if (ih_ == 0) { + ih_ = 1; + } + if (iw_ == 0) { + iw_ = 1; + } + CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); + ic_ = iLayerSize_ / (ih_ * iw_); + CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; + CHECK_EQ(size_t(oc_), getSize()); + + // reset output + output_.setFrameHeight(oh_); + output_.setFrameWidth(ow_); + resetOutput(bs_, oc_); } -void MkldnnFcLayer::forward(PassType passType) {} +void MkldnnFcLayer::forward(PassType passType) { + Layer::forward(passType); + + reshape(); -void MkldnnFcLayer::backward(const UpdateCallback& callback) {} + { + REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); + real* input = getInputValue(0)->getData(); + real* output = getOutputValue()->getData(); + real* wgt = weight_->getW()->getData(); + bool hasBias = biases_ && biases_->getW(); + real* bias = hasBias ? biases_->getW()->getData() : NULL; + mkldnnForwardFC(bs_, ic_, ih_, iw_, input, oc_, output, wgt, bias); + } + /* activation */ { + REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); + forwardActivation(); + } +} + +void MkldnnFcLayer::backward(const UpdateCallback& callback) { + ; // bool hasBias = biases_ && biases_->getWGrad(); +} } // namespace paddle diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index 430567949d..6167702771 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -26,6 +26,13 @@ namespace paddle { */ class MkldnnFcLayer : public MkldnnLayer { protected: + // input layer size, can not be change after init + size_t iLayerSize_; // == ic * ih * iw + + // fc weight and bias + std::unique_ptr weight_; + std::unique_ptr biases_; + public: explicit MkldnnFcLayer(const LayerConfig& config) : MkldnnLayer(config) {} @@ -34,6 +41,8 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; + void reshape(); + void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp new file mode 100644 index 0000000000..d462e8694c --- /dev/null +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -0,0 +1,98 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MkldnnLayer.h" + +// using namespace mkldnn; // NOLINT +using mem = mkldnn::memory; // NOLINT +typedef mem::format format; +typedef mkldnn::inner_product_forward fc_fwd; +typedef mkldnn::inner_product_backward_weights fc_bwdWgt; +typedef mkldnn::inner_product_backward_data fc_bwdData; + +namespace paddle { + +bool MkldnnLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." + << "Please set WITH_MKLDNN=ON"; + // TODO(TJ): deivecId + return Layer::init(layerMap, parameterMap); +} + +void MkldnnLayer::resetForwardFC(int bs, + int ic, + int ih, + int iw, + real* botData, + int oc, + real* topData, + real* wgtData, + real* biasData) { + bool hasSpatial = ih == 1 && iw == 1 ? false : true; + engine_ = CpuEngine::Instance().getEngine(); + + mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) + : createMD({bs, ic}, format::nc); + mem::desc wgtMD = hasSpatial ? createMD({oc, ic, ih, iw}, format::oihw) + : createMD({oc, ic}, format::oi); + mem::desc biasMD = biasData != NULL ? createMD({oc}, format::x) + : createMD({}, format::format_undef); + mem::desc topMD = createMD({bs, oc}, format::nc); + + mkldnn::prop_kind pk = mkldnn::prop_kind::forward; + fc_fwd::desc fwdDesc = biasData != NULL + ? 
fc_fwd::desc(pk, botMD, wgtMD, biasMD, topMD) + : fc_fwd::desc(pk, botMD, wgtMD, topMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + + mem bot = mem(mem::primitive_desc(botMD, engine_), botData); + mem wgt = mem(mem::primitive_desc(wgtMD, engine_), wgtData); + mem top = mem(mem::primitive_desc(topMD, engine_), topData); + + if (biasData != NULL) { + mem bias = mem(mem::primitive_desc(biasMD, engine_), biasData); + fwd_.reset(new fc_fwd(fwdPD, bot, wgt, bias, top)); + } else { + fwd_.reset(new fc_fwd(fwdPD, bot, wgt, top)); + } + pipelineFwd_.clear(); + pipelineFwd_.push_back(*fwd_); +} + +void MkldnnLayer::mkldnnForwardFC(int bs, + int ic, + int ih, + int iw, + real* botData, + int oc, + real* topData, + real* wgtData, + real* biasData) { + // if input size changed, reset it + resetForwardFC(bs, ic, ih, iw, botData, oc, topData, wgtData, biasData); + + // just forward + // update botdata + stream_->submit(pipelineFwd_); +} + +mem::desc MkldnnLayer::createMD(mem::dims dims, + mem::format fmt, + mem::data_type type) { + // TODO(TJ): isFmtSuppoted(fmt) + return mem::desc(dims, type, fmt); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index e69c9d6a1a..6e41ee4028 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -29,20 +29,65 @@ typedef std::shared_ptr MkldnnLayerPtr; * */ class MkldnnLayer : public Layer { +protected: + // batch size + int bs_; + // input image channel, height and width + int ic_, ih_, iw_; + // output image channel, height and width + int oc_, oh_, ow_; + + // mkldnn engine, stream and primivtives + mkldnn::engine engine_; + std::shared_ptr stream_; + + std::shared_ptr fwd_; + std::vector pipelineFwd_; + std::vector pipelineBwd_; + public: - explicit MkldnnLayer(const LayerConfig& config) : Layer(config) {} + explicit MkldnnLayer(const LayerConfig& config) + : Layer(config), + bs_(0), + ic_(0), + ih_(0), + iw_(0), + oc_(0), + oh_(0), + ow_(0), + engine_(mkldnn::engine::cpu, 0), + stream_(nullptr) {} ~MkldnnLayer() {} - virtual bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
- << "Please set WITH_MKLDNN=ON"; - // TODO(TJ): deivecId - return Layer::init(layerMap, parameterMap); - } + virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void resetForwardFC(int bs, + int ic, + int ih, + int iw, + real* botData, + int oc, + real* topData, + real* wgtData, + real* biasData); + + void mkldnnForwardFC(int bs, + int ic, + int ih, + int iw, + real* botData, + int oc, + real* topData, + real* wgtData, + real* biasData); - void resetOutput(size_t height, size_t width) { ; } + // TODO(TJ): move to MkldnnMatrix + // create memory desc + inline mkldnn::memory::desc createMD( + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::memory::data_type type = mkldnn::memory::data_type::f32); }; } // namespace paddle From 1203ebc498b7c11e69d6aa4613a8a823ecfa01e1 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sun, 6 Aug 2017 23:40:38 +0800 Subject: [PATCH 05/76] add mkldnn fc backward --- paddle/gserver/layers/MkldnnFcLayer.cpp | 37 ++++++++++- paddle/gserver/layers/MkldnnLayer.cpp | 88 +++++++++++++++++++++++++ paddle/gserver/layers/MkldnnLayer.h | 31 ++++++++- 3 files changed, 153 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index 5584b43ff1..b62422da83 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -77,7 +77,6 @@ void MkldnnFcLayer::reshape() { void MkldnnFcLayer::forward(PassType passType) { Layer::forward(passType); - reshape(); { @@ -97,6 +96,40 @@ void MkldnnFcLayer::forward(PassType passType) { } void MkldnnFcLayer::backward(const UpdateCallback& callback) { - ; // bool hasBias = biases_ && biases_->getWGrad(); + /* Do derivation */ { + REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); + backwardActivation(); + } + + bool hasBias = biases_ && biases_->getWGrad(); + { + REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); + real* inVal = getInputValue(0)->getData(); + real* inGrad = + getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; + real* outGrad = getOutputGrad()->getData(); + real* wgtGrad = weight_->getWGrad()->getData(); + real* wgtVal = weight_->getW()->getData(); + real* biasGrad = hasBias ? biases_->getWGrad()->getData() : NULL; + mkldnnBackwardFC(bs_, + ic_, + ih_, + iw_, + inGrad, + inVal, + oc_, + outGrad, + wgtGrad, + wgtVal, + biasGrad); + } + + { + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weight_->getParameterPtr()->incUpdate(callback); + if (hasBias) { + biases_->getParameterPtr()->incUpdate(callback); + } + } } } // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index d462e8694c..64bed5c821 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -88,6 +88,94 @@ void MkldnnLayer::mkldnnForwardFC(int bs, stream_->submit(pipelineFwd_); } +void MkldnnLayer::resetBackwardFC(int bs, + int ic, + int ih, + int iw, + real* botDiff, + real* botData, + int oc, + real* topDiff, + real* wgtDiff, + real* wgtData, + real* biasDiff) { + bool hasSpatial = ih == 1 && iw == 1 ? false : true; + engine_ = CpuEngine::Instance().getEngine(); + + // backward weight + mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) + : createMD({bs, ic}, format::nc); + mem::desc wgtMD = hasSpatial ? 
createMD({oc, ic, ih, iw}, format::oihw) + : createMD({oc, ic}, format::oi); + mem::desc topMD = createMD({bs, oc}, format::nc); + mem::desc biasMD = biasDiff != NULL ? createMD({oc}, format::x) + : createMD({}, format::format_undef); + + fc_fwd::desc fwdDesc = + fc_fwd::desc(mkldnn::prop_kind::forward, botMD, wgtMD, topMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + fc_bwdWgt::desc bwdWgtDesc = + biasDiff != NULL ? fc_bwdWgt::desc(botMD, wgtMD, biasMD, topMD) + : fc_bwdWgt::desc(botMD, wgtMD, topMD); + fc_bwdWgt::primitive_desc bwdWgtPD = + fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); + + mem botVal = mem(mem::primitive_desc(botMD, engine_), botData); + mem wgtGrad = mem(mem::primitive_desc(wgtMD, engine_), wgtDiff); + mem topGrad = mem(mem::primitive_desc(topMD, engine_), topDiff); + + if (biasDiff != NULL) { + mem biasGrad = mem(mem::primitive_desc(biasMD, engine_), biasDiff); + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, botVal, topGrad, wgtGrad, biasGrad)); + } else { + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, botVal, topGrad, wgtGrad)); + } + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwdWgt_); + + // backward data + if (botDiff == NULL) { + return; + } + + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(botMD, wgtMD, topMD); + fc_bwdData::primitive_desc bwdDataPD = + fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + mem botGrad = mem(mem::primitive_desc(botMD, engine_), botDiff); + mem wgtVal = mem(mem::primitive_desc(wgtMD, engine_), wgtData); + bwdData_.reset(new fc_bwdData(bwdDataPD, topGrad, wgtVal, botGrad)); + pipelineBwd_.push_back(*bwdData_); +} + +void MkldnnLayer::mkldnnBackwardFC(int bs, + int ic, + int ih, + int iw, + real* botDiff, + real* botData, + int oc, + real* topDiff, + real* wgtDiff, + real* wgtData, + real* biasDiff) { + // if input size changed, reset it + resetBackwardFC(bs, + ic, + ih, + iw, + botDiff, + botData, + oc, + topDiff, + wgtDiff, + wgtData, + biasDiff); + + // just forward + // update botdata + stream_->submit(pipelineBwd_); +} + mem::desc MkldnnLayer::createMD(mem::dims dims, mem::format fmt, mem::data_type type) { diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index 6e41ee4028..5927bd6d52 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -42,6 +42,8 @@ protected: std::shared_ptr stream_; std::shared_ptr fwd_; + std::shared_ptr bwdWgt_; + std::shared_ptr bwdData_; std::vector pipelineFwd_; std::vector pipelineBwd_; @@ -56,7 +58,10 @@ public: oh_(0), ow_(0), engine_(mkldnn::engine::cpu, 0), - stream_(nullptr) {} + stream_(nullptr), + fwd_(nullptr), + bwdWgt_(nullptr), + bwdData_(nullptr) {} ~MkldnnLayer() {} @@ -82,6 +87,30 @@ public: real* wgtData, real* biasData); + void resetBackwardFC(int bs, + int ic, + int ih, + int iw, + real* botDiff, + real* botData, + int oc, + real* topDiff, + real* wgtDiff, + real* wgtData, + real* biasDiff); + + void mkldnnBackwardFC(int bs, + int ic, + int ih, + int iw, + real* botDiff, + real* botData, + int oc, + real* topDiff, + real* wgtDiff, + real* wgtData, + real* biasDiff); + // TODO(TJ): move to MkldnnMatrix // create memory desc inline mkldnn::memory::desc createMD( From ec9009f320204531082f81f6cb035292ff3f0f14 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 7 Aug 2017 14:53:02 +0800 Subject: [PATCH 06/76] add mkldnn tester --- paddle/gserver/layers/MkldnnFcLayer.cpp | 18 ++ paddle/gserver/layers/MkldnnFcLayer.h | 2 + paddle/gserver/layers/MkldnnLayer.cpp | 3 +- 
paddle/gserver/tests/CMakeLists.txt | 9 + paddle/gserver/tests/MkldnnTester.cpp | 381 ++++++++++++++++++++++++ paddle/gserver/tests/MkldnnTester.h | 119 ++++++++ paddle/gserver/tests/test_Mkldnn.cpp | 76 +++++ 7 files changed, 607 insertions(+), 1 deletion(-) create mode 100644 paddle/gserver/tests/MkldnnTester.cpp create mode 100644 paddle/gserver/tests/MkldnnTester.h create mode 100644 paddle/gserver/tests/test_Mkldnn.cpp diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index b62422da83..c3b1f83d7d 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MkldnnFcLayer.h" +#include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" namespace paddle { @@ -41,6 +42,7 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, // create weight weight_ = std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); + initWgt(); // create biases if (biasParameter_.get() != NULL) { @@ -49,6 +51,22 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return true; } +void MkldnnFcLayer::initWgt() { + // The weight_ is transposed from initial paddle weight + MatrixPtr paddleWgt = Matrix::create( + weight_->getW()->getData(), iLayerSize_, oc_, false, false); + + std::ostringstream ostr; + paddleWgt->print(ostr); + VLOG(DNN_BASE) << ostr.str(); + + // Firstly in mkldnn, the matrix is transposed from initial paddle weight + MatrixPtr paddleWgtT; + paddleWgt->transpose(paddleWgtT, true); + + weight_->getW()->copyFrom(*paddleWgtT); +} + void MkldnnFcLayer::reshape() { const Argument& input = getInput(0); int batchSize = input.getBatchSize(); diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index 6167702771..4cc445e87b 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -41,6 +41,8 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; + void initWgt(); + void reshape(); void forward(PassType passType) override; diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index 64bed5c821..cead3d87ea 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -26,7 +26,8 @@ namespace paddle { bool MkldnnLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
- << "Please set WITH_MKLDNN=ON"; + << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; // TODO(TJ): deivecId return Layer::init(layerMap, parameterMap); } diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index a43adc7ce7..486456c8b7 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad add_test(NAME test_LayerGrad COMMAND test_LayerGrad) +########## test_Mkldnn layers and activations ########## +if(WITH_MKLDNN) + add_unittest_without_exec(test_Mkldnn + test_Mkldnn.cpp + MkldnnTester.cpp + LayerGradUtil.cpp) + add_test(NAME test_Mkldnn COMMAND test_Mkldnn) +endif() + ################ test_CRFLayerGrad #################### add_unittest_without_exec(test_CRFLayerGrad test_CRFLayerGrad.cpp diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp new file mode 100644 index 0000000000..38e5bc75be --- /dev/null +++ b/paddle/gserver/tests/MkldnnTester.cpp @@ -0,0 +1,381 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MkldnnTester.h" +#include "paddle/gserver/layers/MkldnnBase.h" + +namespace paddle { + +// init data layer and test layer of both dnn and reference +void MkldnnTester::reset(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize) { + const bool trans = false; + const bool useGpu = false; + + // clear + configs_.clear(); + layerNames_.clear(); + dataLayers_.clear(); + datas_.clear(); + layerMaps_.clear(); + parameters_.clear(); + testLayers_.clear(); + + // resize + configs_.resize(NUM); + layerNames_.resize(NUM); + dataLayers_.resize(NUM); + datas_.resize(NUM); + layerMaps_.resize(NUM); + parameters_.resize(NUM); + testLayers_.resize(NUM); + + // reset configs and layer names + configs_[DNN] = dnn; + configs_[REF] = ref; + layerNames_[DNN] = "mkldnn"; // the first is mkldnn layer + layerNames_[REF] = "reference"; // second is reference layer + + // reset others + for (size_t i = 0; i < NUM; ++i) { + configs_[i].layerConfig.set_name(layerNames_[i]); + initDataLayer(configs_[i], + &(dataLayers_[i]), + &(datas_[i]), + &(layerMaps_[i]), + layerNames_[i], + batchSize, + trans, + useGpu); + initTestLayer( + configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); + } + dnnLayer_ = testLayers_[DNN]; + refLayer_ = testLayers_[REF]; + EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + + setInputImgSize(); +} + +void MkldnnTester::setInputImgSize() { + for (size_t n = 0; n < dataLayers_.size(); ++n) { + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + // TODO(TJ): fix me when concat and elewise ready + dataLayers_[n][i]->getOutput().setFrameHeight(ih_); + dataLayers_[n][i]->getOutput().setFrameWidth(iw_); + } + } +} + +// init randome parameters of ref, and copy to mkldnn +void MkldnnTester::randomWgtDatas() { + EXPECT_EQ(parameters_[DNN].size(), 
            parameters_[REF].size());
+  for (size_t i = 0; i < parameters_[REF].size(); ++i) {
+    const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
+    const VectorPtr& refValue = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
+    parameters_[REF][i]->randomize();
+    dnnValue->copyFrom(*refValue);
+
+    VLOG(lvl_) << "Random weight data " << parameters_[DNN][i]->getName();
+    printVector(dnnValue);
+  }
+}
+
+// random botdata of ref layer and copy the same to mkldnn
+void MkldnnTester::randomBotDatas() {
+  CHECK_EQ(dataLayers_.size(), NUM);
+  for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) {
+    dataLayers_[REF][i]->getOutputValue()->randomizeUniform();
+    dataLayers_[DNN][i]->getOutputValue()->copyFrom(
+        *(dataLayers_[REF][i]->getOutputValue()));
+    VLOG(lvl_) << "Input " << i << " data:";
+    printMatrix(dataLayers_[REF][i]->getOutputValue());
+  }
+}
+
+void MkldnnTester::randomTopDiffs() {
+  refLayer_->getOutputGrad()->randomizeUniform();
+  dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad()));
+  VLOG(lvl_) << "Random Backward Input, TopDiff: ";
+  printMatrix(refLayer_->getOutputGrad());
+}
+
+void MkldnnTester::checkForward() {
+  printTopDatas();
+  double delta = compareMatrix(testLayers_[DNN]->getOutputValue(),
+                               testLayers_[REF]->getOutputValue());
+  VLOG(DNN_TESTS_DETAILS) << "Check Forward";
+  EXPECT_LE(fabs(delta), eps_);
+}
+
+void MkldnnTester::checkBackwardData() {
+  const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm";
+  for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) {
+    const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad();
+    const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad();
+    VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << i;
+    printMatrix(dnnDiff);
+    VLOG(lvl_) << "Reference Backward Output BotDiff " << i;
+    printMatrix(refDiff);
+
+    double delta = compareMatrix(dnnDiff, refDiff);
+    EXPECT_LE(fabs(delta), eps_);
+    if (isBN) {
+      // the other two inputs in batch norm are for moving mean and var
+      break;
+    }
+  }
+}
+
+void MkldnnTester::checkBackwardWgts() {
+  CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
+  vector<VectorPtr> dnnWgts;  // used to temporarily save mkldnn weights
+  saveWgt(parameters_[DNN], dnnWgts);
+
+  // TODO(TJ): cvtWgtToPaddle
+  for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
+    const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
+    const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
+    VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName();
+    printVector(dnn);
+    VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName();
+    printVector(ref);
+
+    double delta = compareVector(dnn, ref);
+    EXPECT_LE(fabs(delta), eps_);
+  }
+
+  VLOG(DNN_TESTS_DETAILS) << "Restore dnn weights before compare";
+  restoreWgt(dnnWgts, parameters_[DNN]);
+}
+
+void MkldnnTester::saveWgt(const vector<ParameterPtr>& from,
+                           vector<VectorPtr>& to) {
+  const bool useGpu = false;
+  to.resize(from.size());
+  for (size_t i = 0; i < to.size(); ++i) {
+    const VectorPtr& wgt = from[i]->getBuf(PARAMETER_VALUE);
+    to[i] = Vector::create(wgt->getSize(), useGpu);
+    to[i]->copyFrom(*wgt);
+  }
+}
+
+void MkldnnTester::restoreWgt(const vector<VectorPtr>& from,
+                              vector<ParameterPtr>& to) {
+  CHECK_EQ(from.size(), to.size());
+  for (size_t i = 0; i < from.size(); ++i) {
+    const VectorPtr& wgt = to[i]->getBuf(PARAMETER_VALUE);
+    wgt->copyFrom(*from[i]);
+  }
+}
+
+// clear parameters grad
+void MkldnnTester::clearWgtDiffs() {
+  for (size_t n = 0; n < parameters_.size(); ++n) {
+    for (size_t i = 0; i < parameters_[n].size(); ++i)
{ + const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); + if (grad) { + grad->zeroMem(); + } + } + } +} + +void MkldnnTester::clearBotDiffs() { + // dnn and ref + for (size_t n = 0; n < dataLayers_.size(); ++n) { + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } + } +} + +void MkldnnTester::clearBotDiffs(int n) { + CHECK_LT(n, NUM); + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } +} + +void MkldnnTester::clearTopDatas() { + for (size_t i = 0; i < testLayers_.size(); ++i) { + testLayers_[i]->getOutputValue()->zeroMem(); + } +} + +void MkldnnTester::printTopDatas() { + if (!log_) { + return; + } + + for (int n = 0; n < NUM; ++n) { + VLOG(lvl_) << testLayers_[n]->getType() << " forward output TopData: "; + printMatrix(testLayers_[n]->getOutputValue()); + } +} + +void MkldnnTester::printMatrix(const MatrixPtr& m) { + if (!log_) { + return; + } +#ifdef _DEBUG + std::ostream str; + m->print(str); + VLOG(lvl_) << str; +#endif +} + +void MkldnnTester::printVector(const VectorPtr& v) { + if (!log_) { + return; + } + + CHECK(v); + CHECK(v->getData()); + const real* pd = v->getData(); + const size_t sz = v->getSize(); + std::stringstream row; + for (size_t i = 0; i < sz; ++i) { + row << pd[i] << ", "; + } + VLOG(lvl_) << row.str(); +} + +double MkldnnTester::getDelta(const real* d1, + const real* d2, + size_t len, + const float failRate, + const float thres) { + double delta = 0, sum = 0; + int failCnt = 0; + const double eps = 1e-5; + double maxOut = 0; + for (size_t i = 0; i < len; ++i) { + double ref = fabs(d2[i]); + double diff = fabs(d1[i] - d2[i]); + delta += diff; + sum += ref; + if (ref > eps && fabs(d1[i]) > eps && diff / ref > thres) { + maxOut = std::max(maxOut, diff / ref); + failCnt++; + } + } + EXPECT_TRUE(std::isnormal(sum)); + EXPECT_FALSE(std::isinf(sum)); + EXPECT_FALSE(std::isnan(delta)); + VLOG(DNN_TESTS_MORE) << "reference avg data: " << sum / len + << ", delta: " << delta / sum << ", failCnt:" << failCnt; + return (failCnt / (float)len) > failRate ? 
            maxOut : delta / sum;
+}
+
+double MkldnnTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) {
+  CHECK_EQ(m1->getElementCnt(), m2->getElementCnt());
+  return getDelta(m1->getData(), m2->getData(), m1->getElementCnt());
+}
+
+double MkldnnTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) {
+  CHECK_EQ(v1->getSize(), v2->getSize());
+  return getDelta(v1->getData(), v2->getData(), v1->getSize());
+}
+
+void MkldnnTester::runOnce() {
+  // test forward
+  randomBotDatas();
+  dnnLayer_->forward(PASS_TRAIN);
+  refLayer_->forward(PASS_TRAIN);
+  checkForward();
+
+  // test backward
+  randomTopDiffs();
+  dnnLayer_->backward(nullptr);
+  refLayer_->backward(nullptr);
+  checkBackwardData();
+  checkBackwardWgts();
+
+  // clear buffers
+  // ref code will add to the diff, dnn code will write to it
+  clearBotDiffs(REF);
+  // below two should be covered by test layers
+  // clearTopDatas();
+  // clearWgtDiffs();
+}
+
+void MkldnnTester::run(const TestConfig& dnn,
+                       const TestConfig& ref,
+                       size_t batchSize,
+                       size_t inputImgH,
+                       size_t inputImgW,
+                       size_t iter,
+                       float epsilon,
+                       bool log,
+                       int level) {
+  VLOG(DNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type()
+                  << " vs " << ref.layerConfig.type();
+  ih_ = inputImgH;
+  iw_ = inputImgW;
+  iter_ = iter;
+  eps_ = epsilon;
+  log_ = log;
+  lvl_ = level;
+
+  // Firstly always set flag false to initialize from paddle weight
+  TestConfig first = dnn;
+  // first.layerConfig.set_init_wgt_from_mkldnn(false);
+
+  // reset and run once
+  reset(first, ref, batchSize);
+  randomWgtDatas();
+  clearWgtDiffs();
+  clearBotDiffs();
+
+  VLOG(DNN_TESTS) << "Check Iteration 0";
+  runOnce();
+
+  // firstly get the flag
+  bool initWgtFromMkldnn = false;
+  // dnn.layerConfig.has_init_wgt_from_mkldnn() &&
+  // dnn.layerConfig.init_wgt_from_mkldnn();
+
+  if (initWgtFromMkldnn) {
+    // after run once the mkldnn weight has been stored in dnnlayer
+    // then save the weights and restart again
+    vector<VectorPtr> dnnWgts, refWgts;
+    CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
+    saveWgt(parameters_[DNN], dnnWgts);
+    saveWgt(parameters_[REF], refWgts);
+
+    // restart again with flag true
+    reset(dnn, ref, batchSize);
+
+    // restore wgt
+    restoreWgt(dnnWgts, parameters_[DNN]);
+    restoreWgt(refWgts, parameters_[REF]);
+    clearWgtDiffs();
+    clearBotDiffs();
+
+    // at least run once
+    runOnce();
+  }
+
+  for (size_t i = 1; i < iter_; ++i) {
+    VLOG(DNN_TESTS) << "Check Iteration " << i;
+    runOnce();
+  }
+}
+
+} // namespace paddle
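A minimal sketch of how MkldnnTester is driven (illustrative only, not part of the patches; cfg and ref stand for TestConfig objects built the same way as in test_Mkldnn.cpp introduced below, and the helper name is hypothetical):

    // illustrative driver, assuming a test binary that has already set
    // FLAGS_use_mkldnn = true and FLAGS_use_gpu = false as in main() below
    #include "MkldnnTester.h"

    using namespace paddle;  // NOLINT

    void compareWithReference(TestConfig& cfg, TestConfig& ref) {
      MkldnnTester tester;  // defaults: 3 iterations, epsilon = 1e-4
      // forward/backward both layers on batch size 8 with a 13x13 input
      // image; forward outputs, backward bottom diffs and weight diffs
      // must all agree within epsilon or the embedded EXPECTs fail
      tester.run(cfg, ref, /*batchSize=*/8, /*inputImgH=*/13, /*inputImgW=*/13);
    }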
diff --git a/paddle/gserver/tests/MkldnnTester.h b/paddle/gserver/tests/MkldnnTester.h
new file mode 100644
index 0000000000..16b0970a8e
--- /dev/null
+++ b/paddle/gserver/tests/MkldnnTester.h
@@ -0,0 +1,119 @@
+/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "LayerGradUtil.h"
+#include "paddle/gserver/layers/MkldnnBase.h"
+
+namespace paddle {
+
+/**
+ * @brief test the functionality of Mkldnn layers,
+ * comparing with the original paddle functions
+ */
+class MkldnnTester {
+  enum {
+    DNN = 0,
+    REF = 1,
+    NUM = 2,
+  };
+
+protected:
+  std::vector<TestConfig> configs_;
+  vector<string> layerNames_;
+  vector<vector<DataLayerPtr>> dataLayers_;
+  vector<vector<Argument>> datas_;
+  vector<LayerMap> layerMaps_;
+  vector<vector<ParameterPtr>> parameters_;
+  vector<LayerPtr> testLayers_;
+  LayerPtr dnnLayer_, refLayer_;
+
+  /// run some iterations, all the results should pass
+  size_t iter_;
+  /// whether to print out the details
+  bool log_;
+  /// vlog level to print the matrix detail data
+  int lvl_;
+  /// epsilon
+  float eps_;
+  /// input image size, default 1
+  size_t ih_, iw_;
+
+public:
+  explicit MkldnnTester(size_t iter = 3, float epsilon = 1e-4) {
+    iter_ = iter;
+    eps_ = epsilon;
+    log_ = false;
+    lvl_ = DNN_TESTS_MORE;
+  }
+
+  ~MkldnnTester() {}
+
+public:
+  void run(const TestConfig& dnn,
+           const TestConfig& ref,
+           size_t batchSize,
+           size_t inputImgH = 1,
+           size_t inputImgW = 1,
+           size_t iter = 3,
+           float epsilon = 1e-4,
+           bool log = false,
+           int level = DNN_TESTS_MORE);
+  void setLogLevel(int lvl) { lvl_ = lvl; }
+
+private:
+  void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize);
+  void setInputImgSize();
+  void runOnce();
+
+  void randomWgtDatas();
+  void randomBotDatas();
+  void randomTopDiffs();
+
+  void checkForward();
+  void checkBackwardData();
+  void checkBackwardWgts();
+
+  void clearWgtDiffs();
+  void clearBotDiffs();
+  void clearBotDiffs(int n);  // clear specific layer
+  void clearTopDatas();
+
+  void printTopDatas();
+  void printMatrix(const MatrixPtr& m);
+  void printVector(const VectorPtr& v);
+
+  void saveWgt(const vector<ParameterPtr>& from, vector<VectorPtr>& to);
+  void restoreWgt(const vector<VectorPtr>& from, vector<ParameterPtr>& to);
+
+  double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2);
+  double compareVector(const VectorPtr& v1, const VectorPtr& v2);
+
+  /**
+   * Get the delta percent
+   * if many (>failRate) points are wrong (abs(dnn-ref)/abs(ref) > thres),
+   * return the max(diff/ref);
+   * else return sum(abs(a-b)) / sum(abs(b)), which should be smaller than eps
+   */
+  double getDelta(const real* d1,
+                  const real* d2,
+                  size_t len,
+                  const float failRate = 1e-3,
+                  const float thres = 0.1);
+};
+
+} // namespace paddle
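A worked instance of the delta metric documented above may help (the numbers are illustrative, not taken from the patches): when no point exceeds the relative threshold, getDelta reduces to sum(abs(d1-d2)) / sum(abs(d2)).

    // illustrative arithmetic for getDelta's default path
    #include <cmath>
    #include <cstdio>

    int main() {
      const double d1[] = {1.00, 2.02, 4.00};  // layer under test
      const double d2[] = {1.01, 2.00, 4.00};  // reference layer
      double delta = 0, sum = 0;
      for (int i = 0; i < 3; ++i) {
        delta += std::fabs(d1[i] - d2[i]);  // accumulates 0.03
        sum += std::fabs(d2[i]);            // accumulates 7.01
      }
      // per-point relative errors are at most 0.01 < thres(0.1), so
      // failCnt stays 0 and the tester would return delta / sum ~ 0.00428,
      // which the callers then compare against eps_
      printf("%f\n", delta / sum);
      return 0;
    }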
diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_Mkldnn.cpp
new file mode 100644
index 0000000000..c2c6b701ec
--- /dev/null
+++ b/paddle/gserver/tests/test_Mkldnn.cpp
@@ -0,0 +1,76 @@
+/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "MkldnnTester.h"
+#include "ModelConfig.pb.h"
+
+using namespace paddle;  // NOLINT
+
+DECLARE_bool(thread_local_rand_use_global_seed);
+DECLARE_bool(use_gpu);
+DECLARE_bool(use_mkldnn);
+
+struct testFCDesc {
+  int bs;
+  int ic;
+  int oc;
+  int ih, iw;  // oh == ow == 1
+};
+
+void testFcLayer(const testFCDesc& pm) {
+  const std::string compareTypes[] = {"mkldnn_fc", "fc"};
+  TestConfig cfg;
+  cfg.layerConfig.set_type(compareTypes[0]);
+  cfg.layerConfig.set_size(pm.oc);
+  cfg.inputDefs.push_back(
+      {INPUT_DATA,
+       "layer_0",
+       /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw),
+       /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)});
+  cfg.layerConfig.add_inputs();
+
+  MkldnnTester tester;
+  for (auto biasSize : {pm.oc, 0}) {
+    cfg.biasSize = biasSize;
+    TestConfig ref = cfg;
+    ref.layerConfig.set_type(compareTypes[1]);
+    for (auto bs : {pm.bs, 1}) {
+      tester.run(cfg, ref, bs, pm.ih, pm.iw);
+    }
+  }
+}
+
+TEST(MkldnnLayer, fcLayer) {
+  testFcLayer({2, 2, 3, 1, 1}); /*
+  testFcLayer({16, 32, 64, 1, 1});
+  testFcLayer({8, 16, 32, 13, 13});
+  testFcLayer({4, 12, 18, 13, 11});
+  testFcLayer({2, 64, 32, 16, 16});
+  testFcLayer({15, 3, 6, 16, 16});*/
+}
+
+// TODO(TJ): add branch test
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  FLAGS_use_gpu = false;
+  FLAGS_use_mkldnn = true;
+  initMain(argc, argv);
+  FLAGS_thread_local_rand_use_global_seed = true;
+  srand(1);
+  return RUN_ALL_TESTS();
+}
From 0c951176bd16ade7b347f1f251e8374dca01a6da Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Mon, 7 Aug 2017 21:13:41 +0800
Subject: [PATCH 07/76] pass mkldnn gtest

---
 paddle/gserver/layers/MkldnnFcLayer.cpp | 24 ++++++++--
 paddle/gserver/layers/MkldnnFcLayer.h   | 11 +++--
 paddle/gserver/layers/MkldnnLayer.cpp   | 62 ++++++++++++++---------
 paddle/gserver/layers/MkldnnLayer.h     | 27 ++++++++++-
 paddle/gserver/tests/MkldnnTester.cpp   | 30 +++++-------
 paddle/gserver/tests/test_Mkldnn.cpp    | 12 ++---
 6 files changed, 112 insertions(+), 54 deletions(-)

diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp
index c3b1f83d7d..29b2cc184d 100644
--- a/paddle/gserver/layers/MkldnnFcLayer.cpp
+++ b/paddle/gserver/layers/MkldnnFcLayer.cpp
@@ -42,7 +42,6 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap,
   // create weight
   weight_ =
       std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0));
-  initWgt();
 
   // create biases
   if (biasParameter_.get() != NULL) {
@@ -51,20 +50,36 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-void MkldnnFcLayer::initWgt() {
+void MkldnnFcLayer::cvtWgtFromPaddle() {
+  if (hasInitedWgt_) {
+    return;
+  }
+
   // The weight_ is transposed from initial paddle weight
   MatrixPtr paddleWgt = Matrix::create(
       weight_->getW()->getData(), iLayerSize_, oc_, false, false);
 
   std::ostringstream ostr;
   paddleWgt->print(ostr);
-  VLOG(DNN_BASE) << ostr.str();
+  VLOG(DNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str();
 
-  // Firstly in mkldnn, the matrix is transposed from initial paddle weight
+  // The mkldnn weight is transposed from initial paddle matrix
   MatrixPtr paddleWgtT;
   paddleWgt->transpose(paddleWgtT, true);
 
   weight_->getW()->copyFrom(*paddleWgtT);
+  hasInitedWgt_ = true;
+}
+
+void MkldnnFcLayer::cvtWgtToPaddle() {
+  MatrixPtr dnnWgt = weight_->getW();
+  MatrixPtr paddleWgt;
+  dnnWgt->transpose(paddleWgt, true);
+
+  // copy paddle weight and override on weight_
+  MatrixPtr dnnWgtT = Matrix::create(
+      dnnWgt->getData(),
dnnWgt->getWidth(), dnnWgt->getHeight(), false, false); + dnnWgtT->copyFrom(*paddleWgt); } void MkldnnFcLayer::reshape() { @@ -86,6 +101,7 @@ void MkldnnFcLayer::reshape() { ic_ = iLayerSize_ / (ih_ * iw_); CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; CHECK_EQ(size_t(oc_), getSize()); + printSizeInfo(); // reset output output_.setFrameHeight(oh_); diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index 4cc445e87b..0064fc4727 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -29,25 +29,30 @@ protected: // input layer size, can not be change after init size_t iLayerSize_; // == ic * ih * iw + bool hasInitedWgt_; + // fc weight and bias std::unique_ptr weight_; std::unique_ptr biases_; public: - explicit MkldnnFcLayer(const LayerConfig& config) : MkldnnLayer(config) {} + explicit MkldnnFcLayer(const LayerConfig& config) + : MkldnnLayer(config), hasInitedWgt_(false) {} ~MkldnnFcLayer() {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; - void initWgt(); + void cvtWgtFromPaddle() override; - void reshape(); + void cvtWgtToPaddle() override; void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; + + void reshape(); }; } // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index cead3d87ea..0e1e1c3061 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -25,11 +25,18 @@ namespace paddle { bool MkldnnLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + if (!Layer::init(layerMap, parameterMap)) { + return false; + } + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." << "Please set WITH_MKLDNN=ON " << "and set use_mkldnn=True"; + stream_.reset(new MkldnnStream()); + engine_ = CpuEngine::Instance().getEngine(); + // TODO(TJ): deivecId - return Layer::init(layerMap, parameterMap); + return true; } void MkldnnLayer::resetForwardFC(int bs, @@ -42,7 +49,6 @@ void MkldnnLayer::resetForwardFC(int bs, real* wgtData, real* biasData) { bool hasSpatial = ih == 1 && iw == 1 ? false : true; - engine_ = CpuEngine::Instance().getEngine(); mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) : createMD({bs, ic}, format::nc); @@ -52,21 +58,21 @@ void MkldnnLayer::resetForwardFC(int bs, : createMD({}, format::format_undef); mem::desc topMD = createMD({bs, oc}, format::nc); + inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); + outVal_.reset(new mem(mem::primitive_desc(topMD, engine_), topData)); + mkldnn::prop_kind pk = mkldnn::prop_kind::forward; fc_fwd::desc fwdDesc = biasData != NULL ? 
fc_fwd::desc(pk, botMD, wgtMD, biasMD, topMD) : fc_fwd::desc(pk, botMD, wgtMD, topMD); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - mem bot = mem(mem::primitive_desc(botMD, engine_), botData); - mem wgt = mem(mem::primitive_desc(wgtMD, engine_), wgtData); - mem top = mem(mem::primitive_desc(topMD, engine_), topData); - if (biasData != NULL) { - mem bias = mem(mem::primitive_desc(biasMD, engine_), biasData); - fwd_.reset(new fc_fwd(fwdPD, bot, wgt, bias, top)); + biasVal_.reset(new mem(mem::primitive_desc(biasMD, engine_), biasData)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { - fwd_.reset(new fc_fwd(fwdPD, bot, wgt, top)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); } pipelineFwd_.clear(); pipelineFwd_.push_back(*fwd_); @@ -84,8 +90,12 @@ void MkldnnLayer::mkldnnForwardFC(int bs, // if input size changed, reset it resetForwardFC(bs, ic, ih, iw, botData, oc, topData, wgtData, biasData); + this->cvtWgtFromPaddle(); + + // update input, since the data might be changed if this is after data layer + inVal_->set_data_handle(botData); + // just forward - // update botdata stream_->submit(pipelineFwd_); } @@ -112,6 +122,10 @@ void MkldnnLayer::resetBackwardFC(int bs, mem::desc biasMD = biasDiff != NULL ? createMD({oc}, format::x) : createMD({}, format::format_undef); + inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + wgtGrad_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtDiff)); + outGrad_.reset(new mem(mem::primitive_desc(topMD, engine_), topDiff)); + fc_fwd::desc fwdDesc = fc_fwd::desc(mkldnn::prop_kind::forward, botMD, wgtMD, topMD); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); @@ -121,15 +135,12 @@ void MkldnnLayer::resetBackwardFC(int bs, fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - mem botVal = mem(mem::primitive_desc(botMD, engine_), botData); - mem wgtGrad = mem(mem::primitive_desc(wgtMD, engine_), wgtDiff); - mem topGrad = mem(mem::primitive_desc(topMD, engine_), topDiff); - if (biasDiff != NULL) { - mem biasGrad = mem(mem::primitive_desc(biasMD, engine_), biasDiff); - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, botVal, topGrad, wgtGrad, biasGrad)); + biasGrad_.reset(new mem(mem::primitive_desc(biasMD, engine_), biasDiff)); + bwdWgt_.reset( + new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, botVal, topGrad, wgtGrad)); + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); } pipelineBwd_.clear(); pipelineBwd_.push_back(*bwdWgt_); @@ -142,9 +153,9 @@ void MkldnnLayer::resetBackwardFC(int bs, fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(botMD, wgtMD, topMD); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - mem botGrad = mem(mem::primitive_desc(botMD, engine_), botDiff); - mem wgtVal = mem(mem::primitive_desc(wgtMD, engine_), wgtData); - bwdData_.reset(new fc_bwdData(bwdDataPD, topGrad, wgtVal, botGrad)); + inGrad_.reset(new mem(mem::primitive_desc(botMD, engine_), botDiff)); + wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); + bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); pipelineBwd_.push_back(*bwdData_); } @@ -172,11 +183,18 @@ void MkldnnLayer::mkldnnBackwardFC(int bs, wgtData, biasDiff); - // just forward - // update botdata + // update data + outGrad_->set_data_handle(topDiff); + stream_->submit(pipelineBwd_); } +void 
MkldnnLayer::printSizeInfo() {
+  VLOG(DNN_SIZES) << "bs: " << bs_ << ", ic: " << ic_ << ", ih: " << ih_
+                  << ", iw: " << iw_ << ", oc: " << oc_ << ", oh: " << oh_
+                  << ", ow: " << ow_;
+}
+
 mem::desc MkldnnLayer::createMD(mem::dims dims,
                                 mem::format fmt,
                                 mem::data_type type) {
diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h
index 5927bd6d52..a9eb9f79da 100644
--- a/paddle/gserver/layers/MkldnnLayer.h
+++ b/paddle/gserver/layers/MkldnnLayer.h
@@ -40,13 +40,24 @@ protected:
   // mkldnn engine, stream and primitives
   mkldnn::engine engine_;
   std::shared_ptr stream_;
-  std::shared_ptr fwd_;
   std::shared_ptr bwdWgt_;
   std::shared_ptr bwdData_;
   std::vector pipelineFwd_;
   std::vector pipelineBwd_;
+  // TODO(TJ): change below memory as MkldnnMatrixPtr type
+  // input == bottom, output == top
+  // value == data, grad == diff
+  std::shared_ptr inVal_;
+  std::shared_ptr inGrad_;
+  std::shared_ptr outVal_;
+  std::shared_ptr outGrad_;
+  std::shared_ptr wgtVal_;
+  std::shared_ptr wgtGrad_;
+  std::shared_ptr biasVal_;
+  std::shared_ptr biasGrad_;
+
 public:
   explicit MkldnnLayer(const LayerConfig& config)
       : Layer(config),
@@ -67,6 +78,20 @@ public:
   virtual bool init(const LayerMap& layerMap,
                     const ParameterMap& parameterMap);

+  virtual void printSizeInfo();
+
+  /**
+   * convert weight from paddle format to mkldnn format
+   * weight_ will be overridden
+   */
+  virtual void cvtWgtFromPaddle() { ; }
+
+  /**
+   * convert mkldnn weight to paddle format
+   * weight_ will be overridden
+   */
+  virtual void cvtWgtToPaddle() { ; }
+
   void resetForwardFC(int bs,
                       int ic,
diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp
index 38e5bc75be..ecf0f9124d 100644
--- a/paddle/gserver/tests/MkldnnTester.cpp
+++ b/paddle/gserver/tests/MkldnnTester.cpp
@@ -14,6 +14,7 @@ limitations under the License.
*/ #include "MkldnnTester.h" #include "paddle/gserver/layers/MkldnnBase.h" +#include "paddle/gserver/layers/MkldnnLayer.h" namespace paddle { @@ -145,7 +146,10 @@ void MkldnnTester::checkBackwardWgts() { vector dnnWgts; // used to temply save mkldnn weights saveWgt(parameters_[DNN], dnnWgts); - // TODO(TJ): cvtWgtToPaddle + const MkldnnLayerPtr dnnlayer = + std::dynamic_pointer_cast(dnnLayer_); + CHECK(dnnlayer); + dnnlayer->cvtWgtToPaddle(); for (size_t i = 0; i < parameters_[DNN].size(); ++i) { const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); @@ -233,11 +237,10 @@ void MkldnnTester::printMatrix(const MatrixPtr& m) { if (!log_) { return; } -#ifdef _DEBUG - std::ostream str; - m->print(str); - VLOG(lvl_) << str; -#endif + + std::ostringstream ostr; + m->print(ostr); + VLOG(lvl_) << std::endl << ostr.str(); } void MkldnnTester::printVector(const VectorPtr& v) { @@ -245,15 +248,9 @@ void MkldnnTester::printVector(const VectorPtr& v) { return; } - CHECK(v); - CHECK(v->getData()); - const real* pd = v->getData(); - const size_t sz = v->getSize(); - std::stringstream row; - for (size_t i = 0; i < sz; ++i) { - row << pd[i] << ", "; - } - VLOG(lvl_) << row.str(); + std::ostringstream ostr; + v->print(ostr, v->getSize()); + VLOG(lvl_) << std::endl << ostr.str(); } double MkldnnTester::getDelta(const real* d1, @@ -335,7 +332,6 @@ void MkldnnTester::run(const TestConfig& dnn, // Firstly always set flag false to initial from paddle weight TestConfig first = dnn; - // first.layerConfig.set_init_wgt_from_mkldnn(false); // reset and run once reset(first, ref, batchSize); @@ -348,8 +344,6 @@ void MkldnnTester::run(const TestConfig& dnn, // firstly get the flag bool initWgtFromMkldnn = false; - // dnn.layerConfig.has_init_wgt_from_mkldnn() && - // dnn.layerConfig.init_wgt_from_mkldnn(); if (initWgtFromMkldnn) { // after run once the mkldnn weight has been stored in dnnlayer diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_Mkldnn.cpp index c2c6b701ec..1d367e6180 100644 --- a/paddle/gserver/tests/test_Mkldnn.cpp +++ b/paddle/gserver/tests/test_Mkldnn.cpp @@ -55,12 +55,12 @@ void testFcLayer(const testFCDesc& pm) { } TEST(MkldnnLayer, fcLayer) { - testFcLayer({2, 2, 3, 1, 1}); /* - testFcLayer({16, 32, 64, 1, 1}); - testFcLayer({8, 16, 32, 13, 13}); - testFcLayer({4, 12, 18, 13, 11}); - testFcLayer({2, 64, 32, 16, 16}); - testFcLayer({15, 3, 6, 16, 16});*/ + testFcLayer({2, 2, 3, 1, 1}); + testFcLayer({3, 7, 19, 1, 1}); + testFcLayer({8, 16, 32, 13, 13}); + testFcLayer({4, 12, 18, 13, 11}); + testFcLayer({2, 64, 32, 16, 16}); + testFcLayer({15, 3, 6, 16, 16}); } // TODO(TJ): add branch test From 6373291c7787c83335cc64d56294756872493301 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 8 Aug 2017 19:34:57 +0800 Subject: [PATCH 08/76] add test case use_mkldnn_wgt --- paddle/gserver/layers/MkldnnBase.h | 2 - paddle/gserver/layers/MkldnnFcLayer.cpp | 4 ++ paddle/gserver/layers/MkldnnLayer.h | 3 ++ paddle/gserver/tests/MkldnnTester.cpp | 60 +++++++++++-------------- paddle/gserver/tests/MkldnnTester.h | 4 +- paddle/gserver/tests/test_Mkldnn.cpp | 1 + paddle/trainer/TrainerConfigHelper.cpp | 2 + paddle/utils/Flags.cpp | 1 + paddle/utils/Flags.h | 1 + python/paddle/trainer/config_parser.py | 5 ++- 10 files changed, 45 insertions(+), 38 deletions(-) diff --git a/paddle/gserver/layers/MkldnnBase.h b/paddle/gserver/layers/MkldnnBase.h index eba72e58e5..260dbe45e4 100644 --- 
a/paddle/gserver/layers/MkldnnBase.h
+++ b/paddle/gserver/layers/MkldnnBase.h
@@ -23,8 +23,6 @@ typedef enum {
   DNN_TESTS = 1,
   DNN_SIZES,
   DNN_FMTS,
-  DNN_TESTS_DETAILS,
-  DNN_TESTS_MORE,
   DNN_ALL,
 } DNN_LOG_LEVEL;

diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp
index 29b2cc184d..7e09ed33d2 100644
--- a/paddle/gserver/layers/MkldnnFcLayer.cpp
+++ b/paddle/gserver/layers/MkldnnFcLayer.cpp
@@ -51,6 +51,10 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap,
 }

 void MkldnnFcLayer::cvtWgtFromPaddle() {
+  if (FLAGS_use_mkldnn_wgt) {
+    return;
+  }
+
   if (hasInitedWgt_) {
     return;
   }
diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h
index a9eb9f79da..c653eb9985 100644
--- a/paddle/gserver/layers/MkldnnLayer.h
+++ b/paddle/gserver/layers/MkldnnLayer.h
@@ -19,6 +19,9 @@ limitations under the License. */
 #include "MkldnnBase.h"
 #include "mkldnn.hpp"

+DECLARE_bool(use_mkldnn);
+DECLARE_bool(use_mkldnn_wgt);
+
 namespace paddle {

 class MkldnnLayer;
diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp
index ecf0f9124d..ef99b384a9 100644
--- a/paddle/gserver/tests/MkldnnTester.cpp
+++ b/paddle/gserver/tests/MkldnnTester.cpp
@@ -118,7 +118,7 @@ void MkldnnTester::checkForward() {
   printTopDatas();
   double delta = compareMatrix(testLayers_[DNN]->getOutputValue(),
                                testLayers_[REF]->getOutputValue());
-  VLOG(DNN_TESTS_DETAILS) << "Check Forward";
+  VLOG(DNN_ALL) << "Check Forward";
   EXPECT_LE(fabs(delta), eps_);
 }

@@ -162,7 +162,7 @@ void MkldnnTester::checkBackwardWgts() {
     EXPECT_LE(fabs(delta), eps_);
   }

-  VLOG(DNN_TESTS_DETAILS) << "Restore dnn weights before compare";
+  VLOG(DNN_ALL) << "Restore dnn weights before compare";
   restoreWgt(dnnWgts, parameters_[DNN]);
 }

@@ -275,8 +275,8 @@ double MkldnnTester::getDelta(const real* d1,
   EXPECT_TRUE(std::isnormal(sum));
   EXPECT_FALSE(std::isinf(sum));
   EXPECT_FALSE(std::isnan(delta));
-  VLOG(DNN_TESTS_MORE) << "reference avg data: " << sum / len
-                       << ", delta: " << delta / sum << ", failCnt:" << failCnt;
+  VLOG(DNN_ALL) << "reference avg data: " << sum / len
+                << ", delta: " << delta / sum << ", failCnt:" << failCnt;
   return (failCnt / (float)len) > failRate ?
maxOut : delta / sum; } @@ -330,43 +330,37 @@ void MkldnnTester::run(const TestConfig& dnn, log_ = log; lvl_ = level; - // Firstly always set flag false to initial from paddle weight - TestConfig first = dnn; - + // Firstly test FLAGS_use_mkldnn_wgt = false + FLAGS_use_mkldnn_wgt = false; // reset and run once - reset(first, ref, batchSize); + reset(dnn, ref, batchSize); randomWgtDatas(); clearWgtDiffs(); clearBotDiffs(); + for (size_t i = 0; i < iter_; ++i) { + VLOG(DNN_TESTS) << "Check Iteration " << i; + runOnce(); + } - VLOG(DNN_TESTS) << "Check Iteration 0"; - runOnce(); - - // firstly get the flag - bool initWgtFromMkldnn = false; - - if (initWgtFromMkldnn) { - // after run once the mkldnn weight has been stored in dnnlayer - // then save the weigths and restart again - vector dnnWgts, refWgts; - CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); - saveWgt(parameters_[DNN], dnnWgts); - saveWgt(parameters_[REF], refWgts); - - // restart again with flag true - reset(dnn, ref, batchSize); + // Then test FLAGS_use_mkldnn_wgt = true + FLAGS_use_mkldnn_wgt = true; + // after run once the mkldnn weight has been stored in dnnlayer + // then save the weigths and restart again + vector dnnWgts, refWgts; + CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + saveWgt(parameters_[DNN], dnnWgts); + saveWgt(parameters_[REF], refWgts); - // restore wgt - restoreWgt(dnnWgts, parameters_[DNN]); - restoreWgt(refWgts, parameters_[REF]); - clearWgtDiffs(); - clearBotDiffs(); + // restart again with flag true + reset(dnn, ref, batchSize); - // at least run once - runOnce(); - } + // restore wgt + restoreWgt(dnnWgts, parameters_[DNN]); + restoreWgt(refWgts, parameters_[REF]); + clearWgtDiffs(); + clearBotDiffs(); - for (size_t i = 1; i < iter_; ++i) { + for (size_t i = 0; i < iter_; ++i) { VLOG(DNN_TESTS) << "Check Iteration " << i; runOnce(); } diff --git a/paddle/gserver/tests/MkldnnTester.h b/paddle/gserver/tests/MkldnnTester.h index 16b0970a8e..8b3049b5c2 100644 --- a/paddle/gserver/tests/MkldnnTester.h +++ b/paddle/gserver/tests/MkldnnTester.h @@ -58,7 +58,7 @@ public: iter_ = iter; eps_ = epsilon; log_ = false; - lvl_ = DNN_TESTS_MORE; + lvl_ = DNN_ALL; } ~MkldnnTester() {} @@ -72,7 +72,7 @@ public: size_t iter = 3, float epsilon = 1e-4, bool log = false, - int level = DNN_TESTS_MORE); + int level = DNN_ALL); void setLogLevel(int lvl) { lvl_ = lvl; } private: diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_Mkldnn.cpp index 1d367e6180..0516a059de 100644 --- a/paddle/gserver/tests/test_Mkldnn.cpp +++ b/paddle/gserver/tests/test_Mkldnn.cpp @@ -23,6 +23,7 @@ using namespace paddle; // NOLINT DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(use_gpu); DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); struct testFCDesc { int bs; diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index a0a365aa0b..eba40862b9 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -29,6 +29,7 @@ DECLARE_bool(with_gpu); DECLARE_bool(parallel_nn); DECLARE_string(config_args); DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserFuncName = "parse_config_and_serialize"; @@ -46,6 +47,7 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",parallel_nn=" << FLAGS_parallel_nn << 
",use_mkldnn=" << FLAGS_use_mkldnn + << ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt << ",cudnn_version=" << hl_get_cudnn_lib_version(); if (!FLAGS_config_args.empty()) { configArgs << "," << FLAGS_config_args; diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index ab1c181c62..600c83a848 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -27,6 +27,7 @@ DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training"); DEFINE_bool(use_mkldnn, false, "Only support CPU training"); #endif +DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight"); DEFINE_bool(parallel_nn, false, "Whether to use multi-threads to calculate one neural network." diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index 1832bb515e..0aca4c0ee0 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -41,3 +41,4 @@ DECLARE_string(predict_file); DECLARE_bool(prev_batch_state); DECLARE_string(init_model_path); DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index ae39abc081..dd79f3a043 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1619,6 +1619,8 @@ class FCLayer(LayerBase): config_assert( len(inputs) == 1, "MkldnnFCLayer support one and only one input!") + use_mkldnn_wgt = bool( + int(g_command_config_args.get("use_mkldnn_wgt", 0))) super(FCLayer, self).__init__( name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): @@ -1627,9 +1629,10 @@ class FCLayer(LayerBase): format = self.inputs[input_index].format sparse = format == "csr" or format == "csc" if use_mkldnn: - dims = [self.config.size, input_layer.size] config_assert(not sparse, "MkldnnFCLayer do not support sparse format yet") + if use_mkldnn and use_mkldnn_wgt: + dims = [self.config.size, input_layer.size] else: dims = [input_layer.size, self.config.size] if sparse: From e18fbd82082096227bc3f8c51fc7b2a11c2f2707 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 8 Aug 2017 20:07:38 +0800 Subject: [PATCH 09/76] skip reset mkldnn when input size does not change --- paddle/gserver/layers/MkldnnLayer.cpp | 30 +++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index 0e1e1c3061..c909fe274d 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -49,7 +49,6 @@ void MkldnnLayer::resetForwardFC(int bs, real* wgtData, real* biasData) { bool hasSpatial = ih == 1 && iw == 1 ? false : true; - mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) : createMD({bs, ic}, format::nc); mem::desc wgtMD = hasSpatial ? createMD({oc, ic, ih, iw}, format::oihw) @@ -58,7 +57,12 @@ void MkldnnLayer::resetForwardFC(int bs, : createMD({}, format::format_undef); mem::desc topMD = createMD({bs, oc}, format::nc); - inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + mem::primitive_desc botPD = mem::primitive_desc(botMD, engine_); + if (inVal_ && inVal_->get_primitive_desc() == botPD) { + return; + } + + inVal_.reset(new mem(botPD, botData)); wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); outVal_.reset(new mem(mem::primitive_desc(topMD, engine_), topData)); @@ -111,7 +115,6 @@ void MkldnnLayer::resetBackwardFC(int bs, real* wgtData, real* biasDiff) { bool hasSpatial = ih == 1 && iw == 1 ? 
false : true; - engine_ = CpuEngine::Instance().getEngine(); // backward weight mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) @@ -122,9 +125,19 @@ void MkldnnLayer::resetBackwardFC(int bs, mem::desc biasMD = biasDiff != NULL ? createMD({oc}, format::x) : createMD({}, format::format_undef); - inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + mem::primitive_desc topPD = mem::primitive_desc(botMD, engine_); + if (outGrad_ && outGrad_->get_primitive_desc() == topPD) { + return; + } + + if (inVal_) { + // update data + inVal_->set_data_handle(botData); + } else { + inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + } wgtGrad_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtDiff)); - outGrad_.reset(new mem(mem::primitive_desc(topMD, engine_), topDiff)); + outGrad_.reset(new mem(topPD, topDiff)); fc_fwd::desc fwdDesc = fc_fwd::desc(mkldnn::prop_kind::forward, botMD, wgtMD, topMD); @@ -154,7 +167,12 @@ void MkldnnLayer::resetBackwardFC(int bs, fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); inGrad_.reset(new mem(mem::primitive_desc(botMD, engine_), botDiff)); - wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); + if (wgtVal_) { + // update data + wgtVal_->set_data_handle(wgtData); + } else { + wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); + } bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); pipelineBwd_.push_back(*bwdData_); } From f6a940936b5f44ebf99a9925991158fdd3beaffd Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 8 Aug 2017 21:22:15 +0800 Subject: [PATCH 10/76] remove unused comments, refine and rename --- paddle/gserver/layers/MkldnnFcLayer.cpp | 4 ++-- paddle/gserver/layers/MkldnnFcLayer.h | 4 ++-- paddle/gserver/layers/MkldnnLayer.cpp | 9 ++++----- paddle/gserver/layers/MkldnnLayer.h | 4 ++-- paddle/gserver/tests/MkldnnTester.cpp | 2 +- python/paddle/trainer/config_parser.py | 4 ++-- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index 7e09ed33d2..e4c4d4675d 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -50,7 +50,7 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return true; } -void MkldnnFcLayer::cvtWgtFromPaddle() { +void MkldnnFcLayer::convertWeightsFromPaddle() { if (FLAGS_use_mkldnn_wgt) { return; } @@ -75,7 +75,7 @@ void MkldnnFcLayer::cvtWgtFromPaddle() { hasInitedWgt_ = true; } -void MkldnnFcLayer::cvtWgtToPaddle() { +void MkldnnFcLayer::convertWeightsToPaddle() { MatrixPtr dnnWgt = weight_->getW(); MatrixPtr paddleWgt; dnnWgt->transpose(paddleWgt, true); diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index 0064fc4727..f891052284 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -44,9 +44,9 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; - void cvtWgtFromPaddle() override; + void convertWeightsFromPaddle() override; - void cvtWgtToPaddle() override; + void convertWeightsToPaddle() override; void forward(PassType passType) override; diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index c909fe274d..6bd2b15a17 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -14,7 +14,6 @@ limitations under the License. 
*/ #include "MkldnnLayer.h" -// using namespace mkldnn; // NOLINT using mem = mkldnn::memory; // NOLINT typedef mem::format format; typedef mkldnn::inner_product_forward fc_fwd; @@ -94,7 +93,7 @@ void MkldnnLayer::mkldnnForwardFC(int bs, // if input size changed, reset it resetForwardFC(bs, ic, ih, iw, botData, oc, topData, wgtData, biasData); - this->cvtWgtFromPaddle(); + this->convertWeightsFromPaddle(); // update input, since the data might be changed if this is after data layer inVal_->set_data_handle(botData); @@ -208,9 +207,9 @@ void MkldnnLayer::mkldnnBackwardFC(int bs, } void MkldnnLayer::printSizeInfo() { - VLOG(DNN_SIZES) << "bs: " << bs_ << ", ic: " << ic_ << ", ih: " << ih_ - << ", iw: " << iw_ << ", oc: " << oc_ << ", oh: " << oh_ - << ", ow: " << ow_; + VLOG(DNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ + << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ + << ", oh: " << oh_ << ", ow: " << ow_; } mem::desc MkldnnLayer::createMD(mem::dims dims, diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index c653eb9985..e5c93500c7 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -87,13 +87,13 @@ public: * convert weight from paddle format to mkldnn format * weight_ will be override */ - virtual void cvtWgtFromPaddle() { ; } + virtual void convertWeightsFromPaddle() {} /** * convert mkldnn weight to paddle format * weight_ will be override */ - virtual void cvtWgtToPaddle() { ; } + virtual void convertWeightsToPaddle() {} void resetForwardFC(int bs, int ic, diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp index ef99b384a9..59b3861df8 100644 --- a/paddle/gserver/tests/MkldnnTester.cpp +++ b/paddle/gserver/tests/MkldnnTester.cpp @@ -149,7 +149,7 @@ void MkldnnTester::checkBackwardWgts() { const MkldnnLayerPtr dnnlayer = std::dynamic_pointer_cast(dnnLayer_); CHECK(dnnlayer); - dnnlayer->cvtWgtToPaddle(); + dnnlayer->convertWeightsToPaddle(); for (size_t i = 0; i < parameters_[DNN].size(); ++i) { const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index dc07af343d..3213df5186 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1614,13 +1614,13 @@ class FCLayer(LayerBase): error_clipping_threshold=None, **xargs): use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) + use_mkldnn_wgt = bool( + int(g_command_config_args.get("use_mkldnn_wgt", 0))) if use_mkldnn: self.layer_type = 'mkldnn_fc' config_assert( len(inputs) == 1, "MkldnnFCLayer support one and only one input!") - use_mkldnn_wgt = bool( - int(g_command_config_args.get("use_mkldnn_wgt", 0))) super(FCLayer, self).__init__( name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): From e2ccbccb02132cef59373bb8ec52ddbbf3c7c61d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 9 Aug 2017 19:49:37 +0800 Subject: [PATCH 11/76] support python test without installation python package --- cmake/generic.cmake | 2 +- python/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 957c20bcf6..9f907a9dc2 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) 
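  # py_test registers a plain CTest case that runs the given sources with
  # python2 and the PYTHONPATH preset below; a typical call site would be
  #   py_test(test_some_layer SRCS test_some_layer.py)
  # (the target and file names in this example are illustrative only).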
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} + COMMAND env PYTHONPATH=${PADDLE_PYTHON_LIB_DIR} python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b5030da8e7..fc8c6f6a42 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,6 +1,8 @@ set(OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/build") +set(PADDLE_PYTHON_LIB_DIR "${OUTPUT_DIR}/lib") + file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py) From 5e5c441245276a2696ac1f840ebd261c7c14cfd4 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 9 Aug 2017 20:16:16 +0800 Subject: [PATCH 12/76] Enable Python Unit Test before make install --- cmake/generic.cmake | 2 +- paddle/framework/CMakeLists.txt | 5 +++++ python/paddle/v2/framework/.gitignore | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 python/paddle/v2/framework/.gitignore diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 957c20bcf6..2778b49128 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} + COMMAND env PYTHONPATH=${CMAKE_SOURCE_DIR}/python:${CMAKE_SOURCE_DIR}/paddle:${PADDLE_PYTHON_PACKAGE_DIR} python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 6601918c90..b7b61b597f 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -35,6 +35,11 @@ py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc. # Generate an empty __init__.py to make framework_py_proto as a valid python module. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) +add_custom_command(TARGET framework_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PROJ_ROOT}/python/paddle/v2/framework/proto + COMMAND cp *.py ${PROJ_ROOT}/python/paddle/v2/framework/proto/ + COMMENT "Copy generated python proto into directory paddle/v2/framework/proto." 
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward) diff --git a/python/paddle/v2/framework/.gitignore b/python/paddle/v2/framework/.gitignore new file mode 100644 index 0000000000..2ff540d576 --- /dev/null +++ b/python/paddle/v2/framework/.gitignore @@ -0,0 +1 @@ +proto From 8f464a58984f8024afadab2920acf2b9c4a60d17 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 9 Aug 2017 20:20:42 +0800 Subject: [PATCH 13/76] update PROJ_ROOT --- cmake/generic.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 2778b49128..6b0524021c 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${CMAKE_SOURCE_DIR}/python:${CMAKE_SOURCE_DIR}/paddle:${PADDLE_PYTHON_PACKAGE_DIR} + COMMAND env PYTHONPATH=${PROJ_ROOT}/python:${PROJ_ROOT}/paddle:${PADDLE_PYTHON_PACKAGE_DIR} python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() From c7f245892eab275d9c60e3005ec8030168a0936d Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 9 Aug 2017 21:23:02 +0800 Subject: [PATCH 14/76] Fix some warning. --- paddle/math/CpuSparseMatrix.h | 4 ++++ paddle/math/SparseMatrix.h | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/math/CpuSparseMatrix.h index 9676f8864f..6ba795d5b7 100644 --- a/paddle/math/CpuSparseMatrix.h +++ b/paddle/math/CpuSparseMatrix.h @@ -302,6 +302,10 @@ public: bool isSparse() const { return true; } private: + using Matrix::mul; using Matrix::copyFrom; + using Matrix::rowMax; + using Matrix::print; + using Matrix::subMatrix; }; } // namespace paddle diff --git a/paddle/math/SparseMatrix.h b/paddle/math/SparseMatrix.h index f8d9ffc29f..8cd6b71f8f 100644 --- a/paddle/math/SparseMatrix.h +++ b/paddle/math/SparseMatrix.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once #include -#include "Matrix.h" #include "CpuSparseMatrix.h" +#include "Matrix.h" namespace paddle { @@ -234,6 +234,9 @@ public: private: using Matrix::mul; using Matrix::copyFrom; + using Matrix::rowMax; + using Matrix::print; + using Matrix::subMatrix; }; } // namespace paddle From b2bd67133aa609225ea46d12d1f091340ab000e4 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 9 Aug 2017 22:52:47 +0800 Subject: [PATCH 15/76] rename and refine functions --- paddle/gserver/layers/MkldnnBase.h | 16 +- paddle/gserver/layers/MkldnnFcLayer.cpp | 167 ++++++++++++++---- paddle/gserver/layers/MkldnnFcLayer.h | 21 ++- paddle/gserver/layers/MkldnnLayer.cpp | 222 ------------------------ paddle/gserver/layers/MkldnnLayer.h | 78 ++++----- paddle/gserver/tests/MkldnnTester.cpp | 22 ++- paddle/gserver/tests/MkldnnTester.h | 4 +- paddle/gserver/tests/test_Mkldnn.cpp | 13 +- python/paddle/trainer/config_parser.py | 7 +- 9 files changed, 217 insertions(+), 333 deletions(-) delete mode 100644 paddle/gserver/layers/MkldnnLayer.cpp diff --git a/paddle/gserver/layers/MkldnnBase.h b/paddle/gserver/layers/MkldnnBase.h index 260dbe45e4..63fd67a850 100644 --- a/paddle/gserver/layers/MkldnnBase.h +++ b/paddle/gserver/layers/MkldnnBase.h @@ -19,12 +19,12 @@ limitations under the License. 
*/ namespace paddle { typedef enum { - DNN_BASE = 1, - DNN_TESTS = 1, - DNN_SIZES, - DNN_FMTS, - DNN_ALL, -} DNN_LOG_LEVEL; + MKLDNN_BASE = 1, // basical info of MKLDNN + MKLDNN_TESTS = 1, // gtest info of MKLDNN + MKLDNN_SIZES = 2, // size info of MKLDNN + MKLDNN_FMTS = 3, // format info of MKLDNN + MKLDNN_ALL = 4, // show all info of MKLDNN +} MKLDNN_LOG_LEVEL; /** * @brief MKLDNN CPU engine. @@ -68,7 +68,7 @@ public: /** * @brief Submit stream * @param prims The primitives vector - * block Waiting for the stream to complete + * @param block Waiting for the stream to complete */ void submit(std::vector& prims, bool block = true) { resetState(); @@ -84,8 +84,8 @@ public: return; } // TODO(TJ): change me when mkldnn have method to reset this state - stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy)); + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); ready_ = true; } diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index e4c4d4675d..f89db169ef 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -16,6 +16,12 @@ limitations under the License. */ #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" +using namespace mkldnn; // NOLINT +typedef memory::format format; +typedef inner_product_forward fc_fwd; +typedef inner_product_backward_weights fc_bwdWgt; +typedef inner_product_backward_data fc_bwdData; + namespace paddle { REGISTER_LAYER(mkldnn_fc, MkldnnFcLayer); @@ -26,7 +32,7 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return false; } - CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet!"; + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet"; CHECK_EQ(inputLayers_.size(), parameters_.size()); CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet"; @@ -63,14 +69,14 @@ void MkldnnFcLayer::convertWeightsFromPaddle() { MatrixPtr paddleWgt = Matrix::create( weight_->getW()->getData(), iLayerSize_, oc_, false, false); + // TODO(TJ): remove this print when do not need differ weights std::ostringstream ostr; paddleWgt->print(ostr); - VLOG(DNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); + VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); // The mkldnn weight is transposed from initial paddle matrix MatrixPtr paddleWgtT; paddleWgt->transpose(paddleWgtT, true); - weight_->getW()->copyFrom(*paddleWgtT); hasInitedWgt_ = true; } @@ -101,6 +107,10 @@ void MkldnnFcLayer::reshape() { if (iw_ == 0) { iw_ = 1; } + hasSpatial_ = true; + if (ih_ == 1 && iw_ == 1) { + hasSpatial_ = false; + } CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); ic_ = iLayerSize_ / (ih_ * iw_); CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; @@ -111,6 +121,114 @@ void MkldnnFcLayer::reshape() { output_.setFrameHeight(oh_); output_.setFrameWidth(ow_); resetOutput(bs_, oc_); + + // reset mkldnn forward + resetFwd(); + needResetBwd_ = true; + + convertWeightsFromPaddle(); +} + +void MkldnnFcLayer::resetFwd() { + bool hasBias = biases_ && biases_->getW(); + real* iData = getInputValue(0)->getData(); + real* oData = getOutputValue()->getData(); + real* wData = weight_->getW()->getData(); + real* bData = hasBias ? biases_->getW()->getData() : NULL; + + // TODO(TJ): below create should be covered in MkldnnMatrix + // create memory desc + memory::desc iMD = hasSpatial_ ? 
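// (Layout note: with real spatial dims MKL-DNN expects 4-D tensors -- nchw
//  for activations, oihw for weights -- while a 1x1 image collapses to the
//  2-D nc/oi descriptors chosen below; e.g. bs_=2, ic_=3, ih_=iw_=1 yields
//  an nc descriptor with dims {2, 3}.)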
createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + + // create memory primitive desc and memory self + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); + outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + + prop_kind pk = prop_kind::forward; + fc_fwd::desc fwdDesc = bData != NULL ? fc_fwd::desc(pk, iMD, wMD, bMD, oMD) + : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + + if (bData != NULL) { + biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); + } else { + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); + } + pipelineFwd_.clear(); + pipelineFwd_.push_back(*fwd_); +} + +void MkldnnFcLayer::resetBwd() { + if (!needResetBwd_) { + return; + } + needResetBwd_ = false; + + bool hasBias = biases_ && biases_->getWGrad(); + real* iData = getInputValue(0)->getData(); + real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; + real* oDiff = getOutputGrad()->getData(); + real* wDiff = weight_->getWGrad()->getData(); + real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL; + + /// backward weight + // create memory desc for backward memory + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + + if (inVal_) { + // update data + inVal_->set_data_handle(iData); + } else { + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + } + + // create memory primitive desc and memory self + wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff)); + outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff)); + + fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL + ? 
fc_bwdWgt::desc(iMD, wMD, bMD, oMD) + : fc_bwdWgt::desc(iMD, wMD, oMD); + fc_bwdWgt::primitive_desc bwdWgtPD = + fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); + + if (bDiff != NULL) { + biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff)); + bwdWgt_.reset( + new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); + } else { + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); + } + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwdWgt_); + + /// backward data + if (iDiff == NULL) { + return; + } + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD); + fc_bwdData::primitive_desc bwdDataPD = + fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff)); + CHECK(wgtVal_) << "Should have weight memory"; + bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + pipelineBwd_.push_back(*bwdData_); } void MkldnnFcLayer::forward(PassType passType) { @@ -119,12 +237,14 @@ void MkldnnFcLayer::forward(PassType passType) { { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - real* input = getInputValue(0)->getData(); - real* output = getOutputValue()->getData(); - real* wgt = weight_->getW()->getData(); - bool hasBias = biases_ && biases_->getW(); - real* bias = hasBias ? biases_->getW()->getData() : NULL; - mkldnnForwardFC(bs_, ic_, ih_, iw_, input, oc_, output, wgt, bias); + + // update input data + // since it might be changed if this is after data layer + real* iData = getInputValue(0)->getData(); + inVal_->set_data_handle(iData); + + // just submit forward pipeline + stream_->submit(pipelineFwd_); } /* activation */ { @@ -139,33 +259,22 @@ void MkldnnFcLayer::backward(const UpdateCallback& callback) { backwardActivation(); } - bool hasBias = biases_ && biases_->getWGrad(); { REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); - real* inVal = getInputValue(0)->getData(); - real* inGrad = - getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; - real* outGrad = getOutputGrad()->getData(); - real* wgtGrad = weight_->getWGrad()->getData(); - real* wgtVal = weight_->getW()->getData(); - real* biasGrad = hasBias ? 
biases_->getWGrad()->getData() : NULL; - mkldnnBackwardFC(bs_, - ic_, - ih_, - iw_, - inGrad, - inVal, - oc_, - outGrad, - wgtGrad, - wgtVal, - biasGrad); + resetBwd(); + + // update diff + real* oDiff = getOutputGrad()->getData(); + outGrad_->set_data_handle(oDiff); + + // just sumbmit backward pipeline + stream_->submit(pipelineBwd_); } { REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); weight_->getParameterPtr()->incUpdate(callback); - if (hasBias) { + if (biases_ && biases_->getWGrad()) { biases_->getParameterPtr()->incUpdate(callback); } } diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index f891052284..c4c0fa1c41 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -30,6 +30,7 @@ protected: size_t iLayerSize_; // == ic * ih * iw bool hasInitedWgt_; + bool hasSpatial_; // fc weight and bias std::unique_ptr weight_; @@ -37,7 +38,7 @@ protected: public: explicit MkldnnFcLayer(const LayerConfig& config) - : MkldnnLayer(config), hasInitedWgt_(false) {} + : MkldnnLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} ~MkldnnFcLayer() {} @@ -52,7 +53,25 @@ public: void backward(const UpdateCallback& callback) override; +protected: + /** + * reshape the input image sizes + * and reset output buffer size + * and reset mkldnn forward + */ void reshape(); + + /** + * reset the forward primitve and memory + * only would be called when input size changes + */ + void resetFwd(); + + /** + * reset the backward primitve and memory for mkldnn fc + * only would be called when needed + */ + void resetBwd(); }; } // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp deleted file mode 100644 index 6bd2b15a17..0000000000 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MkldnnLayer.h" - -using mem = mkldnn::memory; // NOLINT -typedef mem::format format; -typedef mkldnn::inner_product_forward fc_fwd; -typedef mkldnn::inner_product_backward_weights fc_bwdWgt; -typedef mkldnn::inner_product_backward_data fc_bwdData; - -namespace paddle { - -bool MkldnnLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!Layer::init(layerMap, parameterMap)) { - return false; - } - - CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." - << "Please set WITH_MKLDNN=ON " - << "and set use_mkldnn=True"; - stream_.reset(new MkldnnStream()); - engine_ = CpuEngine::Instance().getEngine(); - - // TODO(TJ): deivecId - return true; -} - -void MkldnnLayer::resetForwardFC(int bs, - int ic, - int ih, - int iw, - real* botData, - int oc, - real* topData, - real* wgtData, - real* biasData) { - bool hasSpatial = ih == 1 && iw == 1 ? false : true; - mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) - : createMD({bs, ic}, format::nc); - mem::desc wgtMD = hasSpatial ? 
createMD({oc, ic, ih, iw}, format::oihw) - : createMD({oc, ic}, format::oi); - mem::desc biasMD = biasData != NULL ? createMD({oc}, format::x) - : createMD({}, format::format_undef); - mem::desc topMD = createMD({bs, oc}, format::nc); - - mem::primitive_desc botPD = mem::primitive_desc(botMD, engine_); - if (inVal_ && inVal_->get_primitive_desc() == botPD) { - return; - } - - inVal_.reset(new mem(botPD, botData)); - wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); - outVal_.reset(new mem(mem::primitive_desc(topMD, engine_), topData)); - - mkldnn::prop_kind pk = mkldnn::prop_kind::forward; - fc_fwd::desc fwdDesc = biasData != NULL - ? fc_fwd::desc(pk, botMD, wgtMD, biasMD, topMD) - : fc_fwd::desc(pk, botMD, wgtMD, topMD); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - - if (biasData != NULL) { - biasVal_.reset(new mem(mem::primitive_desc(biasMD, engine_), biasData)); - fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); - } else { - fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); - } - pipelineFwd_.clear(); - pipelineFwd_.push_back(*fwd_); -} - -void MkldnnLayer::mkldnnForwardFC(int bs, - int ic, - int ih, - int iw, - real* botData, - int oc, - real* topData, - real* wgtData, - real* biasData) { - // if input size changed, reset it - resetForwardFC(bs, ic, ih, iw, botData, oc, topData, wgtData, biasData); - - this->convertWeightsFromPaddle(); - - // update input, since the data might be changed if this is after data layer - inVal_->set_data_handle(botData); - - // just forward - stream_->submit(pipelineFwd_); -} - -void MkldnnLayer::resetBackwardFC(int bs, - int ic, - int ih, - int iw, - real* botDiff, - real* botData, - int oc, - real* topDiff, - real* wgtDiff, - real* wgtData, - real* biasDiff) { - bool hasSpatial = ih == 1 && iw == 1 ? false : true; - - // backward weight - mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) - : createMD({bs, ic}, format::nc); - mem::desc wgtMD = hasSpatial ? createMD({oc, ic, ih, iw}, format::oihw) - : createMD({oc, ic}, format::oi); - mem::desc topMD = createMD({bs, oc}, format::nc); - mem::desc biasMD = biasDiff != NULL ? createMD({oc}, format::x) - : createMD({}, format::format_undef); - - mem::primitive_desc topPD = mem::primitive_desc(botMD, engine_); - if (outGrad_ && outGrad_->get_primitive_desc() == topPD) { - return; - } - - if (inVal_) { - // update data - inVal_->set_data_handle(botData); - } else { - inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); - } - wgtGrad_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtDiff)); - outGrad_.reset(new mem(topPD, topDiff)); - - fc_fwd::desc fwdDesc = - fc_fwd::desc(mkldnn::prop_kind::forward, botMD, wgtMD, topMD); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = - biasDiff != NULL ? 
fc_bwdWgt::desc(botMD, wgtMD, biasMD, topMD) - : fc_bwdWgt::desc(botMD, wgtMD, topMD); - fc_bwdWgt::primitive_desc bwdWgtPD = - fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - - if (biasDiff != NULL) { - biasGrad_.reset(new mem(mem::primitive_desc(biasMD, engine_), biasDiff)); - bwdWgt_.reset( - new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); - } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); - } - pipelineBwd_.clear(); - pipelineBwd_.push_back(*bwdWgt_); - - // backward data - if (botDiff == NULL) { - return; - } - - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(botMD, wgtMD, topMD); - fc_bwdData::primitive_desc bwdDataPD = - fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - inGrad_.reset(new mem(mem::primitive_desc(botMD, engine_), botDiff)); - if (wgtVal_) { - // update data - wgtVal_->set_data_handle(wgtData); - } else { - wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); - } - bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); - pipelineBwd_.push_back(*bwdData_); -} - -void MkldnnLayer::mkldnnBackwardFC(int bs, - int ic, - int ih, - int iw, - real* botDiff, - real* botData, - int oc, - real* topDiff, - real* wgtDiff, - real* wgtData, - real* biasDiff) { - // if input size changed, reset it - resetBackwardFC(bs, - ic, - ih, - iw, - botDiff, - botData, - oc, - topDiff, - wgtDiff, - wgtData, - biasDiff); - - // update data - outGrad_->set_data_handle(topDiff); - - stream_->submit(pipelineBwd_); -} - -void MkldnnLayer::printSizeInfo() { - VLOG(DNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ - << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ - << ", oh: " << oh_ << ", ow: " << ow_; -} - -mem::desc MkldnnLayer::createMD(mem::dims dims, - mem::format fmt, - mem::data_type type) { - // TODO(TJ): isFmtSuppoted(fmt) - return mem::desc(dims, type, fmt); -} - -} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index e5c93500c7..620bdfc984 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -40,6 +40,9 @@ protected: // output image channel, height and width int oc_, oh_, ow_; + // backward also need reset after reset forward handle + bool needResetBwd_; + // mkldnn engine, stream and primivtives mkldnn::engine engine_; std::shared_ptr stream_; @@ -50,8 +53,6 @@ protected: std::vector pipelineBwd_; // TODO(TJ): change below memory as MkldnnMatrixPtr type - // input == bottom, output == top - // value == data, grad == diff std::shared_ptr inVal_; std::shared_ptr inGrad_; std::shared_ptr outVal_; @@ -71,6 +72,7 @@ public: oc_(0), oh_(0), ow_(0), + needResetBwd_(true), engine_(mkldnn::engine::cpu, 0), stream_(nullptr), fwd_(nullptr), @@ -79,9 +81,21 @@ public: ~MkldnnLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + virtual bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!Layer::init(layerMap, parameterMap)) { + return false; + } + + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
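// (The flag tested above follows the standard gflags pairing -- defined once
//  and declared wherever it is read -- using the project's own lines:
//    DEFINE_bool(use_mkldnn, false, "Only support CPU training");  // Flags.cpp
//    DECLARE_bool(use_mkldnn);                                     // MkldnnLayer.h
//  so the CHECK aborts construction of any Mkldnn* layer when the runtime
//  switch is off.)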
+ << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; + stream_.reset(new MkldnnStream()); + engine_ = CpuEngine::Instance().getEngine(); - virtual void printSizeInfo(); + // TODO(TJ): deivecId + return true; + } /** * convert weight from paddle format to mkldnn format @@ -95,56 +109,24 @@ public: */ virtual void convertWeightsToPaddle() {} - void resetForwardFC(int bs, - int ic, - int ih, - int iw, - real* botData, - int oc, - real* topData, - real* wgtData, - real* biasData); - - void mkldnnForwardFC(int bs, - int ic, - int ih, - int iw, - real* botData, - int oc, - real* topData, - real* wgtData, - real* biasData); - - void resetBackwardFC(int bs, - int ic, - int ih, - int iw, - real* botDiff, - real* botData, - int oc, - real* topDiff, - real* wgtDiff, - real* wgtData, - real* biasDiff); - - void mkldnnBackwardFC(int bs, - int ic, - int ih, - int iw, - real* botDiff, - real* botData, - int oc, - real* topDiff, - real* wgtDiff, - real* wgtData, - real* biasDiff); + /** + * print info about sizes + */ + virtual void printSizeInfo() { + VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ + << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ + << ", oh: " << oh_ << ", ow: " << ow_; + } // TODO(TJ): move to MkldnnMatrix // create memory desc inline mkldnn::memory::desc createMD( mkldnn::memory::dims dims, mkldnn::memory::format fmt, - mkldnn::memory::data_type type = mkldnn::memory::data_type::f32); + mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) { + // TODO(TJ): isFmtSuppoted(fmt) + return mkldnn::memory::desc(dims, type, fmt); + } }; } // namespace paddle diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp index 59b3861df8..9232e2fdcd 100644 --- a/paddle/gserver/tests/MkldnnTester.cpp +++ b/paddle/gserver/tests/MkldnnTester.cpp @@ -118,7 +118,7 @@ void MkldnnTester::checkForward() { printTopDatas(); double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), testLayers_[REF]->getOutputValue()); - VLOG(DNN_ALL) << "Check Forward"; + VLOG(MKLDNN_ALL) << "Check Forward"; EXPECT_LE(fabs(delta), eps_); } @@ -162,7 +162,7 @@ void MkldnnTester::checkBackwardWgts() { EXPECT_LE(fabs(delta), eps_); } - VLOG(DNN_ALL) << "Restore dnn weights before comapre"; + VLOG(MKLDNN_ALL) << "Restore dnn weights before comapre"; restoreWgt(dnnWgts, parameters_[DNN]); } @@ -275,8 +275,8 @@ double MkldnnTester::getDelta(const real* d1, EXPECT_TRUE(std::isnormal(sum)); EXPECT_FALSE(std::isinf(sum)); EXPECT_FALSE(std::isnan(delta)); - VLOG(DNN_ALL) << "reference avg data: " << sum / len - << ", delta: " << delta / sum << ", failCnt:" << failCnt; + VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len + << ", delta: " << delta / sum << ", failCnt:" << failCnt; return (failCnt / (float)len) > failRate ? 
maxOut : delta / sum; } @@ -306,10 +306,8 @@ void MkldnnTester::runOnce() { // clear buffers // ref code will addto the diff, dnn code will writeto it + // and clearTopDatas() and clearWgtDiffs() should be coverd by test layers clearBotDiffs(REF); - // below two should be coverd by test layers - // clearTopDatas(); - // clearWgtDiffs(); } void MkldnnTester::run(const TestConfig& dnn, @@ -321,8 +319,8 @@ void MkldnnTester::run(const TestConfig& dnn, float epsilon, bool log, int level) { - VLOG(DNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() - << " vs " << ref.layerConfig.type(); + VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() + << " vs " << ref.layerConfig.type(); ih_ = inputImgH; iw_ = inputImgW; iter_ = iter; @@ -338,14 +336,14 @@ void MkldnnTester::run(const TestConfig& dnn, clearWgtDiffs(); clearBotDiffs(); for (size_t i = 0; i < iter_; ++i) { - VLOG(DNN_TESTS) << "Check Iteration " << i; + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; runOnce(); } // Then test FLAGS_use_mkldnn_wgt = true FLAGS_use_mkldnn_wgt = true; // after run once the mkldnn weight has been stored in dnnlayer - // then save the weigths and restart again + // then save the weights and restart again vector dnnWgts, refWgts; CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); saveWgt(parameters_[DNN], dnnWgts); @@ -361,7 +359,7 @@ void MkldnnTester::run(const TestConfig& dnn, clearBotDiffs(); for (size_t i = 0; i < iter_; ++i) { - VLOG(DNN_TESTS) << "Check Iteration " << i; + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; runOnce(); } } diff --git a/paddle/gserver/tests/MkldnnTester.h b/paddle/gserver/tests/MkldnnTester.h index 8b3049b5c2..7d1db870d1 100644 --- a/paddle/gserver/tests/MkldnnTester.h +++ b/paddle/gserver/tests/MkldnnTester.h @@ -58,7 +58,7 @@ public: iter_ = iter; eps_ = epsilon; log_ = false; - lvl_ = DNN_ALL; + lvl_ = MKLDNN_ALL; } ~MkldnnTester() {} @@ -72,7 +72,7 @@ public: size_t iter = 3, float epsilon = 1e-4, bool log = false, - int level = DNN_ALL); + int level = MKLDNN_ALL); void setLogLevel(int lvl) { lvl_ = lvl; } private: diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_Mkldnn.cpp index 0516a059de..8e4a8595d3 100644 --- a/paddle/gserver/tests/test_Mkldnn.cpp +++ b/paddle/gserver/tests/test_Mkldnn.cpp @@ -23,7 +23,6 @@ using namespace paddle; // NOLINT DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(use_gpu); DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkldnn_wgt); struct testFCDesc { int bs; @@ -56,12 +55,12 @@ void testFcLayer(const testFCDesc& pm) { } TEST(MkldnnLayer, fcLayer) { - testFcLayer({2, 2, 3, 1, 1}); - testFcLayer({3, 7, 19, 1, 1}); - testFcLayer({8, 16, 32, 13, 13}); - testFcLayer({4, 12, 18, 13, 11}); - testFcLayer({2, 64, 32, 16, 16}); - testFcLayer({15, 3, 6, 16, 16}); + testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13}); + testFcLayer({/*bs*/ 4, /*ic*/ 12, /*oc*/ 18, /*ih*/ 13, /*iw*/ 11}); + testFcLayer({/*bs*/ 2, /*ic*/ 64, /*oc*/ 32, /*ih*/ 16, /*iw*/ 16}); + testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16}); } // TODO(TJ): add branch test diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 3213df5186..da99e5bd53 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1626,15 +1626,14 @@ class 
FCLayer(LayerBase): for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) psize = self.config.size * input_layer.size + dims = [input_layer.size, self.config.size] format = self.inputs[input_index].format sparse = format == "csr" or format == "csc" if use_mkldnn: config_assert(not sparse, "MkldnnFCLayer do not support sparse format yet") - if use_mkldnn and use_mkldnn_wgt: - dims = [self.config.size, input_layer.size] - else: - dims = [input_layer.size, self.config.size] + if use_mkldnn_wgt: + dims = [self.config.size, input_layer.size] if sparse: psize = self.inputs[input_index].nnz else: From 78c3e1de65c31cdabd2af9dfb41f557f447afc27 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 9 Aug 2017 11:28:17 -0700 Subject: [PATCH 16/76] refactor --- paddle/framework/op_registry.h | 115 +++++++++++++++++---------------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 6c26183818..b3663f8bf7 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -307,22 +307,37 @@ class OpRegistry { } }; +class Registrar {}; + template -class OpRegisterHelper { +class OpRegistrar : public Registrar { public: - explicit OpRegisterHelper(const char* op_type) { + explicit OpRegistrar(const char* op_type) { OpRegistry::RegisterOp(op_type); } }; template -class GradOpRegisterHelper { +class GradOpRegistrar : public Registrar { public: - GradOpRegisterHelper(const char* op_type, const char* grad_op_type) { + GradOpRegistrar(const char* op_type, const char* grad_op_type) { OpRegistry::RegisterGradOp(op_type, grad_op_type); } }; +template +class OpKernelRegistrar : public Registrar { + public: + explicit OpKernelRegistrar(const char* op_type) { + ::paddle::framework::OperatorWithKernel::OpKernelKey key; + key.place_ = PlaceType(); + ::paddle::framework::OperatorWithKernel::AllOpKernels()[op_type][key].reset( + new KernelType); + } +}; + +int TouchRegistrar(const Registrar& registrar) { return 0; } + /** * check if MACRO is used in GLOBAL NAMESPACE. */ @@ -335,72 +350,58 @@ class GradOpRegisterHelper { /** * Macro to Register Operator. */ -#define REGISTER_OP(__op_type, __op_class, __op_maker_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE(__reg_op__##__op_type, \ - "REGISTER_OP must be in global namespace"); \ - static ::paddle::framework::OpRegisterHelper<__op_class, __op_maker_class> \ - __op_register_##__op_type##__(#__op_type); \ - int __op_register_##__op_type##_handle__() { return 0; } +#define REGISTER_OP(op_type, op_class, op_maker_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \ + static ::paddle::framework::OpRegistrar \ + __op_registrar_##op_type##__(#op_type); /** * Macro to Register Gradient Operator. 
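 *
 * A hypothetical use, with illustrative operator names that are not taken
 * from this patch:
 *   REGISTER_GRADIENT_OP(my_op, my_op_grad, MyOpGrad);
 * which binds the forward op "my_op" to its gradient op "my_op_grad".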
*/ -#define REGISTER_GRADIENT_OP(__op_type, __grad_op_type, __grad_op_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_gradient_op__##__op_type##__grad_op_type, \ - "REGISTER_GRADIENT_OP must be in global namespace"); \ - static ::paddle::framework::GradOpRegisterHelper<__grad_op_class> \ - __op_gradient_register_##__op_type##__grad_op_type##__(#__op_type, \ - #__grad_op_type); \ - int __op_gradient_register_##__op_type##__grad_op_type##_handle__() { \ - return 0; \ - } +#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_gradient_op__##op_type##_##grad_op_type, \ + "REGISTER_GRADIENT_OP must be called in global namespace"); \ + static ::paddle::framework::GradOpRegistrar \ + __op_gradient_register_##op_type##_##grad_op_type##__(#op_type, \ + #grad_op_type); /** - * Macro to Forbid user register Gradient Operator. + * Macro to Register OperatorKernel. */ -#define NO_GRADIENT(__op_type) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_gradient_op__##__op_type##__op_type##_grad, \ - "NO_GRADIENT must be in global namespace") +#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, kernel_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ + "REGISTER_OP_KERNEL must be called in global namespace"); \ + static ::paddle::framework::OpKernelRegistrar \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); /** - * Macro to Register OperatorKernel. + * Macro to Forbid user register Gradient Operator. */ -#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_op_kernel_##type##_##DEVICE_TYPE##__, \ - "REGISTER_OP_KERNEL must be in global namespace"); \ - struct __op_kernel_register__##type##__##DEVICE_TYPE##__ { \ - __op_kernel_register__##type##__##DEVICE_TYPE##__() { \ - ::paddle::framework::OperatorWithKernel::OpKernelKey key; \ - key.place_ = PlaceType(); \ - ::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \ - .reset(new __VA_ARGS__()); \ - } \ - }; \ - static __op_kernel_register__##type##__##DEVICE_TYPE##__ \ - __reg_kernel_##type##__##DEVICE_TYPE##__; \ - int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; } - -// (type, KernelType) -#define REGISTER_OP_GPU_KERNEL(type, ...) \ - REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__) - -// (type, KernelType) -#define REGISTER_OP_CPU_KERNEL(type, ...) \ - REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) +#define NO_GRADIENT(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_gradient_op__##op_type##_##op_type##_grad, \ + "NO_GRADIENT must be called in global namespace") + +#define REGISTER_OP_GPU_KERNEL(op_type, kernel_class) \ + REGISTER_OP_KERNEL(op_type, GPU, ::paddle::platform::GPUPlace, kernel_class) + +#define REGISTER_OP_CPU_KERNEL(op_type, kernel_class) \ + REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, kernel_class) /** * Macro to mark what Operator and Kernel we will use and tell the compiler to * link them into target. 
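 *
 * For instance, a translation unit that needs a CPU-only operator
 * (hypothetical name) writes:
 *   USE_OP_CPU(my_op);
 * which expands to USE_OP_ITSELF(my_op) plus USE_OP_KERNEL(my_op, CPU).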
*/ -#define USE_OP_WITHOUT_KERNEL(op_type) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __use_op_without_kernel_##op_type, \ - "USE_OP_WITHOUT_KERNEL must be in global namespace"); \ - extern int __op_register_##op_type##_handle__(); \ - static int __use_op_ptr_##op_type##_without_kernel__ \ +#define USE_OP_ITSELF(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_itself_##op_type, \ + "USE_OP_ITSELF must be called in global namespace"); \ + extern ::paddle::framework::OpRegistrar \ + __op_registrar_##op_type##__; \ + static int __use_op_ptr_##op_type##_without_kernel__ \ __attribute__((unused)) = __op_register_##op_type##_handle__() #define USE_OP_KERNEL(op_type, DEVICE_TYPE) \ @@ -413,8 +414,8 @@ class GradOpRegisterHelper { __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__() // use Operator with only cpu kernel. -#define USE_OP_CPU(op_type) \ - USE_OP_WITHOUT_KERNEL(op_type); \ +#define USE_OP_CPU(op_type) \ + USE_OP_ITSELF(op_type); \ USE_OP_KERNEL(op_type, CPU) #ifdef PADDLE_ONLY_CPU From e14a4541dd8f85a49ee3c42429f0f663864f1e0a Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 9 Aug 2017 13:16:08 -0700 Subject: [PATCH 17/76] Refactor registry macro --- paddle/framework/op_registry.h | 102 ++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index b3663f8bf7..0ac3ffda28 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -307,7 +307,10 @@ class OpRegistry { } }; -class Registrar {}; +class Registrar { + public: + void Touch() {} +}; template class OpRegistrar : public Registrar { @@ -336,8 +339,6 @@ class OpKernelRegistrar : public Registrar { } }; -int TouchRegistrar(const Registrar& registrar) { return 0; } - /** * check if MACRO is used in GLOBAL NAMESPACE. */ @@ -354,28 +355,40 @@ int TouchRegistrar(const Registrar& registrar) { return 0; } STATIC_ASSERT_GLOBAL_NAMESPACE( \ __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \ static ::paddle::framework::OpRegistrar \ - __op_registrar_##op_type##__(#op_type); + __op_registrar_##op_type##__(#op_type); \ + int TouchOpRegistrar_##op_type() { \ + __op_registrar_##op_type##__.Touch(); \ + return 0; \ + } /** * Macro to Register Gradient Operator. */ -#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_gradient_op__##op_type##_##grad_op_type, \ - "REGISTER_GRADIENT_OP must be called in global namespace"); \ - static ::paddle::framework::GradOpRegistrar \ - __op_gradient_register_##op_type##_##grad_op_type##__(#op_type, \ - #grad_op_type); +#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_gradient_op__##op_type##_##grad_op_type, \ + "REGISTER_GRADIENT_OP must be called in global namespace"); \ + static ::paddle::framework::GradOpRegistrar \ + __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type, \ + #grad_op_type); \ + int TouchOpGradientRegister_##op_type() { \ + __op_gradient_registrar_##op_type##_##grad_op_type##__.Touch(); \ + return 0; \ + } /** * Macro to Register OperatorKernel. 
*/ -#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, kernel_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ - "REGISTER_OP_KERNEL must be called in global namespace"); \ - static ::paddle::framework::OpKernelRegistrar \ - __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); +#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ + "REGISTER_OP_KERNEL must be called in global namespace"); \ + static ::paddle::framework::OpKernelRegistrar \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \ + int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch(); \ + return 0; \ + } /** * Macro to Forbid user register Gradient Operator. @@ -385,44 +398,41 @@ int TouchRegistrar(const Registrar& registrar) { return 0; } __reg_gradient_op__##op_type##_##op_type##_grad, \ "NO_GRADIENT must be called in global namespace") -#define REGISTER_OP_GPU_KERNEL(op_type, kernel_class) \ - REGISTER_OP_KERNEL(op_type, GPU, ::paddle::platform::GPUPlace, kernel_class) +#define REGISTER_OP_GPU_KERNEL(op_type, ...) \ + REGISTER_OP_KERNEL(op_type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__) -#define REGISTER_OP_CPU_KERNEL(op_type, kernel_class) \ - REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, kernel_class) +#define REGISTER_OP_CPU_KERNEL(op_type, ...) \ + REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) /** * Macro to mark what Operator and Kernel we will use and tell the compiler to * link them into target. */ -#define USE_OP_ITSELF(op_type) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __use_op_itself_##op_type, \ - "USE_OP_ITSELF must be called in global namespace"); \ - extern ::paddle::framework::OpRegistrar \ - __op_registrar_##op_type##__; \ - static int __use_op_ptr_##op_type##_without_kernel__ \ - __attribute__((unused)) = __op_register_##op_type##_handle__() - -#define USE_OP_KERNEL(op_type, DEVICE_TYPE) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __use_op_kernel_##op_type##_##DEVICE_TYPE##__, \ - "USE_OP_KERNEL must be in global namespace"); \ - extern int __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__(); \ - static int __use_op_ptr_##op_type##_##DEVICE_TYPE##_kernel__ \ - __attribute__((unused)) = \ - __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__() - -// use Operator with only cpu kernel. 
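
Aside: the first "Fix bug" commit below undoes a subtle mistake in the macros just added: `TouchOpRegistrar_##op_type##()` is ill-formed, because the second `##` asks the preprocessor to paste the expanded name onto `(`, which is not a valid preprocessing token. Only the first `##`, inside the identifier, is wanted. A minimal reproduction with hypothetical names:

    int Touch_add_two() { return 0; }

    #define CALL_BROKEN(name) Touch_##name##()  // pastes "add_two" and "(" -> ill-formed
    #define CALL_FIXED(name) Touch_##name()     // paste stays inside the identifier

    // static int a = CALL_BROKEN(add_two);  // compilers reject this token paste
    static int b = CALL_FIXED(add_two);      // expands to Touch_add_two()

    int main() { return b; }
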
-#define USE_OP_CPU(op_type) \ - USE_OP_ITSELF(op_type); \ - USE_OP_KERNEL(op_type, CPU) +#define USE_OP_ITSELF(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_itself_##op_type, \ + "USE_OP_ITSELF must be called in global namespace"); \ + extern int TouchOpRegistrar_##op_type(); \ + static int use_op_itself_##op_type##_ __attribute__((unused)) = \ + TouchOpRegistrar_##op_type##() + +#define USE_OP_KERNEL(op_type, DEVICE_TYPE) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_kernel_##op_type##_##DEVICE_TYPE##__, \ + "USE_OP_KERNEL must be in global namespace"); \ + extern int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE(); \ + static int use_op_kernel_##op_type##_##DEVICE_TYPE##_ \ + __attribute__((unused)) = \ + TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE##() #ifdef PADDLE_ONLY_CPU -#define USE_OP(op_type) USE_OP_CPU(op_type) +#define USE_OP(op_type) \ + USE_OP_ITSELF(op_type); \ + USE_OP_KERNEL(op_type, CPU) #else -#define USE_OP(op_type) \ - USE_OP_CPU(op_type); \ +#define USE_OP(op_type) \ + USE_OP_ITSELF(op_type); \ + USE_OP_KERNEL(op_type, CPU); \ USE_OP_KERNEL(op_type, GPU) #endif From ebb3a588359fdacccbd57e072c8df557419653a0 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 9 Aug 2017 13:42:49 -0700 Subject: [PATCH 18/76] Fix bug --- paddle/framework/op_registry.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 0ac3ffda28..3858cd7794 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -414,7 +414,7 @@ class OpKernelRegistrar : public Registrar { "USE_OP_ITSELF must be called in global namespace"); \ extern int TouchOpRegistrar_##op_type(); \ static int use_op_itself_##op_type##_ __attribute__((unused)) = \ - TouchOpRegistrar_##op_type##() + TouchOpRegistrar_##op_type() #define USE_OP_KERNEL(op_type, DEVICE_TYPE) \ STATIC_ASSERT_GLOBAL_NAMESPACE( \ @@ -423,7 +423,7 @@ class OpKernelRegistrar : public Registrar { extern int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE(); \ static int use_op_kernel_##op_type##_##DEVICE_TYPE##_ \ __attribute__((unused)) = \ - TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE##() + TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() #ifdef PADDLE_ONLY_CPU #define USE_OP(op_type) \ From 57c097841107a4d03100aa6dfe4fdf3e3ce165a1 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 9 Aug 2017 14:00:06 -0700 Subject: [PATCH 19/76] Fix bug --- paddle/framework/pybind.cc | 4 ++-- paddle/operators/recurrent_op_test.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 915ffb1c00..cc2bad6234 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -30,7 +30,7 @@ namespace py = pybind11; USE_OP(add_two); USE_OP_CPU(onehot_cross_entropy); -USE_OP_WITHOUT_KERNEL(fc); +USE_OP_ITSELF(fc); USE_OP(sgd); USE_OP(mul); USE_OP(mean); @@ -38,7 +38,7 @@ USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); USE_OP(fill_zeros_like); -USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP_ITSELF(recurrent_op); USE_OP(uniform_random); namespace paddle { namespace framework { diff --git a/paddle/operators/recurrent_op_test.cc b/paddle/operators/recurrent_op_test.cc index 6ce28a2b52..0c9a343415 100644 --- a/paddle/operators/recurrent_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -395,4 +395,4 @@ TEST(RecurrentOp, LinkMemories) { USE_OP(add_two); USE_OP(mul); -USE_OP_WITHOUT_KERNEL(recurrent_op); +USE_OP_ITSELF(recurrent_op); From 
54fad18382741baa5b7965130a215daa137aa03d Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 9 Aug 2017 14:03:37 -0700 Subject: [PATCH 20/76] Fix error --- paddle/framework/op_registry.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 3858cd7794..3633ddb9df 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -425,14 +425,15 @@ class OpKernelRegistrar : public Registrar { __attribute__((unused)) = \ TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() -#ifdef PADDLE_ONLY_CPU -#define USE_OP(op_type) \ - USE_OP_ITSELF(op_type); \ +#define USE_CPU_OP(op_type) \ + USE_OP_ITSELF(op_type); \ USE_OP_KERNEL(op_type, CPU) + +#ifdef PADDLE_ONLY_CPU +#define USE_OP(op_type) USE_CPU_OP(op_type) #else -#define USE_OP(op_type) \ - USE_OP_ITSELF(op_type); \ - USE_OP_KERNEL(op_type, CPU); \ +#define USE_OP(op_type) \ + USE_CPU_OP(op_type); \ USE_OP_KERNEL(op_type, GPU) #endif From e4f058cec75d3e6b28a158b5215cbf394e282d84 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 9 Aug 2017 14:05:12 -0700 Subject: [PATCH 21/76] Fix error --- paddle/framework/pybind.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index cc2bad6234..a955191e98 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -29,7 +29,7 @@ limitations under the License. */ namespace py = pybind11; USE_OP(add_two); -USE_OP_CPU(onehot_cross_entropy); +USE_CPU_OP(onehot_cross_entropy); USE_OP_ITSELF(fc); USE_OP(sgd); USE_OP(mul); From f66d78680d9d52e9ea29796e5bcc1d9106772756 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 9 Aug 2017 14:48:19 -0700 Subject: [PATCH 22/76] Add macro USE_OP_GRADIENT() --- paddle/framework/op_registry.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 3633ddb9df..a3fd93290a 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -371,7 +371,7 @@ class OpKernelRegistrar : public Registrar { static ::paddle::framework::GradOpRegistrar \ __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type, \ #grad_op_type); \ - int TouchOpGradientRegister_##op_type() { \ + int TouchOpGradientRegistrar_##op_type() { \ __op_gradient_registrar_##op_type##_##grad_op_type##__.Touch(); \ return 0; \ } @@ -416,6 +416,14 @@ class OpKernelRegistrar : public Registrar { static int use_op_itself_##op_type##_ __attribute__((unused)) = \ TouchOpRegistrar_##op_type() +#define USE_OP_GRADIENT(op_type) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __use_op_gradient_##op_type, \ + "USE_OP_GRADIENT must be called in global namespace"); \ + extern int TouchOpGradientRegistrar_##op_type(); \ + static int use_op_gradient_##op_type##_ __attribute__((unused)) = \ + TouchOpGradientRegistrar_##op_type() + #define USE_OP_KERNEL(op_type, DEVICE_TYPE) \ STATIC_ASSERT_GLOBAL_NAMESPACE( \ __use_op_kernel_##op_type##_##DEVICE_TYPE##__, \ @@ -425,9 +433,10 @@ class OpKernelRegistrar : public Registrar { __attribute__((unused)) = \ TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() -#define USE_CPU_OP(op_type) \ - USE_OP_ITSELF(op_type); \ - USE_OP_KERNEL(op_type, CPU) +#define USE_CPU_OP(op_type) \ + USE_OP_ITSELF(op_type); \ + USE_OP_KERNEL(op_type, CPU); \ + USE_OP_GRADIENT(op_type) #ifdef PADDLE_ONLY_CPU #define USE_OP(op_type) USE_CPU_OP(op_type) From bc1459cefb292c4aff7a7cae43eeab175b40b722 Mon 
Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 9 Aug 2017 15:50:04 -0700
Subject: [PATCH 23/76] Disable macro `USE_OP_GRADIENT` temporarily

---
 paddle/framework/op_registry.h  | 39 +++++++++++++++++++++++----------
 paddle/framework/pybind.cc      |  2 +-
 paddle/operators/sgd_op_test.cc |  2 +-
 3 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index a3fd93290a..9eeec37331 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -416,6 +416,12 @@ class OpKernelRegistrar : public Registrar {
   static int use_op_itself_##op_type##_ __attribute__((unused)) = \
       TouchOpRegistrar_##op_type()
 
+// TODO(jiayi): Most ops' gradient ops have not been completed. So we use
+// `NO_GRAD` to disable macro USE_OP_GRADIENT(op_type). Otherwise the code
+// can't be compiled. `NO_GRAD` should be removed after all gradient ops are
+// completed.
+#define NO_GRAD
+#ifndef NO_GRAD
 #define USE_OP_GRADIENT(op_type)                            \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                           \
       __use_op_gradient_##op_type,                          \
       "USE_OP_GRADIENT must be called in global namespace"); \
   extern int TouchOpGradientRegistrar_##op_type();          \
   static int use_op_gradient_##op_type##_ __attribute__((unused)) = \
       TouchOpGradientRegistrar_##op_type()
+#else
+#define USE_OP_GRADIENT(op_type)
+#endif
 
-#define USE_OP_KERNEL(op_type, DEVICE_TYPE)                 \
+#define USE_OP_DEVICE_KERNEL(op_type, DEVICE_TYPE)          \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                           \
       __use_op_kernel_##op_type##_##DEVICE_TYPE##__,        \
-      "USE_OP_KERNEL must be in global namespace");         \
+      "USE_OP_DEVICE_KERNEL must be in global namespace");  \
   extern int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE(); \
   static int use_op_kernel_##op_type##_##DEVICE_TYPE##_    \
       __attribute__((unused)) =                             \
       TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE()
 
-#define USE_CPU_OP(op_type)     \
-  USE_OP_ITSELF(op_type);       \
-  USE_OP_KERNEL(op_type, CPU);  \
-  USE_OP_GRADIENT(op_type)
-
 #ifdef PADDLE_ONLY_CPU
-#define USE_OP(op_type) USE_CPU_OP(op_type)
+#define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
 #else
-#define USE_OP(op_type)  \
-  USE_CPU_OP(op_type);   \
+#define USE_OP_KERNEL(op_type)          \
+  USE_OP_DEVICE_KERNEL(op_type, CPU);   \
+  USE_OP_DEVICE_KERNEL(op_type, GPU)
 #endif
 
+#define USE_NO_GRAD_OP(op_type)  \
+  USE_OP_ITSELF(op_type);        \
+  USE_OP_KERNEL(op_type)
+
+#define USE_CPU_OP(op_type)             \
+  USE_OP_ITSELF(op_type);               \
+  USE_OP_DEVICE_KERNEL(op_type, CPU);   \
+  USE_OP_GRADIENT(op_type)
+
+#define USE_OP(op_type)     \
+  USE_NO_GRAD_OP(op_type);  \
+  USE_OP_GRADIENT(op_type)
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc
index a955191e98..9c618ad900 100644
--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@@ -31,7 +31,7 @@ namespace py = pybind11;
 USE_OP(add_two);
 USE_CPU_OP(onehot_cross_entropy);
 USE_OP_ITSELF(fc);
-USE_OP(sgd);
+USE_NO_GRAD_OP(sgd);
 USE_OP(mul);
 USE_OP(mean);
 USE_OP(sigmoid);
diff --git a/paddle/operators/sgd_op_test.cc b/paddle/operators/sgd_op_test.cc
index 75137259f5..b2a5487f12 100644
--- a/paddle/operators/sgd_op_test.cc
+++ b/paddle/operators/sgd_op_test.cc
@@ -14,7 +14,7 @@ limitations under the License.
 */
 #include
 #include
-USE_OP(sgd);
+USE_NO_GRAD_OP(sgd);
 TEST(SGDOp, GetOpProto) {
   auto& protos = paddle::framework::OpRegistry::protos();
   auto it = protos.find("sgd");
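
Aside: the `NO_GRAD` switch in the patch above works purely at preprocessing time. While it is defined, `USE_OP_GRADIENT(...)` expands to nothing, so no gradient-registrar symbol is referenced and linking succeeds even though most gradient ops do not exist yet. A compilable sketch of the same gate, with illustrative names rather than Paddle's:

    #define NO_GRAD  // remove this line once all gradient ops exist

    #ifndef NO_GRAD
    #define USE_OP_GRADIENT(op)              \
      extern int TouchGradRegistrar_##op();  \
      static int use_grad_##op __attribute__((unused)) = TouchGradRegistrar_##op()
    #else
    #define USE_OP_GRADIENT(op)  // no-op: nothing referenced, nothing linked
    #endif

    USE_OP_GRADIENT(mul);  // expands to just ';' while NO_GRAD is defined

    int main() { return 0; }
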
From 046af5478a34db8b67158e50bcda7479d17fe6d9 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 9 Aug 2017 15:56:48 -0700
Subject: [PATCH 24/76] Move `Registrar`s into Touch functions

---
 paddle/framework/op_registry.h | 46 +++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index 9eeec37331..05f51d885c 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -307,10 +307,7 @@ class OpRegistry {
   }
 };
 
-class Registrar {
- public:
-  void Touch() {}
-};
+class Registrar {};
 
 template
 class OpRegistrar : public Registrar {
@@ -354,40 +351,37 @@ class OpKernelRegistrar : public Registrar {
 #define REGISTER_OP(op_type, op_class, op_maker_class)                        \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                             \
       __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \
-  static ::paddle::framework::OpRegistrar                                     \
-      __op_registrar_##op_type##__(#op_type);                                 \
   int TouchOpRegistrar_##op_type() {                                          \
-    __op_registrar_##op_type##__.Touch();                                     \
+    static ::paddle::framework::OpRegistrar                                   \
+        __op_registrar_##op_type##__(#op_type);                               \
     return 0;                                                                 \
   }
 
 /**
 * Macro to Register Gradient Operator.
 */
-#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class)      \
-  STATIC_ASSERT_GLOBAL_NAMESPACE(                                       \
-      __reg_gradient_op__##op_type##_##grad_op_type,                    \
-      "REGISTER_GRADIENT_OP must be called in global namespace");       \
-  static ::paddle::framework::GradOpRegistrar                           \
-      __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type,  \
-                                                             #grad_op_type); \
-  int TouchOpGradientRegistrar_##op_type() {                            \
-    __op_gradient_registrar_##op_type##_##grad_op_type##__.Touch();     \
-    return 0;                                                           \
+#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class)       \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                        \
+      __reg_gradient_op__##op_type##_##grad_op_type,                     \
+      "REGISTER_GRADIENT_OP must be called in global namespace");        \
+  int TouchOpGradientRegistrar_##op_type() {                             \
+    static ::paddle::framework::GradOpRegistrar                          \
+        __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type, \
+                                                               #grad_op_type); \
+    return 0;                                                            \
   }
 
 /**
 * Macro to Register OperatorKernel.
 */
-#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...) 
\ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ + "REGISTER_OP_KERNEL must be called in global namespace"); \ + int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \ + static ::paddle::framework::OpKernelRegistrar \ + __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \ + return 0; \ } /** From aaddf5f6940768b827f03305e86da557ab24db65 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 10 Aug 2017 08:25:22 +0800 Subject: [PATCH 25/76] test on CI --- paddle/scripts/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 44442be472..f70583c641 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -74,11 +74,11 @@ cat < Date: Wed, 9 Aug 2017 17:45:21 -0700 Subject: [PATCH 26/76] Fix bug --- paddle/framework/op_registry.h | 48 ++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 05f51d885c..aed244d61a 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -307,7 +307,10 @@ class OpRegistry { } }; -class Registrar {}; +class Registrar { + public: + void Touch() {} +}; template class OpRegistrar : public Registrar { @@ -351,37 +354,40 @@ class OpKernelRegistrar : public Registrar { #define REGISTER_OP(op_type, op_class, op_maker_class) \ STATIC_ASSERT_GLOBAL_NAMESPACE( \ __reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \ + static ::paddle::framework::OpRegistrar \ + __op_registrar_##op_type##__(#op_type); \ int TouchOpRegistrar_##op_type() { \ - static ::paddle::framework::OpRegistrar \ - __op_registrar_##op_type##__(#op_type); \ + __op_registrar_##op_type##__.Touch(); \ return 0; \ } /** * Macro to Register Gradient Operator. */ -#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_gradient_op__##op_type##_##grad_op_type, \ - "REGISTER_GRADIENT_OP must be called in global namespace"); \ - int TouchOpGradientRegistrar_##op_type() { \ - static ::paddle::framework::GradOpRegistrar \ - __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type, \ - #grad_op_type); \ - return 0; \ +#define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_gradient_op__##op_type##_##grad_op_type, \ + "REGISTER_GRADIENT_OP must be called in global namespace"); \ + static ::paddle::framework::GradOpRegistrar \ + __op_gradient_registrar_##op_type##_##grad_op_type##__(#op_type, \ + #grad_op_type); \ + int TouchOpGradientRegistrar_##op_type() { \ + __op_gradient_registrar_##op_type##_##grad_op_type##__.Touch(); \ + return 0; \ } /** * Macro to Register OperatorKernel. */ -#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ - "REGISTER_OP_KERNEL must be called in global namespace"); \ - int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \ - static ::paddle::framework::OpKernelRegistrar \ - __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \ - return 0; \ +#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...) 
\
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                 \
+      __reg_op_kernel_##op_type##_##DEVICE_TYPE##__,              \
+      "REGISTER_OP_KERNEL must be called in global namespace");   \
+  static ::paddle::framework::OpKernelRegistrar                   \
+      __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \
+  int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() {        \
+    __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch();  \
+    return 0;                                                     \
+  }
 
 /**
@@ -436,6 +442,8 @@ class OpKernelRegistrar : public Registrar {
       __attribute__((unused)) = \
       TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE()
 
+// TODO(jiayi): The following macros seem ugly; is there a better way?
+
 #ifdef PADDLE_ONLY_CPU
 #define USE_OP_KERNEL(op_type) USE_OP_DEVICE_KERNEL(op_type, CPU)
 #else

From 3736e3dfdfdff91e6fc4cc6f4fcb68f57cd61919 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 10 Aug 2017 08:52:10 +0800
Subject: [PATCH 27/76] add python unit test dependencies

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 8ac123bf9c..c7fbe12c1b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -64,7 +64,7 @@ RUN pip install --upgrade pip && \
     pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
     pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \
     pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip install rarfile
+    pip install rarfile 'scipy>=0.19.0' 'nltk>=3.2.2'
 
 # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
 # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2

From 6cd1617129620c88c84dcfe55f1e21e1882ef7e0 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 10 Aug 2017 09:20:16 +0800
Subject: [PATCH 28/76] add pip install opencv-python

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index c7fbe12c1b..5d4de6e30d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -64,7 +64,7 @@ RUN pip install --upgrade pip && \
     pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
     pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \
     pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip install rarfile 'scipy>=0.19.0' 'nltk>=3.2.2'
+    pip install rarfile opencv-python 'scipy>=0.19.0' 'nltk>=3.2.2'
 
 # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
 # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2

From 7d977e885ee22da42bd38731b90786fbc594f6eb Mon Sep 17 00:00:00 2001
From: liaogang
Date: Thu, 10 Aug 2017 09:22:49 +0800
Subject: [PATCH 29/76] add pip install opencv-python

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index c7fbe12c1b..5d4de6e30d 100644
--- a/Dockerfile
+++ 
b/Dockerfile @@ -64,7 +64,7 @@ RUN pip install --upgrade pip && \ pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ - pip install rarfile opencv-python 'scipy>=0.19.0' 'nltk>=3.2.2' + pip install opencv-python rarfile 'scipy>=0.19.0' 'nltk>=3.2.2' # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 From 7a56d46a8a1040773c3d4e27bc111124eae95bae Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 10 Aug 2017 11:21:03 +0800 Subject: [PATCH 31/76] Rename PROJ_ROOT to PADDLE_SOURCE_DIR and PROJ_BINARY_ROOT to PADDLE_BINARY_DIR --- CMakeLists.txt | 8 ++-- cmake/configure.cmake | 2 +- cmake/cpplint.cmake | 2 +- cmake/package.cmake | 2 +- cmake/util.cmake | 4 +- cmake/version.cmake | 2 +- doc/templates/conf.py.cn.in | 6 +-- doc/templates/conf.py.en.in | 6 +-- paddle/api/CMakeLists.txt | 14 +++--- paddle/capi/tests/CMakeLists.txt | 4 +- paddle/gserver/tests/CMakeLists.txt | 24 +++++----- paddle/math/CMakeLists.txt | 8 ++-- paddle/pserver/test/CMakeLists.txt | 6 +-- paddle/trainer/tests/CMakeLists.txt | 48 +++++++++---------- paddle/utils/tests/CMakeLists.txt | 2 +- proto/CMakeLists.txt | 4 +- python/CMakeLists.txt | 8 ++-- .../tests/CMakeLists.txt | 14 +++--- python/setup.py.in | 14 +++--- 19 files changed, 89 insertions(+), 89 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b174831109..72a9165431 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,8 @@ cmake_minimum_required(VERSION 3.0) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") -set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) -set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) +set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) include(system) @@ -121,8 +121,8 @@ include(version) # set PADDLE_VERSION include(coveralls) # set code coverage -include_directories("${PROJ_ROOT}") -include_directories("${PROJ_ROOT}/paddle/cuda/include") +include_directories("${PADDLE_SOURCE_DIR}") +include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") include_directories(${Boost_INCLUDE_DIRS}) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 2ac0989546..209f9078a6 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -129,7 +129,7 @@ if(WITH_GOLANG) add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide COMMAND env GOPATH=${GOPATH} ${GLIDE} install COMMAND touch ${CMAKE_BINARY_DIR}/glide - DEPENDS ${PROJ_ROOT}/go/glide.lock + DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" ) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 5184f0815f..8d5d533126 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -52,7 +52,7 @@ macro(add_style_check_target TARGET_NAME) if(SOURCES_LIST) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" + COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py" "--filter=${STYLE_FILTER}" ${SOURCES_LIST} COMMENT "cpplint: Checking source code style" diff --git a/cmake/package.cmake b/cmake/package.cmake index ff49a2d08e..79e02147f3 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -12,7 +12,7 @@ 
set(CPACK_PACKAGE_DESCRIPTION "") set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl") set(CPACK_DEBIAN_PACKAGE_SECTION Devel) set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION}) -set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst") +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PADDLE_SOURCE_DIR}/paddle/scripts/deb/postinst") #set(CPACK_GENERATOR "DEB") # Start cpack include (CMakePackageConfigHelpers) diff --git a/cmake/util.cmake b/cmake/util.cmake index 4a27623b7f..0da4969d31 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -141,8 +141,8 @@ endmacro() function(create_resources res_file output_file) add_custom_command( OUTPUT ${output_file} - COMMAND python ARGS ${PROJ_ROOT}/cmake/make_resource.py ${res_file} ${output_file} - DEPENDS ${res_file} ${PROJ_ROOT}/cmake/make_resource.py) + COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file} + DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py) endfunction() diff --git a/cmake/version.cmake b/cmake/version.cmake index ac1583a24c..cde650128a 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -4,7 +4,7 @@ set(tmp_version "HEAD") while ("${PADDLE_VERSION}" STREQUAL "") execute_process( COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version} - WORKING_DIRECTORY ${PROJ_ROOT} + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE GIT_TAG_NAME RESULT_VARIABLE GIT_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 673948dfe7..41b35b5b23 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -13,7 +13,7 @@ # serve to show the default. import sys import os, subprocess -sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform import paddle @@ -24,7 +24,7 @@ AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index b6b50b7dcd..5822c2481d 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -13,7 +13,7 @@ # serve to show the default. 
import sys import os, subprocess -sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform import paddle @@ -25,7 +25,7 @@ AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 7a1e8b8b26..d7b3d2bdec 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -19,9 +19,9 @@ add_library(paddle_api STATIC ${API_SOURCES}) add_dependencies(paddle_api paddle_proto paddle_trainer_lib) INCLUDE(${SWIG_USE_FILE}) -INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) +INCLUDE_DIRECTORIES(${PADDLE_SOURCE_DIR}/paddle) -FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py) +FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py) SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) @@ -79,16 +79,16 @@ SWIG_LINK_LIBRARIES(swig_paddle ${START_END} ) -add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PADDLE_SOURCE_DIR}/paddle/py_paddle + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PADDLE_SOURCE_DIR}/paddle/py_paddle COMMAND ${CMAKE_COMMAND} -E touch .timestamp - WORKING_DIRECTORY ${PROJ_ROOT}/paddle + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle DEPENDS _swig_paddle ) # TODO(yuyang18) : make wheel name calculated by cmake -add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) +add_custom_target(python_api_wheel ALL DEPENDS ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so) if(WITH_TESTING) IF(NOT PY_PIP_FOUND) diff --git a/paddle/capi/tests/CMakeLists.txt b/paddle/capi/tests/CMakeLists.txt index d73f6b7733..8208808b94 100644 --- a/paddle/capi/tests/CMakeLists.txt +++ b/paddle/capi/tests/CMakeLists.txt @@ -10,5 +10,5 @@ target_include_directories(capi_test_gradientMachine PUBLIC ${PADDLE_CAPI_INC_PATH}) target_link_libraries(capi_test_gradientMachine paddle_capi) add_test(NAME capi_test_gradientMachine - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/capi/tests) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python 
${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests) diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 209d0ab9c8..294d5f115d 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -9,7 +9,7 @@ add_unittest_without_exec(test_ProtoDataProvider # mkdir will get error. add_test(NAME test_ProtoDataProvider COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) ################# test_LayerGrad ####################### add_unittest_without_exec(test_LayerGrad @@ -92,8 +92,8 @@ if(WITH_PYTHON) test_PyDataProvider.cpp) add_test(NAME test_PyDataProvider - COMMAND .set_python_path.sh -d ./gserver/tests:${PROJ_ROOT}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ./gserver/tests:${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() ############### test_RecurrentLayer ####################### @@ -106,7 +106,7 @@ if(NOT WITH_DOUBLE) add_test(NAME test_WarpCTCLayer COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_WarpCTCLayer --warpctc_dir=${WARPCTC_LIB_DIR} - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() ############### test_RecurrentGradientMachine ############### @@ -116,20 +116,20 @@ add_unittest_without_exec(test_RecurrentGradientMachine test_RecurrentGradientMachine.cpp) add_test(NAME test_RecurrentGradientMachine COMMAND .set_python_path.sh -d - ${PROJ_ROOT}/python:${PROJ_ROOT}/paddle/gserver/tests + ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) add_unittest_without_exec(test_NetworkCompare test_NetworkCompare.cpp) if(WITH_GPU) add_test(NAME test_NetworkCompare - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) else() add_test(NAME test_NetworkCompare - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() @@ -137,6 +137,6 @@ add_unittest_without_exec(test_PyDataProvider2 test_PyDataProvider2.cpp) add_test(NAME test_PyDataProvider2 - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/paddle/gserver/tests:${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 - WORKING_DIRECTORY ${PROJ_ROOT}/paddle + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/paddle/gserver/tests:${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle ) diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 9981de6160..bf28092e82 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -15,13 +15,13 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . 
*.cpp) set(MATH_SOURCES - "${PROJ_ROOT}/paddle/math/BaseMatrix.cu" - "${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu" + "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" + "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" ${MATH_SOURCES}) if(NOT WITH_GPU) # then compile BaseMatrix.cu as c++ file - compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/BaseMatrix.cu") - compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu") add_library(paddle_math STATIC ${MATH_SOURCES}) else() diff --git a/paddle/pserver/test/CMakeLists.txt b/paddle/pserver/test/CMakeLists.txt index 6e8f9c37f6..b66a00ba06 100644 --- a/paddle/pserver/test/CMakeLists.txt +++ b/paddle/pserver/test/CMakeLists.txt @@ -3,7 +3,7 @@ add_unittest_without_exec(socket_test SocketTest.cpp) add_test(NAME socket_test - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/socket_test --loop_time=10) ####################### test_ProtoServer #################### @@ -12,7 +12,7 @@ add_unittest_without_exec(test_ProtoServer IF(NOT ON_TRAVIS) add_test(NAME test_ProtoServer - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) ENDIF(NOT ON_TRAVIS) @@ -24,5 +24,5 @@ ENDIF(NOT ON_TRAVIS) add_unittest_without_exec(test_ParameterServer2 test_ParameterServer2.cpp) add_test(NAME test_ParameterServer2 - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port -n 4 + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port -n 4 ${CMAKE_CURRENT_BINARY_DIR}/test_ParameterServer2) diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt index 08b2d8a38e..f01ad4142d 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/trainer/tests/CMakeLists.txt @@ -2,19 +2,19 @@ add_unittest_without_exec(test_Compare test_Compare.cpp) add_test(NAME test_Compare - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_Compare - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ################# test_Trainer ########################### add_unittest_without_exec(test_Trainer test_Trainer.cpp) add_test(NAME test_Trainer - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/gen_proto_data.py && - ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py && + ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_Trainer - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ############### test_TrainerOnePass ########################## if(WITH_PYTHON) @@ -23,60 +23,60 @@ if(WITH_PYTHON) add_unittest_without_exec(test_TrainerOnePass test_TrainerOnePass.cpp) add_test(NAME test_TrainerOnePass - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d - ${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests - ${PROJ_ROOT}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass 
- WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests + ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) endif() ################ test_CompareTwoNets ###################### add_unittest_without_exec(test_CompareTwoNets test_CompareTwoNets.cpp) add_test(NAME test_CompareTwoNets - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ############### test_CompareTwoOpts ################### add_unittest_without_exec(test_CompareTwoOpts test_CompareTwoOpts.cpp) add_test(NAME test_CompareTwoOpts - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts --config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf --num_passes=1 --need_high_accuracy=0 - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ################# test_CompareSparse ################## add_unittest_without_exec(test_CompareSparse test_CompareSparse.cpp) if(NOT ON_TRAVIS) add_test(NAME test_CompareSparse - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ./.set_port.sh -p port -n 6 ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) endif() ################# test_recurrent_machine_generation ############### add_unittest_without_exec(test_recurrent_machine_generation test_recurrent_machine_generation.cpp) add_test(NAME test_recurrent_machine_generation - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_recurrent_machine_generation - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) #################### test_PyDataProviderWrapper ######################### add_unittest_without_exec(test_PyDataProviderWrapper test_PyDataProviderWrapper.cpp) add_test(NAME test_PyDataProviderWrapper - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d - ${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProviderWrapper - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) #################### test_config_parser ######################### add_test(NAME test_config_parser - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/config_parser_test.py - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d 
${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/config_parser_test.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt index aa923b3553..c770ce1698 100644 --- a/paddle/utils/tests/CMakeLists.txt +++ b/paddle/utils/tests/CMakeLists.txt @@ -13,6 +13,6 @@ add_executable( link_paddle_exe(test_CustomStackTracePrint) if(NOT APPLE) add_test(NAME test_CustomStackTracePrint - COMMAND ${PROJ_ROOT}/paddle/utils/tests/test_CustomStackTracePrint.sh + COMMAND ${PADDLE_SOURCE_DIR}/paddle/utils/tests/test_CustomStackTracePrint.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index e1cea8bd0d..6212c2e60a 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -9,13 +9,13 @@ foreach(filename ${proto_filenames}) get_filename_component(ABS_FIL ${filename} ABSOLUTE) get_filename_component(FIL_WE ${filename} NAME_WE) set(CUR_PROTO_GEN_PY - ${PROJ_ROOT}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) + ${PADDLE_SOURCE_DIR}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) set(PROTO_GEN_PY ${CUR_PROTO_GEN_PY} ${PROTO_GEN_PY}) add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" + ARGS "--python_out=${PADDLE_SOURCE_DIR}/python/paddle/proto" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} DEPENDS ${ABS_FIL} protoc) endforeach() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b5030da8e7..02e4f7c477 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -18,7 +18,7 @@ SET(COPY_PADDLE_MASTER "") if(WITH_GOLANG) SET(COPY_PADDLE_MASTER "copy_paddle_master") add_custom_command(TARGET ${COPY_PADDLE_MASTER} - COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ + COMMAND cp ${paddle_master_LIB_PATH} ${PADDLE_SOURCE_DIR}/python/paddle/v2/master/ ) add_dependencies(copy_paddle_master paddle_master) endif(WITH_GOLANG) @@ -27,10 +27,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) -add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/framework/core.so - COMMAND cmake -E copy $ ${PROJ_ROOT}/python/paddle/v2/framework/core.so +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so + COMMAND cmake -E copy $ ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so DEPENDS paddle_pybind) -add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/framework/core.so) +add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so) add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt index 6c860fd497..580aef935b 100644 --- a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt +++ b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt @@ -1,17 +1,17 @@ #################### test_config_parser ######################### add_test(NAME layers_test - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py - WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} 
${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py
+    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
 
 add_test(NAME test_reset_hook
-    COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
-        ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
-    WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
+    COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
+        ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
+    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle)
 
 add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp)
 
 add_test(NAME test_layerHelpers
   COMMAND
-    ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
+    ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
     ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
 )
diff --git a/python/setup.py.in b/python/setup.py.in
index 38f0a503be..4110c98318 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -45,14 +45,14 @@ setup(name='paddlepaddle',
             '': '${CMAKE_CURRENT_SOURCE_DIR}',
             # The paddle.v2.framework.proto will be generated while compiling.
             # So that package points to other directory.
-            'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework',
-            'py_paddle': '${PROJ_ROOT}/paddle/py_paddle'
+            'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework',
+            'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
       },
-      scripts=['${PROJ_BINARY_ROOT}/paddle/scripts/paddle'],
+      scripts=['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'],
       distclass=BinaryDistribution,
       data_files=[('/usr/local/opt/paddle/bin',
-                   ['${PROJ_BINARY_ROOT}/paddle/scripts/paddle_usage',
-                    '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_trainer',
-                    '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_merge_model',
-                    '${PROJ_BINARY_ROOT}/paddle/pserver/paddle_pserver_main'])]
+                   ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle_usage',
+                    '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
+                    '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
+                    '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main'])]
 )

From 82026fe8d952f197ae63964dd70442ede737c18b Mon Sep 17 00:00:00 2001
From: qijun
Date: Thu, 10 Aug 2017 12:06:29 +0800
Subject: [PATCH 32/76] remove eigen tensor header file in ddim.h

---
 paddle/framework/ddim.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h
index 5aa5af0c19..3cb59e1ed2 100644
--- a/paddle/framework/ddim.h
+++ b/paddle/framework/ddim.h
@@ -20,7 +20,6 @@ limitations under the License. */
 #include
 #include "paddle/framework/dim.h"
 #include "paddle/platform/enforce.h"
-#include "unsupported/Eigen/CXX11/Tensor"
 
 namespace paddle {
 namespace framework {

From 55fac551078c10cc17dcc8b4c4e4dc700a6e790b Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 9 Aug 2017 21:09:52 -0700
Subject: [PATCH 33/76] Refine code according to reviewer's advice

---
 paddle/framework/op_registry.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index 111709c64a..84bf325fed 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -309,6 +309,14 @@ class OpRegistry {
 
 class Registrar {
  public:
+  // In our design, various kinds of classes, e.g., operators and kernels, have
+  // their corresponding registry and registrar. The action of registration is
+  // in the constructor of a global registrar variable, which, however, is not
+  // used in the code that calls the framework package, and so would be removed
+  // from the generated binary file by the linker. To avoid such removal, we
+  // add Touch to all registrar classes and make the USE_OP macros call this
+  // method. So, as long as the calling code calls USE_OP, the global
+  // registrar variable won't be removed by the linker.
   void Touch() {}
 };
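
The comment above is the heart of the whole Registrar refactor. Distilled into a standalone two-file sketch, with illustrative names rather than Paddle's:

    // op_lib.cc -- compiled into the (static) library that defines the op
    struct Registrar {
      Registrar() { /* real code would insert the op into a global registry */ }
      void Touch() {}  // empty on purpose: it exists only to be referenced
    };
    static Registrar g_my_op_registrar;  // ctor runs at program start-up...
    int TouchMyOpRegistrar() {           // ...but only if this object file is kept
      g_my_op_registrar.Touch();
      return 0;
    }

    // main.cc -- the code that "uses" the op
    extern int TouchMyOpRegistrar();
    static int g_use_my_op __attribute__((unused)) = TouchMyOpRegistrar();
    // Referencing TouchMyOpRegistrar forces the linker to keep op_lib.o,
    // which drags g_my_op_registrar (and its registering ctor) along with it.
    int main() { return 0; }
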
@@ -332,10 +340,9 @@ template
 class OpKernelRegistrar : public Registrar {
  public:
   explicit OpKernelRegistrar(const char* op_type) {
-    ::paddle::framework::OperatorWithKernel::OpKernelKey key;
+    OperatorWithKernel::OpKernelKey key;
     key.place_ = PlaceType();
-    ::paddle::framework::OperatorWithKernel::AllOpKernels()[op_type][key].reset(
-        new KernelType);
+    OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KernelType);
   }
 };
 
@@ -349,7 +356,7 @@ class OpKernelRegistrar : public Registrar {
   msg)
 
 /**
- * Macro to Register Operator.
+ * Macro to register Operator.
 */
 #define REGISTER_OP(op_type, op_class, op_maker_class)  \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                        \
@@ -362,7 +369,7 @@ class OpKernelRegistrar : public Registrar {
   }
 
 /**
- * Macro to Register Gradient Operator.
+ * Macro to register Gradient Operator.
 */
 #define REGISTER_GRADIENT_OP(op_type, grad_op_type, grad_op_class)  \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                   \
@@ -377,7 +384,7 @@ class OpKernelRegistrar : public Registrar {
   }
 
 /**
- * Macro to Register OperatorKernel.
+ * Macro to register OperatorKernel.
 */
 #define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...)  \
   STATIC_ASSERT_GLOBAL_NAMESPACE(                                   \

From c304e02813e0628acfbce0fb21239cca931483ca Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Thu, 10 Aug 2017 12:31:06 +0800
Subject: [PATCH 34/76] fix py_paddle test

---
 CMakeLists.txt        |  2 ++
 cmake/generic.cmake   |  2 +-
 python/CMakeLists.txt | 10 +++-------
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b174831109..89e1fec566 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -164,10 +164,12 @@ if(WITH_GOLANG)
   add_subdirectory(go)
 endif(WITH_GOLANG)
 
+set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
 add_subdirectory(paddle)
 if(WITH_PYTHON)
   add_subdirectory(python)
 endif()
+
 if(WITH_DOC)
   add_subdirectory(doc)
 endif()
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 9f907a9dc2..951642e70b 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -411,7 +411,7 @@ function(py_test TARGET_NAME)
     set(multiValueArgs SRCS DEPS)
     cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_test(NAME ${TARGET_NAME}
-             COMMAND env PYTHONPATH=${PADDLE_PYTHON_LIB_DIR}
+             COMMAND env PYTHONPATH=${PROJ_ROOT}/paddle:${PADDLE_PYTHON_BUILD_DIR}/lib
              python2 ${py_test_SRCS}
              WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
   endif()
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index fc8c6f6a42..684691d240 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -1,7 +1,3 @@
-set(OUTPUT_DIR
-    "${CMAKE_CURRENT_BINARY_DIR}/build")
-
-set(PADDLE_PYTHON_LIB_DIR "${OUTPUT_DIR}/lib")
 file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
 file(GLOB HELPERS_PY_FILES . 
./paddle/trainer_config_helpers/*.py) @@ -35,13 +31,13 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/framework/core.so add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/framework/core.so) -add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp +add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp + COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS - ${OUTPUT_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) + ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) From a475a57d9ba2d70477ef072a0bcf7c3254b4afeb Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 10 Aug 2017 13:02:43 +0800 Subject: [PATCH 35/76] rename files and classes, use uppercase of Mkldnn and Cpu --- paddle/gserver/CMakeLists.txt | 8 +-- .../layers/{MkldnnBase.h => MKLDNNBase.h} | 26 ++++----- .../{MkldnnFcLayer.cpp => MKLDNNFcLayer.cpp} | 22 ++++---- .../{MkldnnFcLayer.h => MKLDNNFcLayer.h} | 12 ++--- .../layers/{MkldnnLayer.h => MKLDNNLayer.h} | 22 ++++---- paddle/gserver/tests/CMakeLists.txt | 8 +-- .../{MkldnnTester.cpp => MKLDNNTester.cpp} | 54 +++++++++---------- .../tests/{MkldnnTester.h => MKLDNNTester.h} | 8 +-- .../{test_Mkldnn.cpp => test_MKLDNN.cpp} | 6 +-- 9 files changed, 83 insertions(+), 83 deletions(-) rename paddle/gserver/layers/{MkldnnBase.h => MKLDNNBase.h} (77%) rename paddle/gserver/layers/{MkldnnFcLayer.cpp => MKLDNNFcLayer.cpp} (94%) rename paddle/gserver/layers/{MkldnnFcLayer.h => MKLDNNFcLayer.h} (86%) rename paddle/gserver/layers/{MkldnnLayer.h => MKLDNNLayer.h} (88%) rename paddle/gserver/tests/{MkldnnTester.cpp => MKLDNNTester.cpp} (89%) rename paddle/gserver/tests/{MkldnnTester.h => MKLDNNTester.h} (95%) rename paddle/gserver/tests/{test_Mkldnn.cpp => test_MKLDNN.cpp} (96%) diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 1305d5438a..62cff9361c 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -25,13 +25,13 @@ filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) if(NOT WITH_MKLDNN) - file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "Mkldnn*.h") - file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "Mkldnn*.cpp") + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER}) list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES}) - message(STATUS "Skip compiling with Mkldnnlayers and MkldnnActivations") + message(STATUS "Skip compiling with MKLDNNLayers and MKLDNNActivations") else() - message(STATUS "Compile with Mkldnnlayers and MkldnnActivations") + message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations") endif() if(NOT WITH_GPU) diff --git a/paddle/gserver/layers/MkldnnBase.h b/paddle/gserver/layers/MKLDNNBase.h similarity index 77% rename from paddle/gserver/layers/MkldnnBase.h rename to paddle/gserver/layers/MKLDNNBase.h index 63fd67a850..4c0234e7b3 100644 --- a/paddle/gserver/layers/MkldnnBase.h +++ 
b/paddle/gserver/layers/MKLDNNBase.h @@ -30,26 +30,26 @@ typedef enum { * @brief MKLDNN CPU engine. * */ -class CpuEngine { +class CPUEngine { public: - static CpuEngine& Instance() { + static CPUEngine& Instance() { // Thread-safe in C++11. - static CpuEngine myInstance; + static CPUEngine myInstance; return myInstance; } // Disallow copy or move - CpuEngine(const CpuEngine&) = delete; // Copy constructor - CpuEngine(CpuEngine&&) = delete; // Move constructor - CpuEngine& operator=(const CpuEngine&) = delete; // Copy assignment - CpuEngine& operator=(CpuEngine&&) = delete; // Move assignment + CPUEngine(const CPUEngine&) = delete; // Copy constructor + CPUEngine(CPUEngine&&) = delete; // Move constructor + CPUEngine& operator=(const CPUEngine&) = delete; // Copy assignment + CPUEngine& operator=(CPUEngine&&) = delete; // Move assignment mkldnn::engine& getEngine() { return cpuEngine_; } protected: - CpuEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} - // CpuEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} - ~CpuEngine() {} + CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} + // CPUEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} + ~CPUEngine() {} private: mkldnn::engine cpuEngine_; @@ -59,11 +59,11 @@ private: * @brief MKLDNN Stream. * */ -class MkldnnStream { +class MKLDNNStream { public: - MkldnnStream() : ready_(false) { resetState(); } + MKLDNNStream() : ready_(false) { resetState(); } - virtual ~MkldnnStream() {} + virtual ~MKLDNNStream() {} /** * @brief Submit stream diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp similarity index 94% rename from paddle/gserver/layers/MkldnnFcLayer.cpp rename to paddle/gserver/layers/MKLDNNFcLayer.cpp index f89db169ef..30f567eaf8 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "MkldnnFcLayer.h" +#include "MKLDNNFcLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" @@ -24,11 +24,11 @@ typedef inner_product_backward_data fc_bwdData; namespace paddle { -REGISTER_LAYER(mkldnn_fc, MkldnnFcLayer); +REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer); -bool MkldnnFcLayer::init(const LayerMap& layerMap, +bool MKLDNNFcLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - if (!MkldnnLayer::init(layerMap, parameterMap)) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { return false; } @@ -56,7 +56,7 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return true; } -void MkldnnFcLayer::convertWeightsFromPaddle() { +void MKLDNNFcLayer::convertWeightsFromPaddle() { if (FLAGS_use_mkldnn_wgt) { return; } @@ -81,7 +81,7 @@ void MkldnnFcLayer::convertWeightsFromPaddle() { hasInitedWgt_ = true; } -void MkldnnFcLayer::convertWeightsToPaddle() { +void MKLDNNFcLayer::convertWeightsToPaddle() { MatrixPtr dnnWgt = weight_->getW(); MatrixPtr paddleWgt; dnnWgt->transpose(paddleWgt, true); @@ -92,7 +92,7 @@ void MkldnnFcLayer::convertWeightsToPaddle() { dnnWgtT->copyFrom(*paddleWgt); } -void MkldnnFcLayer::reshape() { +void MKLDNNFcLayer::reshape() { const Argument& input = getInput(0); int batchSize = input.getBatchSize(); if (bs_ == batchSize) { @@ -129,7 +129,7 @@ void MkldnnFcLayer::reshape() { convertWeightsFromPaddle(); } -void MkldnnFcLayer::resetFwd() { +void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); real* iData = getInputValue(0)->getData(); real* oData = getOutputValue()->getData(); @@ -166,7 +166,7 @@ void MkldnnFcLayer::resetFwd() { pipelineFwd_.push_back(*fwd_); } -void MkldnnFcLayer::resetBwd() { +void MKLDNNFcLayer::resetBwd() { if (!needResetBwd_) { return; } @@ -231,7 +231,7 @@ void MkldnnFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdData_); } -void MkldnnFcLayer::forward(PassType passType) { +void MKLDNNFcLayer::forward(PassType passType) { Layer::forward(passType); reshape(); @@ -253,7 +253,7 @@ void MkldnnFcLayer::forward(PassType passType) { } } -void MkldnnFcLayer::backward(const UpdateCallback& callback) { +void MKLDNNFcLayer::backward(const UpdateCallback& callback) { /* Do derivation */ { REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); backwardActivation(); diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h similarity index 86% rename from paddle/gserver/layers/MkldnnFcLayer.h rename to paddle/gserver/layers/MKLDNNFcLayer.h index c4c0fa1c41..dffae27d7b 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -14,17 +14,17 @@ limitations under the License. */ #pragma once -#include "MkldnnLayer.h" +#include "MKLDNNLayer.h" #include "mkldnn.hpp" namespace paddle { /** - * @brief A subclass of MkldnnLayer fc layer. + * @brief A subclass of MKLDNNLayer fc layer. 
* * The config file api is mkldnn_fc */ -class MkldnnFcLayer : public MkldnnLayer { +class MKLDNNFcLayer : public MKLDNNLayer { protected: // input layer size, can not be change after init size_t iLayerSize_; // == ic * ih * iw @@ -37,10 +37,10 @@ protected: std::unique_ptr biases_; public: - explicit MkldnnFcLayer(const LayerConfig& config) - : MkldnnLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} + explicit MKLDNNFcLayer(const LayerConfig& config) + : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} - ~MkldnnFcLayer() {} + ~MKLDNNFcLayer() {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MKLDNNLayer.h similarity index 88% rename from paddle/gserver/layers/MkldnnLayer.h rename to paddle/gserver/layers/MKLDNNLayer.h index 620bdfc984..63e29f447e 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "Layer.h" -#include "MkldnnBase.h" +#include "MKLDNNBase.h" #include "mkldnn.hpp" DECLARE_bool(use_mkldnn); @@ -24,14 +24,14 @@ DECLARE_bool(use_mkldnn_wgt); namespace paddle { -class MkldnnLayer; -typedef std::shared_ptr MkldnnLayerPtr; +class MKLDNNLayer; +typedef std::shared_ptr MKLDNNLayerPtr; /** - * @brief Base class of Mkldnnlayer. + * @brief Base class of MKLDNNlayer. * */ -class MkldnnLayer : public Layer { +class MKLDNNLayer : public Layer { protected: // batch size int bs_; @@ -45,14 +45,14 @@ protected: // mkldnn engine, stream and primivtives mkldnn::engine engine_; - std::shared_ptr stream_; + std::shared_ptr stream_; std::shared_ptr fwd_; std::shared_ptr bwdWgt_; std::shared_ptr bwdData_; std::vector pipelineFwd_; std::vector pipelineBwd_; - // TODO(TJ): change below memory as MkldnnMatrixPtr type + // TODO(TJ): change below memory as MKLDNNMatrixPtr type std::shared_ptr inVal_; std::shared_ptr inGrad_; std::shared_ptr outVal_; @@ -63,7 +63,7 @@ protected: std::shared_ptr biasGrad_; public: - explicit MkldnnLayer(const LayerConfig& config) + explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), bs_(0), ic_(0), @@ -79,7 +79,7 @@ public: bwdWgt_(nullptr), bwdData_(nullptr) {} - ~MkldnnLayer() {} + ~MKLDNNLayer() {} virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { @@ -90,8 +90,8 @@ public: CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
<< "Please set WITH_MKLDNN=ON " << "and set use_mkldnn=True"; - stream_.reset(new MkldnnStream()); - engine_ = CpuEngine::Instance().getEngine(); + stream_.reset(new MKLDNNStream()); + engine_ = CPUEngine::Instance().getEngine(); // TODO(TJ): deivecId return true; diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index bcfc85aea0..ade5f633b4 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -20,11 +20,11 @@ add_test(NAME test_LayerGrad ########## test_Mkldnn layers and activations ########## if(WITH_MKLDNN) - add_unittest_without_exec(test_Mkldnn - test_Mkldnn.cpp - MkldnnTester.cpp + add_unittest_without_exec(test_MKLDNN + test_MKLDNN.cpp + MKLDNNTester.cpp LayerGradUtil.cpp) - add_test(NAME test_Mkldnn COMMAND test_Mkldnn) + add_test(NAME test_MKLDNN COMMAND test_MKLDNN) endif() ################ test_CRFLayerGrad #################### diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp similarity index 89% rename from paddle/gserver/tests/MkldnnTester.cpp rename to paddle/gserver/tests/MKLDNNTester.cpp index 9232e2fdcd..d91e4ed60c 100644 --- a/paddle/gserver/tests/MkldnnTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "MkldnnTester.h" -#include "paddle/gserver/layers/MkldnnBase.h" -#include "paddle/gserver/layers/MkldnnLayer.h" +#include "MKLDNNTester.h" +#include "paddle/gserver/layers/MKLDNNBase.h" +#include "paddle/gserver/layers/MKLDNNLayer.h" namespace paddle { // init data layer and test layer of both dnn and reference -void MkldnnTester::reset(const TestConfig& dnn, +void MKLDNNTester::reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize) { const bool trans = false; @@ -71,7 +71,7 @@ void MkldnnTester::reset(const TestConfig& dnn, setInputImgSize(); } -void MkldnnTester::setInputImgSize() { +void MKLDNNTester::setInputImgSize() { for (size_t n = 0; n < dataLayers_.size(); ++n) { for (size_t i = 0; i < dataLayers_[n].size(); ++i) { // TODO(TJ): fix me when concat and elewise ready @@ -82,7 +82,7 @@ void MkldnnTester::setInputImgSize() { } // init randome parameters of ref, and copy to mkldnn -void MkldnnTester::randomWgtDatas() { +void MKLDNNTester::randomWgtDatas() { EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); for (size_t i = 0; i < parameters_[REF].size(); ++i) { const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); @@ -96,7 +96,7 @@ void MkldnnTester::randomWgtDatas() { } // random botdata of ref layer and copy same to mkldnn -void MkldnnTester::randomBotDatas() { +void MKLDNNTester::randomBotDatas() { CHECK_EQ(dataLayers_.size(), NUM); for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { dataLayers_[REF][i]->getOutputValue()->randomizeUniform(); @@ -107,14 +107,14 @@ void MkldnnTester::randomBotDatas() { } } -void MkldnnTester::randomTopDiffs() { +void MKLDNNTester::randomTopDiffs() { refLayer_->getOutputGrad()->randomizeUniform(); dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad())); VLOG(lvl_) << "Random dom Backward Input, TopDiff: "; printMatrix(refLayer_->getOutputGrad()); } -void MkldnnTester::checkForward() { +void MKLDNNTester::checkForward() { printTopDatas(); double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), testLayers_[REF]->getOutputValue()); @@ -122,7 +122,7 
@@ void MkldnnTester::checkForward() { EXPECT_LE(fabs(delta), eps_); } -void MkldnnTester::checkBackwardData() { +void MKLDNNTester::checkBackwardData() { const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); @@ -141,13 +141,13 @@ void MkldnnTester::checkBackwardData() { } } -void MkldnnTester::checkBackwardWgts() { +void MKLDNNTester::checkBackwardWgts() { CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); vector dnnWgts; // used to temply save mkldnn weights saveWgt(parameters_[DNN], dnnWgts); - const MkldnnLayerPtr dnnlayer = - std::dynamic_pointer_cast(dnnLayer_); + const MKLDNNLayerPtr dnnlayer = + std::dynamic_pointer_cast(dnnLayer_); CHECK(dnnlayer); dnnlayer->convertWeightsToPaddle(); for (size_t i = 0; i < parameters_[DNN].size(); ++i) { @@ -166,7 +166,7 @@ void MkldnnTester::checkBackwardWgts() { restoreWgt(dnnWgts, parameters_[DNN]); } -void MkldnnTester::saveWgt(const vector& from, +void MKLDNNTester::saveWgt(const vector& from, vector& to) { const bool useGpu = false; to.resize(from.size()); @@ -177,7 +177,7 @@ void MkldnnTester::saveWgt(const vector& from, } } -void MkldnnTester::restoreWgt(const vector& from, +void MKLDNNTester::restoreWgt(const vector& from, vector& to) { CHECK_EQ(from.size(), to.size()); for (size_t i = 0; i < from.size(); ++i) { @@ -187,7 +187,7 @@ void MkldnnTester::restoreWgt(const vector& from, } // clear parameters grad -void MkldnnTester::clearWgtDiffs() { +void MKLDNNTester::clearWgtDiffs() { for (size_t n = 0; n < parameters_.size(); ++n) { for (size_t i = 0; i < parameters_[n].size(); ++i) { const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); @@ -198,7 +198,7 @@ void MkldnnTester::clearWgtDiffs() { } } -void MkldnnTester::clearBotDiffs() { +void MKLDNNTester::clearBotDiffs() { // dnn and ref for (size_t n = 0; n < dataLayers_.size(); ++n) { // all inputs layers @@ -208,7 +208,7 @@ void MkldnnTester::clearBotDiffs() { } } -void MkldnnTester::clearBotDiffs(int n) { +void MKLDNNTester::clearBotDiffs(int n) { CHECK_LT(n, NUM); // all inputs layers for (size_t i = 0; i < dataLayers_[n].size(); ++i) { @@ -216,13 +216,13 @@ void MkldnnTester::clearBotDiffs(int n) { } } -void MkldnnTester::clearTopDatas() { +void MKLDNNTester::clearTopDatas() { for (size_t i = 0; i < testLayers_.size(); ++i) { testLayers_[i]->getOutputValue()->zeroMem(); } } -void MkldnnTester::printTopDatas() { +void MKLDNNTester::printTopDatas() { if (!log_) { return; } @@ -233,7 +233,7 @@ void MkldnnTester::printTopDatas() { } } -void MkldnnTester::printMatrix(const MatrixPtr& m) { +void MKLDNNTester::printMatrix(const MatrixPtr& m) { if (!log_) { return; } @@ -243,7 +243,7 @@ void MkldnnTester::printMatrix(const MatrixPtr& m) { VLOG(lvl_) << std::endl << ostr.str(); } -void MkldnnTester::printVector(const VectorPtr& v) { +void MKLDNNTester::printVector(const VectorPtr& v) { if (!log_) { return; } @@ -253,7 +253,7 @@ void MkldnnTester::printVector(const VectorPtr& v) { VLOG(lvl_) << std::endl << ostr.str(); } -double MkldnnTester::getDelta(const real* d1, +double MKLDNNTester::getDelta(const real* d1, const real* d2, size_t len, const float failRate, @@ -280,17 +280,17 @@ double MkldnnTester::getDelta(const real* d1, return (failCnt / (float)len) > failRate ? 
maxOut : delta / sum; } -double MkldnnTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { +double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { CHECK_EQ(m1->getElementCnt(), m2->getElementCnt()); return getDelta(m1->getData(), m2->getData(), m1->getElementCnt()); } -double MkldnnTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { +double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { CHECK_EQ(v1->getSize(), v2->getSize()); return getDelta(v1->getData(), v2->getData(), v1->getSize()); } -void MkldnnTester::runOnce() { +void MKLDNNTester::runOnce() { // test forward randomBotDatas(); dnnLayer_->forward(PASS_TRAIN); @@ -310,7 +310,7 @@ void MkldnnTester::runOnce() { clearBotDiffs(REF); } -void MkldnnTester::run(const TestConfig& dnn, +void MKLDNNTester::run(const TestConfig& dnn, const TestConfig& ref, size_t batchSize, size_t inputImgH, diff --git a/paddle/gserver/tests/MkldnnTester.h b/paddle/gserver/tests/MKLDNNTester.h similarity index 95% rename from paddle/gserver/tests/MkldnnTester.h rename to paddle/gserver/tests/MKLDNNTester.h index 7d1db870d1..d21f92d426 100644 --- a/paddle/gserver/tests/MkldnnTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include "LayerGradUtil.h" -#include "paddle/gserver/layers/MkldnnBase.h" +#include "paddle/gserver/layers/MKLDNNBase.h" namespace paddle { @@ -25,7 +25,7 @@ namespace paddle { * @brief test the functionality of Mkldnnlayers * refer to paddle original function */ -class MkldnnTester { +class MKLDNNTester { enum { DNN = 0, REF = 1, @@ -54,14 +54,14 @@ protected: size_t ih_, iw_; public: - explicit MkldnnTester(size_t iter = 3, float epsilon = 1e-4) { + explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) { iter_ = iter; eps_ = epsilon; log_ = false; lvl_ = MKLDNN_ALL; } - ~MkldnnTester() {} + ~MKLDNNTester() {} public: void run(const TestConfig& dnn, diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_MKLDNN.cpp similarity index 96% rename from paddle/gserver/tests/test_Mkldnn.cpp rename to paddle/gserver/tests/test_MKLDNN.cpp index 8e4a8595d3..e1d2270df2 100644 --- a/paddle/gserver/tests/test_Mkldnn.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include #include #include -#include "MkldnnTester.h" +#include "MKLDNNTester.h" #include "ModelConfig.pb.h" using namespace paddle; // NOLINT @@ -43,7 +43,7 @@ void testFcLayer(const testFCDesc& pm) { /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)}); cfg.layerConfig.add_inputs(); - MkldnnTester tester; + MKLDNNTester tester; for (auto biasSize : {pm.oc, 0}) { cfg.biasSize = biasSize; TestConfig ref = cfg; @@ -54,7 +54,7 @@ void testFcLayer(const testFCDesc& pm) { } } -TEST(MkldnnLayer, fcLayer) { +TEST(MKLDNNLayer, FcLayer) { testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1}); testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1}); testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13}); From 2d4c66d4b2bc723d2404d650b1adbd3b76a42b32 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 10 Aug 2017 13:42:27 +0800 Subject: [PATCH 36/76] add comments and todo lists --- paddle/gserver/layers/MKLDNNFcLayer.h | 3 +++ paddle/gserver/tests/MKLDNNTester.cpp | 12 +++++++----- paddle/gserver/tests/MKLDNNTester.h | 9 +++++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index dffae27d7b..7954852a23 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -29,7 +29,10 @@ protected: // input layer size, can not be change after init size_t iLayerSize_; // == ic * ih * iw + // if has already init the weight bool hasInitedWgt_; + + // if input layer has image size info (ih>1 && iw>1) bool hasSpatial_; // fc weight and bias diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index d91e4ed60c..99c8c4948c 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -123,7 +123,8 @@ void MKLDNNTester::checkForward() { } void MKLDNNTester::checkBackwardData() { - const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; + // TODO(TJ): uncomment me when batch norm ready + // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); @@ -134,10 +135,11 @@ void MKLDNNTester::checkBackwardData() { double delta = compareMatrix(dnnDiff, refDiff); EXPECT_LE(fabs(delta), eps_); - if (isBN) { - // the other two inputs in batch norm are for moving mean and var - break; - } + // TODO(TJ): uncomment me when batch norm ready + // if (isBN) { + // // the other two inputs in batch norm are for moving mean and var + // break; + // } } } diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index d21f92d426..522eeaf24b 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -27,9 +27,9 @@ namespace paddle { */ class MKLDNNTester { enum { - DNN = 0, - REF = 1, - NUM = 2, + DNN = 0, // MKLDNN layer + REF = 1, // Reference layer + NUM = 2, // Number of total }; protected: @@ -107,7 +107,8 @@ private: * Get delta percent * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the * max(diff/ref) - * else return sum(abs(a-b)) / sum(abs(b)) should smaller than eps + * else return sum(abs(a-b)) / sum(abs(b)) + * The return value should smaller than eps when passing. 
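+ * For example, with the default eps_ of 1e-4 a relative delta of 5e-5
+ * passes the EXPECT_LE(fabs(delta), eps_) check, while 5e-4 fails.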
*/ double getDelta(const real* d1, const real* d2, From fb5cd7f8238be3503290b35597dd3b60a8e33b17 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 06:35:11 +0000 Subject: [PATCH 37/76] Refine the error logs. --- .../examples/model_inference/common/common.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/paddle/capi/examples/model_inference/common/common.h b/paddle/capi/examples/model_inference/common/common.h index a78522e4a7..e32f2f9836 100644 --- a/paddle/capi/examples/model_inference/common/common.h +++ b/paddle/capi/examples/model_inference/common/common.h @@ -3,18 +3,21 @@ #include #include -#define CHECK(stmt) \ - do { \ - paddle_error __err__ = stmt; \ - if (__err__ != kPD_NO_ERROR) { \ - fprintf(stderr, "Invoke paddle error %d \n" #stmt, __err__); \ - exit(__err__); \ - } \ +#define CHECK(stmt) \ + do { \ + paddle_error __err__ = stmt; \ + if (__err__ != kPD_NO_ERROR) { \ + fprintf(stderr, "Invoke paddle error %d in " #stmt "\n", __err__); \ + exit(__err__); \ + } \ } while (0) void* read_config(const char* filename, long* size) { FILE* file = fopen(filename, "r"); - if (file == NULL) return NULL; + if (file == NULL) { + fprintf(stderr, "Open %s error\n", filename); + return NULL; + } fseek(file, 0L, SEEK_END); *size = ftell(file); fseek(file, 0L, SEEK_SET); From c7a247b7afe2498be4442e84d394a73b076bfcff Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 06:56:18 +0000 Subject: [PATCH 38/76] Support to load parameters from buffer in c-api. --- paddle/capi/Arguments.cpp | 12 ++++++ paddle/capi/arguments.h | 13 ++++++ paddle/capi/gradient_machine.cpp | 9 ++++ paddle/capi/gradient_machine.h | 9 ++++ .../gradientmachines/GradientMachine.cpp | 43 +++++++++++++++++++ .../gradientmachines/GradientMachine.h | 2 + .../gradientmachines/NeuralNetwork.cpp | 2 + paddle/parameter/Parameter.cpp | 40 +++++++++-------- paddle/parameter/Parameter.h | 5 +++ 9 files changed, 117 insertions(+), 18 deletions(-) diff --git a/paddle/capi/Arguments.cpp b/paddle/capi/Arguments.cpp index 8b81ec69e6..1ec403077e 100644 --- a/paddle/capi/Arguments.cpp +++ b/paddle/capi/Arguments.cpp @@ -90,6 +90,18 @@ paddle_error paddle_arguments_set_ids(paddle_arguments args, return kPD_NO_ERROR; } +paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, + uint64_t ID, + uint64_t frameHeight, + uint64_t frameWidth) { + if (args == nullptr) return kPD_NULLPTR; + auto a = castArg(args); + if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; + a->args[ID].setFrameHeight(frameHeight); + a->args[ID].setFrameWidth(frameWidth); + return kPD_NO_ERROR; +} + paddle_error paddle_arguments_set_sequence_start_pos(paddle_arguments args, uint64_t ID, uint32_t nestedLevel, diff --git a/paddle/capi/arguments.h b/paddle/capi/arguments.h index d71ea26a5d..ba49d692ad 100644 --- a/paddle/capi/arguments.h +++ b/paddle/capi/arguments.h @@ -111,6 +111,19 @@ PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args, uint64_t ID, paddle_ivector ids); +/** + * @brief paddle_arguments_set_frame_shape Set the fram size of one argument + * in array, which index is `ID`. + * @param [in] args arguments array + * @param [in] ID array index + * @param [out] ids integer vector pointer + * @return paddle_error + */ +PD_API paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, + uint64_t ID, + uint64_t frameHeight, + uint64_t frameWidth); + /** * @brief PDArgsSetSequenceStartPos Set sequence start position vector of one * argument in array, which index is `ID`. 
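The two new entry points of this patch are meant to be used together when a model's parameters already sit in memory. On the engine side, GradientMachine::loadParameters (below) expects the buffer to hold a uint64_t file count, then for each parameter a NUL-terminated name, a uint64_t size, and the raw payload. A minimal usage sketch, not part of the patch itself (C++ calling the C API; the function name, the 480x640 frame size, and the argument index are illustrative, and error handling is reduced to the CHECK macro from the examples' common.h):

  // Assumes the paddle/capi headers (arguments.h, gradient_machine.h) and
  // the CHECK macro from paddle/capi/examples/model_inference/common/common.h.
  void load_and_shape(paddle_gradient_machine machine,
                      paddle_arguments in_args,
                      const char* param_buf,  // pre-loaded parameter block
                      uint64_t param_len) {
    // Feed pre-loaded parameters instead of reading them from disk.
    CHECK(paddle_gradient_machine_load_parameter_from_buffer(
        machine, param_buf, param_len));
    // Declare the frame size (height x width) of input argument 0.
    CHECK(paddle_arguments_set_frame_shape(in_args, /*ID=*/0,
                                           /*frameHeight=*/480,
                                           /*frameWidth=*/640));
  }
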
diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp index 00f76e0152..e2d2d30ddc 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/capi/gradient_machine.cpp @@ -68,6 +68,15 @@ paddle_error paddle_gradient_machine_load_parameter_from_disk( return kPD_NO_ERROR; } +paddle_error paddle_gradient_machine_load_parameter_from_buffer( + paddle_gradient_machine machine, const char* buf, uint64_t length) { + auto m = cast(machine); + if (m == nullptr || buf == nullptr || m->machine == nullptr) + return kPD_NULLPTR; + m->machine->loadParameters(buf, length); + return kPD_NO_ERROR; +} + paddle_error paddle_gradient_machine_forward(paddle_gradient_machine machine, paddle_arguments inArgs, paddle_arguments outArgs, diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h index d7e2dd9bf8..2426839050 100644 --- a/paddle/capi/gradient_machine.h +++ b/paddle/capi/gradient_machine.h @@ -45,6 +45,15 @@ PD_API paddle_error paddle_gradient_machine_create_for_inference( PD_API paddle_error paddle_gradient_machine_load_parameter_from_disk( paddle_gradient_machine machine, const char* path); +/** + * @brief Load parameter from buffer. + * @param machine Gradient Machine. + * @param buffer containing all parameters. + * @return paddle_error + */ +PD_API paddle_error paddle_gradient_machine_load_parameter_from_buffer( + paddle_gradient_machine machine, const char* buf, uint64_t length); + /** * @brief Forward a gradient machine * @param machine Gradient machine diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp index b44e4dc202..b7678d9b2f 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.cpp +++ b/paddle/gserver/gradientmachines/GradientMachine.cpp @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "GradientMachine.h" +#include #include #include "paddle/utils/Logging.h" @@ -81,6 +82,48 @@ void GradientMachine::loadParameters(const std::string& dir) { } } +void GradientMachine::loadParameters(const char* buf, uint64_t length) { + LOG(INFO) << "Loading parameter from pre-load buffer"; + + CHECK_NOTNULL(buf); + CHECK_GE(length, static_cast(sizeof(uint64_t))); + + uint64_t numFiles = 0; + memcpy(&numFiles, buf, sizeof(uint64_t)); + uint64_t position = sizeof(uint64_t); + LOG(INFO) << "numFiles: " << numFiles << ", position: " << position; + + std::map offsets; + std::map lengths; + for (uint64_t i = 0; i < numFiles; i++) { + std::string filename(buf + position); + position += filename.size() + 1; + LOG(INFO) << "filename: " << filename << ", position: " << position; + uint64_t size = 0; + memcpy(&size, buf + position, sizeof(uint64_t)); + position += sizeof(uint64_t); + offsets[filename] = const_cast(buf + position); + lengths[filename] = size; + position += size; + CHECK_GE(length, position); + } + + CHECK_GE(offsets.size(), parameters_.size()); + + for (auto& para : parameters_) { + std::string filename = para->getName(); + if (para->isFullSize()) { + if (offsets.end() == offsets.find(filename)) { + para->loadMiss(filename); + } else { + std::istringstream stream( + std::string(offsets[filename], lengths[filename])); + para->load(stream); + } + } + } +} + void GradientMachine::randParameters() { LOG(INFO) << "Initing parameters.."; diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index f9c82a2bef..081518a9d2 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -221,6 +221,8 @@ public: void loadParameters(const std::string& dir); + void loadParameters(const char* buf, uint64_t length); + void randParameters(); virtual void getStats(real& cost, int64_t& numProcessed) { diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index cfa80a8936..148296d20b 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -24,6 +24,8 @@ limitations under the License. 
*/ #include "paddle/gserver/layers/AgentLayer.h" #include "paddle/utils/Stat.h" +#include + namespace paddle { void parameterInitNN(int paramId, Parameter* para, diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index ebe36d4937..80dbb73a7d 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -314,27 +314,31 @@ bool Parameter::save(std::ostream& s) const { /** * Load parameter value from a file */ +bool Parameter::loadMiss(const std::string& filename) { + LOG(INFO) << "missing parameters [" << filename << "] while loading model."; + if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { + LOG(FATAL) << getName() << " missing, not allowed."; + return false; + } + if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { + LOG(INFO) << getName() << " missing, set to random."; + randomize(); + return true; + } + if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) { + LOG(INFO) << getName() << " missing, set to zero."; + zeroMem(); + return true; + } + LOG(FATAL) << "unsupported load_missing_parameter_strategy: " + << FLAGS_load_missing_parameter_strategy; + return false; +} + bool Parameter::load(const std::string& filename) { std::ifstream fs(filename, std::ios_base::binary); if (!fs) { - LOG(INFO) << "missing parameters [" << filename << "] while loading model."; - if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { - LOG(FATAL) << getName() << " missing, not allowed."; - return false; - } - if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to random."; - randomize(); - return true; - } - if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to zero."; - zeroMem(); - return true; - } - LOG(FATAL) << "unsupported load_missing_parameter_strategy: " - << FLAGS_load_missing_parameter_strategy; - return false; + loadMiss(filename); } return load(fs); } diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index 0bac76f068..21932f6b6e 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -201,6 +201,11 @@ public: */ bool save(std::ostream& s) const; + /** + * Fill parameter when file is missed + */ + bool loadMiss(const std::string& filename); + /** * Load parameter value from a file */ From 1505e46be89e9a717ff5f206bfc48c97682c0cef Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 08:01:48 +0000 Subject: [PATCH 39/76] Refine the comments of c-api function, paddle_arguments_set_frame_shape. --- paddle/capi/arguments.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/capi/arguments.h b/paddle/capi/arguments.h index ba49d692ad..7c32524a00 100644 --- a/paddle/capi/arguments.h +++ b/paddle/capi/arguments.h @@ -116,7 +116,8 @@ PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args, * in array, which index is `ID`. * @param [in] args arguments array * @param [in] ID array index - * @param [out] ids integer vector pointer + * @param [in] frameHeight maximum height of input images + * @param [in] frameWidth maximum width of input images * @return paddle_error */ PD_API paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, From 03799bdbfe63f89afd9b65ef4b59f9164f5d03bb Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 10 Aug 2017 16:51:01 +0800 Subject: [PATCH 40/76] Refine the unit test of convolution function. 
--- paddle/function/CMakeLists.txt | 2 + paddle/function/ConvOpTest.h | 244 ++++++++++++++++++++++++ paddle/function/DepthwiseConvOpTest.cpp | 37 ++++ paddle/function/GemmConvOpTest.cpp | 50 +++++ 4 files changed, 333 insertions(+) create mode 100644 paddle/function/ConvOpTest.h create mode 100644 paddle/function/DepthwiseConvOpTest.cpp create mode 100644 paddle/function/GemmConvOpTest.cpp diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 93304f7303..790e342fb9 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -38,10 +38,12 @@ if(WITH_GPU) add_simple_unittest(RowConvOpTest) add_simple_unittest(BlockExpandOpTest) add_simple_unittest(CropOpTest) + add_simple_unittest(DepthwiseConvOpTest) endif() add_simple_unittest(ConvOpTest) add_simple_unittest(Im2ColTest) +add_simple_unittest(GemmConvOpTest) endif() add_style_check_target(paddle_function ${h_files}) diff --git a/paddle/function/ConvOpTest.h b/paddle/function/ConvOpTest.h new file mode 100644 index 0000000000..d745afca56 --- /dev/null +++ b/paddle/function/ConvOpTest.h @@ -0,0 +1,244 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "FunctionTest.h" + +namespace paddle { + +template +void forward(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.run(); +} + +template +void backward_input(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); + test.run(); +} + +template +void backward_filter(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output) { + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), ADD_TO); + test.run(); +} + +template +using Function = void (*)(Compare2Function& test, + const TensorShape& input, + const TensorShape& filter, + const TensorShape& output); + +/** + * \brief A basic convolution function test interface. + * + * \param conv1 type name of convolution function 1. + * \param conv2 type name of convolution function 2. + * \param function test function, can be one of the forward, backward_input + * backward_filter function. + * Example: + * 1. 
Compare GemmConv's CPU and GPU implementation: + * Convolution( + * "GemmConv-CPU", "GemmConv-GPU", forward); + */ +template +void Convolution(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {1, 5}) { + for (size_t inputSize : {7, 14, 31}) { + for (size_t filterSize : {1, 3, 5}) { + for (size_t inputChannels : {3, 16}) { + for (size_t outputChannels : {3, 16}) { + if (outputChannels < inputChannels) continue; + for (size_t stride : {1, 2}) { + for (size_t padding : {0, 1}) { + if (padding >= filterSize) break; + size_t outputSize = + (inputSize - filterSize + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize << " stride=" << stride + << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", (size_t)1) + .set("algo", "auto")); + + TensorShape input{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape filter{ + outputChannels, inputChannels, filterSize, filterSize}; + TensorShape output{ + batchSize, outputChannels, outputSize, outputSize}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +/** + * \brief A convolution function test interface for + * image height is not equal image width. + */ +template +void Convolution2(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {4}) { + for (size_t inputHeight : {7, 31}) { + for (size_t inputWidth : {10, 54}) { + for (size_t filterHeight : {1, 5}) { + for (size_t filterWidth : {3, 7}) { + for (size_t inputChannels : {7}) { + for (size_t outputChannels : {7}) { + size_t stride = 1; + size_t padding = 0; + size_t outputHeight = + (inputHeight - filterHeight + 2 * padding + stride) / + stride; + size_t outputWidth = + (inputWidth - filterWidth + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputHeight + << " inputWidth=" << inputWidth + << " outputChannels=" << outputChannels + << " filterHeight=" << filterHeight + << " filterWidth=" << filterWidth + << " outputHeight=" << outputHeight + << " outputWidth=" << outputWidth + << " stride=" << stride << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", (size_t)1) + .set("algo", "auto")); + + TensorShape input{ + batchSize, inputChannels, inputHeight, inputWidth}; + TensorShape filter{ + outputChannels, inputChannels, filterHeight, filterWidth}; + TensorShape output{ + batchSize, outputChannels, outputHeight, outputWidth}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +/** + * \brief A convolution function test interface for depthwise convolution. 
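+ * Each case sets groups equal to inputChannels, so the filter tensor takes
+ * the 5-D shape {groups, outputChannels / groups, inputChannels / groups,
+ * filterSize, filterSize} constructed below.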
+ */ +template +void DepthwiseConvolution(const std::string& conv1, + const std::string& conv2, + Function function) { + for (size_t batchSize : {1, 32}) { + for (size_t inputSize : {7, 14, 54}) { + for (size_t filterSize : {3, 4}) { + for (size_t inputChannels : {32}) { + for (size_t outputChannels : {32, 64}) { + for (size_t stride : {1, 2}) { + for (size_t padding : {0, 1}) { + size_t outputSize = + (inputSize - filterSize + 2 * padding + stride) / stride; + VLOG(3) << " batchSize=" << batchSize + << " inputChannels=" << inputChannels + << " inputHeight=" << inputSize + << " inputWidth=" << inputSize + << " outputChannels=" << outputChannels + << " filterHeight=" << filterSize + << " filterWidth=" << filterSize + << " outputHeight=" << outputSize + << " outputWidth=" << outputSize << " stride=" << stride + << " padding=" << padding; + + std::vector paddings = {padding, padding}; + std::vector strides = {stride, stride}; + size_t groups = inputChannels; + Compare2Function test( + conv1, + conv2, + FuncConfig() + .set("paddings", paddings) + .set("strides", strides) + .set("groups", groups) + .set("algo", "auto")); + + TensorShape input{ + batchSize, inputChannels, inputSize, inputSize}; + TensorShape filter{groups, + outputChannels / groups, + inputChannels / groups, + filterSize, + filterSize}; + TensorShape output{ + batchSize, outputChannels, outputSize, outputSize}; + + function(test, input, filter, output); + } + } + } + } + } + } + } +} + +} // namespace paddle diff --git a/paddle/function/DepthwiseConvOpTest.cpp b/paddle/function/DepthwiseConvOpTest.cpp new file mode 100644 index 0000000000..f44ae0c342 --- /dev/null +++ b/paddle/function/DepthwiseConvOpTest.cpp @@ -0,0 +1,37 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "ConvOpTest.h" + +namespace paddle { + +#ifndef PADDLE_ONLY_CPU +TEST(DepthwiseConv, Forward) { + DepthwiseConvolution( + "GemmConv-CPU", "DepthwiseConv-GPU", forward); +} + +TEST(DepthwiseConv, BackwardInput) { + DepthwiseConvolution( + "GemmConvGradInput-CPU", "DepthwiseConvGradInput-GPU", backward_input); +} + +TEST(DepthwiseConv, BackwardFilter) { + DepthwiseConvolution( + "GemmConvGradFilter-CPU", "DepthwiseConvGradFilter-GPU", backward_filter); +} +#endif + +} // namespace paddle diff --git a/paddle/function/GemmConvOpTest.cpp b/paddle/function/GemmConvOpTest.cpp new file mode 100644 index 0000000000..5283d79a5a --- /dev/null +++ b/paddle/function/GemmConvOpTest.cpp @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "ConvOpTest.h" + +namespace paddle { + +TEST(GemmConv, NaiveConv) { + Convolution( + "NaiveConv-CPU", "GemmConv-CPU", forward); + Convolution2( + "NaiveConv-CPU", "GemmConv-CPU", forward); +} + +#ifndef PADDLE_ONLY_CPU +TEST(GemmConv, Forward) { + Convolution( + "GemmConv-CPU", "GemmConv-GPU", forward); + Convolution2( + "GemmConv-CPU", "GemmConv-GPU", forward); +} + +TEST(GemmConv, BackwardInput) { + Convolution( + "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input); + Convolution2( + "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", backward_input); +} + +TEST(GemmConv, BackwardFilter) { + Convolution( + "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter); + Convolution2( + "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", backward_filter); +} +#endif + +} // namespace paddle From 1d74d16cca325e3c0b52a63d491f5f1a7466f3d5 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 10 Aug 2017 16:53:03 +0800 Subject: [PATCH 41/76] Remove the useless code. --- paddle/function/CMakeLists.txt | 1 - paddle/function/ConvOpTest.cpp | 306 --------------------------------- 2 files changed, 307 deletions(-) delete mode 100644 paddle/function/ConvOpTest.cpp diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 790e342fb9..7dfb6f61c5 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -41,7 +41,6 @@ if(WITH_GPU) add_simple_unittest(DepthwiseConvOpTest) endif() -add_simple_unittest(ConvOpTest) add_simple_unittest(Im2ColTest) add_simple_unittest(GemmConvOpTest) endif() diff --git a/paddle/function/ConvOpTest.cpp b/paddle/function/ConvOpTest.cpp deleted file mode 100644 index 7f32c73479..0000000000 --- a/paddle/function/ConvOpTest.cpp +++ /dev/null @@ -1,306 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include -#include -#include "Function.h" -#include "FunctionTest.h" - -namespace paddle { - -enum TestType { - kForwardTest = 0, - kBackwardInputTest = 1, - kBackwardFilterTest = 2, -}; - -template -class ConvolutionTest { -public: - ConvolutionTest(const std::string& conv1, - const std::string& conv2, - TestType type, - bool useGroups = true, - std::string algo = "auto") { - for (size_t batchSize : {1, 32}) { - for (size_t inputSize : {7, 14, 54}) { - for (size_t filterSize : {1, 3, 5}) { - for (size_t inputChannels : {3, 64}) { - for (size_t outputChannels : {3, 64}) { - if (inputChannels > outputChannels) break; - size_t groups; - if (!useGroups) { - groups = 1; - } else { - if (outputChannels % inputChannels != 0) continue; - groups = inputChannels; - } - - for (size_t stride : {1, 2}) { - for (size_t padding : {0, 1}) { - if (padding >= filterSize) break; - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", groups) - .set("algo", algo)); - - TensorShape input{ - batchSize, inputChannels, inputSize, inputSize}; - - TensorShape filter; - if (groups > 1) - filter = TensorShape({groups, - outputChannels / groups, - inputChannels / groups, - filterSize, - filterSize}); - else - filter = TensorShape({outputChannels, - inputChannels, - filterSize, - filterSize}); - TensorShape output{ - batchSize, outputChannels, outputSize, outputSize}; - - if (type == kForwardTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.run(); - } else if (type == kBackwardInputTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); - test.run(); - } else if (type == kBackwardFilterTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), - ADD_TO); - test.run(); - } - } - } - } - } - } - } - } - } -}; - -// Mainly used to test cases where the height and width (input, filter) -// are not equal. 
-template -class ConvolutionTest2 { -public: - ConvolutionTest2(const std::string& conv1, - const std::string& conv2, - TestType type, - bool useGroups = true, - std::string algo = "auto") { - for (size_t batchSize : {16}) { - for (size_t inputHeight : {7, 31}) { - for (size_t inputWidth : {10, 54}) { - for (size_t filterHeight : {1, 5}) { - for (size_t filterWidth : {3, 7}) { - for (size_t inputChannels : {7}) { - for (size_t outputChannels : {7}) { - size_t groups; - if (!useGroups) { - groups = 1; - } else { - if (outputChannels % inputChannels != 0) continue; - groups = inputChannels; - } - - size_t stride = 1; - size_t padding = 0; - size_t outputHeight = - (inputHeight - filterHeight + 2 * padding + stride) / - stride; - size_t outputWidth = - (inputWidth - filterWidth + 2 * padding + stride) / - stride; - VLOG(3) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputHeight - << " inputWidth=" << inputWidth - << " outputChannels=" << outputChannels - << " filterHeight=" << filterHeight - << " filterWidth=" << filterWidth - << " outputHeight=" << outputHeight - << " outputWidth=" << outputWidth - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", groups) - .set("algo", algo)); - - TensorShape input{ - batchSize, inputChannels, inputHeight, inputWidth}; - - TensorShape filter; - if (groups > 1) - filter = TensorShape({groups, - outputChannels / groups, - inputChannels / groups, - filterHeight, - filterWidth}); - else - filter = TensorShape({outputChannels, - inputChannels, - filterHeight, - filterWidth}); - TensorShape output{ - batchSize, outputChannels, outputHeight, outputWidth}; - - if (type == kForwardTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.run(); - } else if (type == kBackwardInputTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO); - test.run(); - } else if (type == kBackwardFilterTest) { - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, input)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, filter), - ADD_TO); - test.run(); - } - } - } - } - } - } - } - } - } -}; - -// ======Start Convolution TEST====== - -TEST(Forward, GEMM) { - ConvolutionTest test( - "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false); - ConvolutionTest2 test2( - "NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false); -} - -#ifndef PADDLE_ONLY_CPU -TEST(Forward, GEMM2) { - ConvolutionTest test( - "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false); - ConvolutionTest2 test2( - "GemmConv-CPU", "GemmConv-GPU", kForwardTest, false); -} - -TEST(BackwardInput, GEMM) { - ConvolutionTest test( - "GemmConvGradInput-CPU", - "GemmConvGradInput-GPU", - kBackwardInputTest, - false); - ConvolutionTest2 test2( - "GemmConvGradInput-CPU", - "GemmConvGradInput-GPU", - kBackwardInputTest, - false); -} - -TEST(BackwardFilter, GEMM) { - ConvolutionTest test( - "GemmConvGradFilter-CPU", - "GemmConvGradFilter-GPU", - kBackwardFilterTest, - false); - ConvolutionTest2 test2( - "GemmConvGradFilter-CPU", - "GemmConvGradFilter-GPU", - kBackwardFilterTest, - 
false); -} -#endif -// ======End Convolution TEST====== - -// ======Start DepthwiseConvolution TEST====== - -// TODO(zhaolong) The depthwise convolution cpu test will be added when the cpu -// version of depthwiseConv is implemented. - -#ifndef PADDLE_ONLY_CPU - -TEST(DepthwiseConvForward, GEMM2) { - ConvolutionTest test( - "GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest); - ConvolutionTest2 test2( - "GemmConv-CPU", "DepthwiseConv-GPU", kForwardTest); -} - -TEST(DepthwiseConvBackwardInput, GEMM) { - ConvolutionTest test( - "GemmConvGradInput-CPU", - "DepthwiseConvGradInput-GPU", - kBackwardInputTest); - ConvolutionTest2 test2( - "GemmConvGradInput-CPU", - "DepthwiseConvGradInput-GPU", - kBackwardInputTest); -} - -TEST(DepthwiseConvBackwardFilter, GEMM) { - ConvolutionTest test( - "GemmConvGradFilter-CPU", - "DepthwiseConvGradFilter-GPU", - kBackwardFilterTest); - ConvolutionTest2 test2( - "GemmConvGradFilter-CPU", - "DepthwiseConvGradFilter-GPU", - kBackwardFilterTest); -} - -#endif -// ======End DepthwiseConvolution TEST====== - -} // namespace paddle From 459111020111b3159c04045cc48317cd418fe039 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 10 Aug 2017 16:54:22 +0800 Subject: [PATCH 42/76] Fix gaussian_random_op compile error * Should always use `dynload::` for cuda function. * Fix cublas.h without DSO load. --- paddle/operators/gaussian_random_op.cu | 4 ++-- paddle/platform/dynload/cublas.h | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 54e4ae5d2b..c04637ae3e 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -40,8 +40,8 @@ class GaussianRandomKernel : public framework::OpKernel { &g, CURAND_RNG_PSEUDO_DEFAULT)); PADDLE_ENFORCE( platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed)); - curandGenerateNormal(g, data, framework::product(tensor->dims()), mean, - std); + platform::dynload::curandGenerateNormal( + g, data, framework::product(tensor->dims()), mean, std); } }; diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h index c44b7240a8..aad8097dbb 100644 --- a/paddle/platform/dynload/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -48,13 +48,13 @@ extern void *cublas_dso_handle; }; \ extern DynLoad__##__name __name #else -#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - inline template \ - cublasStatus_t operator()(Args... args) { \ - return __name(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + inline cublasStatus_t operator()(Args... 
args) { \ + return __name(args...); \ + } \ + }; \ extern DynLoad__##__name __name #endif From 3df8ee1ff778abf341b6391bec9b6a95001e004d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 10 Aug 2017 18:22:41 +0800 Subject: [PATCH 43/76] use lib-python as python test dir --- cmake/generic.cmake | 2 +- python/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 951642e70b..d2aab938d4 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PROJ_ROOT}/paddle:${PADDLE_PYTHON_BUILD_DIR}/lib + COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 684691d240..0deff5ff08 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -34,6 +34,8 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/f add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python + COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS From d299528829a2ad022b11e7f05c7df1d585834372 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 10 Aug 2017 18:39:02 +0800 Subject: [PATCH 44/76] Add curandGenerateNormal to curand.h --- paddle/platform/dynload/curand.h | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/platform/dynload/curand.h b/paddle/platform/dynload/curand.h index d8c46bc41e..7bfe0778c7 100644 --- a/paddle/platform/dynload/curand.h +++ b/paddle/platform/dynload/curand.h @@ -55,6 +55,7 @@ extern void *curand_dso_handle; __macro(curandSetPseudoRandomGeneratorSeed); \ __macro(curandGenerateUniform); \ __macro(curandGenerateUniformDouble); \ + __macro(curandGenerateNormal); \ __macro(curandDestroyGenerator); CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP); From f4bb60ae37d8e6f1815d5c46ac30096aae04fcbf Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 10 Aug 2017 19:41:30 +0800 Subject: [PATCH 45/76] Refine NNPACKConvOpTest. 
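The bespoke ConvolutionTest harness in NNPACKConvOpTest.cpp is replaced by the shared drivers from ConvOpTest.h. Because NNPACK only supports stride = 1 when batchSize > 1, the shared Convolution and DepthwiseConvolution loops gain an early break for that combination; the guard added below reads:

  // NNPACK only supports stride = 1 if batchSize > 1
  if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
      batchSize > 1 && stride > 1)
    break;

Keeping the backend check inside the shared drivers avoids duplicating the whole shape sweep in the NNPACK test file.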
--- paddle/function/ConvOpTest.h | 17 ++++- paddle/function/nnpack/NNPACKConvOpTest.cpp | 85 ++------------------- 2 files changed, 22 insertions(+), 80 deletions(-) diff --git a/paddle/function/ConvOpTest.h b/paddle/function/ConvOpTest.h index d745afca56..d8c3bb03b3 100644 --- a/paddle/function/ConvOpTest.h +++ b/paddle/function/ConvOpTest.h @@ -80,6 +80,12 @@ void Convolution(const std::string& conv1, for (size_t stride : {1, 2}) { for (size_t padding : {0, 1}) { if (padding >= filterSize) break; + + // NNPACK only supports stride = 1 if batchSize > 1 + if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && + batchSize > 1 && stride > 1) + break; + size_t outputSize = (inputSize - filterSize + 2 * padding + stride) / stride; VLOG(3) << " batchSize=" << batchSize @@ -102,7 +108,7 @@ void Convolution(const std::string& conv1, .set("paddings", paddings) .set("strides", strides) .set("groups", (size_t)1) - .set("algo", "auto")); + .set("algo", (std::string) "auto")); TensorShape input{ batchSize, inputChannels, inputSize, inputSize}; @@ -163,7 +169,7 @@ void Convolution2(const std::string& conv1, .set("paddings", paddings) .set("strides", strides) .set("groups", (size_t)1) - .set("algo", "auto")); + .set("algo", (std::string) "auto")); TensorShape input{ batchSize, inputChannels, inputHeight, inputWidth}; @@ -196,6 +202,11 @@ void DepthwiseConvolution(const std::string& conv1, for (size_t outputChannels : {32, 64}) { for (size_t stride : {1, 2}) { for (size_t padding : {0, 1}) { + // NNPACK only supports stride = 1 if batchSize > 1 + if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && + batchSize > 1 && stride > 1) + break; + size_t outputSize = (inputSize - filterSize + 2 * padding + stride) / stride; VLOG(3) << " batchSize=" << batchSize @@ -219,7 +230,7 @@ void DepthwiseConvolution(const std::string& conv1, .set("paddings", paddings) .set("strides", strides) .set("groups", groups) - .set("algo", "auto")); + .set("algo", (std::string) "auto")); TensorShape input{ batchSize, inputChannels, inputSize, inputSize}; diff --git a/paddle/function/nnpack/NNPACKConvOpTest.cpp b/paddle/function/nnpack/NNPACKConvOpTest.cpp index 4818011211..4dd3982487 100644 --- a/paddle/function/nnpack/NNPACKConvOpTest.cpp +++ b/paddle/function/nnpack/NNPACKConvOpTest.cpp @@ -13,87 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include -#include "paddle/function/Function.h" -#include "paddle/function/FunctionTest.h" - -DEFINE_string(algo, - "auto", - "The algorithm (auto, ft8x8, ft16x16, wt8x8, " - "implicit-gemm, or direct) for computing convolution of NNPACK."); +#include "paddle/function/ConvOpTest.h" namespace paddle { -#define IS_NNPACK_SUPPORT(algo, filterSize, stride) \ - if (algo == "direct" && filterSize != 1) continue; \ - if (algo == "direct" && batchSize != 1) continue; \ - if (algo == "wt8x8" && filterSize != 3) continue; \ - if (algo == "implicit-gemm" && batchSize != 1) continue; \ - if (algo != "auto" && algo != "implicit-gemm" && stride > 1) continue; - -class ConvolutionTest { -public: - ConvolutionTest(const std::string& conv1, - const std::string& conv2, - std::string algo = "auto") { - for (size_t batchSize : {1, 32}) { - for (size_t inputSize : {7, 14, 54}) { - for (size_t filterSize : {1, 3, 5}) { - for (size_t inputChannels : {3, 64}) { - for (size_t outputChannels : {3, 64, 128}) { - if (inputChannels < outputChannels) break; - for (size_t stride : {1, 2}) { - // if batchSize > 1 NNPACKConv only supports stride = 1 - if (batchSize > 1 && stride > 1) break; - for (size_t padding : {0, 1}) { - if (padding >= filterSize) break; - size_t outputSize = - (inputSize - filterSize + 2 * padding + stride) / stride; - IS_NNPACK_SUPPORT(algo, filterSize, stride); - LOG(INFO) << " batchSize=" << batchSize - << " inputChannels=" << inputChannels - << " inputHeight=" << inputSize - << " inputWidth=" << inputSize - << " outputChannels=" << outputChannels - << " filterHeight=" << filterSize - << " filterWidth=" << filterSize - << " outputHeight=" << outputSize - << " outputWidth=" << outputSize - << " stride=" << stride << " padding=" << padding; - - std::vector paddings = {padding, padding}; - std::vector strides = {stride, stride}; - Compare2Function test( - conv1, - conv2, - FuncConfig() - .set("paddings", paddings) - .set("strides", strides) - .set("groups", (size_t)1) - .set("algo", algo)); - - TensorShape shape0{ - batchSize, inputChannels, inputSize, inputSize}; - TensorShape shape1{ - outputChannels, inputChannels, filterSize, filterSize}; - TensorShape shape2{ - batchSize, outputChannels, outputSize, outputSize}; - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape0)); - test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape1)); - test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape2)); - test.run(); - } - } - } - } - } - } - } - } -}; +TEST(NNPACK, Forward) { + Convolution( + "GemmConv-CPU", "NNPACKConv-CPU", forward); +} -TEST(Convolution, NNPACK) { - // NNPACK only supports stride = 1 - ConvolutionTest test("GemmConv-CPU", "NNPACKConv-CPU", FLAGS_algo); +TEST(NNPACK, Depthwise) { + DepthwiseConvolution( + "GemmConv-CPU", "NNPACKConv-CPU", forward); } } // namespace paddle From 9dccdd77a1a86b6cf08c66dfef4bfecd94944817 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 11:43:39 +0000 Subject: [PATCH 46/76] Add c-api interface, paddle_gradient_machine_create_for_inference_with_parameters, to create a gradient machine for inference using merged model with parameters which is genearted by `paddle merge_model`. 
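The new entry point can be driven from any language that can call C. Below is a minimal Python sketch via ctypes — only the function name and argument order come from the `gradient_machine.h` change in this patch; the shared-library name and the meaning of the zero return code are assumptions:

```python
import ctypes

# Assumed install name of the C-API shared library.
capi = ctypes.CDLL("libpaddle_capi_shared.so")

def create_machine(merged_model_path):
    # Read the file produced by `paddle merge_model` as one opaque blob;
    # the C side parses the config size, config protobuf, and parameters.
    with open(merged_model_path, "rb") as f:
        blob = f.read()
    machine = ctypes.c_void_p()
    err = capi.paddle_gradient_machine_create_for_inference_with_parameters(
        ctypes.byref(machine), blob, ctypes.c_uint64(len(blob)))
    assert err == 0, "paddle C-API returned error %d" % err  # 0 assumed to be success
    return machine
```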
--- cmake/flags.cmake | 10 +++++--- paddle/capi/gradient_machine.cpp | 25 +++++++++++++++++++ paddle/capi/gradient_machine.h | 12 +++++++++ .../gradientmachines/NeuralNetwork.cpp | 2 -- 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index e26d8d9df3..b27eb71550 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -9,10 +9,12 @@ function(CheckCompilerCXX11Flag) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.") endif() - # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. - # Use Debug mode instead for now. - if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) - set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) + if(NOT ANDROID) + # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. + # Use Debug mode instead for now. + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) + endif() endif() elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp index e2d2d30ddc..f7ad30f3bf 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/capi/gradient_machine.cpp @@ -54,6 +54,31 @@ paddle_error paddle_gradient_machine_create_for_inference( return kPD_NO_ERROR; } +paddle_error paddle_gradient_machine_create_for_inference_with_parameters( + paddle_gradient_machine* machine, void* mergedModel, uint64_t size) { + if (mergedModel == nullptr) return kPD_NULLPTR; + std::istringstream is(std::string(static_cast(mergedModel), size)); + int64_t modelConfigSize = 0; + is.read((char*)(&modelConfigSize), sizeof(modelConfigSize)); + std::string modelConfigProtobuf; + modelConfigProtobuf.resize(modelConfigSize); + is.read(&modelConfigProtobuf[0], modelConfigSize); + paddle::TrainerConfig config; + if (!config.ParseFromString(modelConfigProtobuf) || !config.IsInitialized()) { + return kPD_PROTOBUF_ERROR; + } + auto ptr = new paddle::capi::CGradientMachine(); + ptr->machine.reset(paddle::GradientMachine::create( + config.model_config(), CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE})); + std::vector& parameters = ptr->machine->getParameters(); + for (auto& para : parameters) { + para->load(is); + } + + *machine = ptr; + return kPD_NO_ERROR; +} + paddle_error paddle_gradient_machine_destroy(paddle_gradient_machine machine) { delete cast(machine); return kPD_NO_ERROR; diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h index 2426839050..2205e0e23a 100644 --- a/paddle/capi/gradient_machine.h +++ b/paddle/capi/gradient_machine.h @@ -36,6 +36,18 @@ typedef void* paddle_gradient_machine; PD_API paddle_error paddle_gradient_machine_create_for_inference( paddle_gradient_machine* machine, void* modelConfigProtobuf, int size); +/** + * @brief Create a gradient machine used for model inference, using config with + * parameters which is generated by `paddle merge_model`. + * @param [out] machine that used for model inference. 
+ * @param [in] mergedModel + * @param [in] size + * @return paddle_error + */ +PD_API paddle_error +paddle_gradient_machine_create_for_inference_with_parameters( + paddle_gradient_machine* machine, void* mergedModel, uint64_t size); + /** * @brief Load parameter from disk. * @param machine Gradient Machine. diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 148296d20b..cfa80a8936 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -24,8 +24,6 @@ limitations under the License. */ #include "paddle/gserver/layers/AgentLayer.h" #include "paddle/utils/Stat.h" -#include - namespace paddle { void parameterInitNN(int paramId, Parameter* para, From b49a1644ab9c04af301044cfcdfd0c90b8deaebb Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 19:46:32 +0800 Subject: [PATCH 47/76] add soft links to gcc4.8 version --- Dockerfile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Dockerfile b/Dockerfile index 0d0c88f40c..f9beb1b25d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,6 +28,16 @@ RUN apt-get update && \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-matplotlib gcc-4.8 g++-4.8 \ + ln -sf gcc-4.8 /usr/bin/gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ + ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ + ln -sf g++-4.8 /usr/bin/g++ && \ + ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ && \ automake locales clang-format swig doxygen cmake \ liblapack-dev liblapacke-dev libboost-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \ From 4f1f7e90aa170aef91ac2d60bdc89860f6933dd6 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 11:51:31 +0000 Subject: [PATCH 48/76] Delete c-api interface, paddle_gradient_machine_load_parameter_from_buffer, and related codes in Paddle core. 
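For context, the loader removed below expected a hand-rolled multi-file buffer rather than a single merged blob. A sketch of a writer for that layout, reconstructed from the deleted `GradientMachine::loadParameters(buf, length)` further down — the byte order is whatever `memcpy` sees on the host, assumed little-endian here:

```python
import struct

def pack_parameter_buffer(params):
    # params: list of (name, raw_bytes) pairs. Layout the removed code read:
    # a uint64 file count, then per file a NUL-terminated name, a uint64
    # byte size, and the raw parameter data.
    chunks = [struct.pack("<Q", len(params))]  # "<Q" assumes a little-endian host
    for name, data in params:
        chunks.append(name + "\0")
        chunks.append(struct.pack("<Q", len(data)))
        chunks.append(data)
    return "".join(chunks)
```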
--- paddle/capi/gradient_machine.cpp | 9 ---- paddle/capi/gradient_machine.h | 9 ---- .../gradientmachines/GradientMachine.cpp | 43 ------------------- .../gradientmachines/GradientMachine.h | 2 - paddle/parameter/Parameter.cpp | 40 ++++++++--------- paddle/parameter/Parameter.h | 5 --- 6 files changed, 18 insertions(+), 90 deletions(-) diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp index f7ad30f3bf..b3287552db 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/capi/gradient_machine.cpp @@ -93,15 +93,6 @@ paddle_error paddle_gradient_machine_load_parameter_from_disk( return kPD_NO_ERROR; } -paddle_error paddle_gradient_machine_load_parameter_from_buffer( - paddle_gradient_machine machine, const char* buf, uint64_t length) { - auto m = cast(machine); - if (m == nullptr || buf == nullptr || m->machine == nullptr) - return kPD_NULLPTR; - m->machine->loadParameters(buf, length); - return kPD_NO_ERROR; -} - paddle_error paddle_gradient_machine_forward(paddle_gradient_machine machine, paddle_arguments inArgs, paddle_arguments outArgs, diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h index 2205e0e23a..c613ade5b2 100644 --- a/paddle/capi/gradient_machine.h +++ b/paddle/capi/gradient_machine.h @@ -57,15 +57,6 @@ paddle_gradient_machine_create_for_inference_with_parameters( PD_API paddle_error paddle_gradient_machine_load_parameter_from_disk( paddle_gradient_machine machine, const char* path); -/** - * @brief Load parameter from buffer. - * @param machine Gradient Machine. - * @param buffer containing all parameters. - * @return paddle_error - */ -PD_API paddle_error paddle_gradient_machine_load_parameter_from_buffer( - paddle_gradient_machine machine, const char* buf, uint64_t length); - /** * @brief Forward a gradient machine * @param machine Gradient machine diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp index b7678d9b2f..b44e4dc202 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.cpp +++ b/paddle/gserver/gradientmachines/GradientMachine.cpp @@ -14,7 +14,6 @@ limitations under the License. 
*/ #include "GradientMachine.h" -#include #include #include "paddle/utils/Logging.h" @@ -82,48 +81,6 @@ void GradientMachine::loadParameters(const std::string& dir) { } } -void GradientMachine::loadParameters(const char* buf, uint64_t length) { - LOG(INFO) << "Loading parameter from pre-load buffer"; - - CHECK_NOTNULL(buf); - CHECK_GE(length, static_cast(sizeof(uint64_t))); - - uint64_t numFiles = 0; - memcpy(&numFiles, buf, sizeof(uint64_t)); - uint64_t position = sizeof(uint64_t); - LOG(INFO) << "numFiles: " << numFiles << ", position: " << position; - - std::map offsets; - std::map lengths; - for (uint64_t i = 0; i < numFiles; i++) { - std::string filename(buf + position); - position += filename.size() + 1; - LOG(INFO) << "filename: " << filename << ", position: " << position; - uint64_t size = 0; - memcpy(&size, buf + position, sizeof(uint64_t)); - position += sizeof(uint64_t); - offsets[filename] = const_cast(buf + position); - lengths[filename] = size; - position += size; - CHECK_GE(length, position); - } - - CHECK_GE(offsets.size(), parameters_.size()); - - for (auto& para : parameters_) { - std::string filename = para->getName(); - if (para->isFullSize()) { - if (offsets.end() == offsets.find(filename)) { - para->loadMiss(filename); - } else { - std::istringstream stream( - std::string(offsets[filename], lengths[filename])); - para->load(stream); - } - } - } -} - void GradientMachine::randParameters() { LOG(INFO) << "Initing parameters.."; diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index 081518a9d2..f9c82a2bef 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -221,8 +221,6 @@ public: void loadParameters(const std::string& dir); - void loadParameters(const char* buf, uint64_t length); - void randParameters(); virtual void getStats(real& cost, int64_t& numProcessed) { diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index 80dbb73a7d..ebe36d4937 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -314,31 +314,27 @@ bool Parameter::save(std::ostream& s) const { /** * Load parameter value from a file */ -bool Parameter::loadMiss(const std::string& filename) { - LOG(INFO) << "missing parameters [" << filename << "] while loading model."; - if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { - LOG(FATAL) << getName() << " missing, not allowed."; - return false; - } - if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to random."; - randomize(); - return true; - } - if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to zero."; - zeroMem(); - return true; - } - LOG(FATAL) << "unsupported load_missing_parameter_strategy: " - << FLAGS_load_missing_parameter_strategy; - return false; -} - bool Parameter::load(const std::string& filename) { std::ifstream fs(filename, std::ios_base::binary); if (!fs) { - loadMiss(filename); + LOG(INFO) << "missing parameters [" << filename << "] while loading model."; + if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { + LOG(FATAL) << getName() << " missing, not allowed."; + return false; + } + if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { + LOG(INFO) << getName() << " missing, set to random."; + randomize(); + return true; + } + if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) 
{ + LOG(INFO) << getName() << " missing, set to zero."; + zeroMem(); + return true; + } + LOG(FATAL) << "unsupported load_missing_parameter_strategy: " + << FLAGS_load_missing_parameter_strategy; + return false; } return load(fs); } diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index 21932f6b6e..0bac76f068 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -201,11 +201,6 @@ public: */ bool save(std::ostream& s) const; - /** - * Fill parameter when file is missed - */ - bool loadMiss(const std::string& filename); - /** * Load parameter value from a file */ From ed4ea674671214d7decbb14d04430a1b0a331897 Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 20:01:22 +0800 Subject: [PATCH 49/76] refine dockerfile --- Dockerfile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index f9beb1b25d..73310a2ec0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,13 @@ RUN apt-get update && \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-matplotlib gcc-4.8 g++-4.8 \ - ln -sf gcc-4.8 /usr/bin/gcc && \ + automake locales clang-format swig doxygen cmake \ + liblapack-dev liblapacke-dev libboost-dev \ + clang-3.8 llvm-3.8 libclang-3.8-dev \ + net-tools && \ + apt-get clean -y + +RUN ln -sf gcc-4.8 /usr/bin/gcc && \ ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ @@ -37,12 +43,7 @@ RUN apt-get update && \ ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ ln -sf g++-4.8 /usr/bin/g++ && \ - ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ && \ - automake locales clang-format swig doxygen cmake \ - liblapack-dev liblapacke-dev libboost-dev \ - clang-3.8 llvm-3.8 libclang-3.8-dev \ - net-tools && \ - apt-get clean -y + ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ # paddle is using numpy.flip, which is introduced since 1.12.0 RUN pip --no-cache-dir install 'numpy>=1.12.0' From f48e2fafb47262112a1243d03babbb8b8a476de8 Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 20:31:30 +0800 Subject: [PATCH 50/76] fix pip install error --- Dockerfile | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 73310a2ec0..c9bda6c2f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,17 +34,6 @@ RUN apt-get update && \ net-tools && \ apt-get clean -y -RUN ln -sf gcc-4.8 /usr/bin/gcc && \ - ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ - ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ - ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ - ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \ - ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \ - ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ - ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ - ln -sf g++-4.8 /usr/bin/g++ && \ - ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ - # paddle is using numpy.flip, which is introduced since 1.12.0 RUN pip --no-cache-dir install 'numpy>=1.12.0' @@ -82,6 +71,18 @@ RUN pip install --upgrade pip && \ RUN apt-get install -y libssl-dev libffi-dev RUN pip install certifi urllib3[secure] +# ln -sf to gcc4.8 +RUN ln -sf gcc-4.8 /usr/bin/gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ + ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \ + ln -sf gcc-ar-4.8 
/usr/bin/x86_64-linux-gnu-gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ + ln -sf g++-4.8 /usr/bin/g++ && \ + ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ + # Install woboq_codebrowser to /woboq RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \ (cd /woboq \ From 554e08ac7ad4c9dc09e68ad7723092b2447c011f Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 20:56:29 +0800 Subject: [PATCH 51/76] fix build error: 'type name' declared as function returning a function --- paddle/operators/gaussian_random_op.cu | 3 ++- paddle/operators/uniform_random_op.cu | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 3f6824ebd0..1340b1e1e9 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -33,7 +33,8 @@ class GaussianRandomKernel : public framework::OpKernel { int seed = context.op_.GetAttr("seed"); if (seed == 0) { - seed = std::random_device()(); + std::random_device rd; + seed = rd(); } curandGenerator_t g; PADDLE_ENFORCE(platform::dynload::curandCreateGenerator( diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index f1a63e52ec..b35ebe7b63 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -51,7 +51,8 @@ class GPUUniformRandomKernel : public framework::OpKernel { unsigned int seed = static_cast(context.op_.GetAttr("seed")); if (seed == 0) { - seed = std::random_device()(); + std::random_device rd; + seed = rd(); } T min = static_cast(context.op_.GetAttr("min")); T max = static_cast(context.op_.GetAttr("max")); From 370dcf7654e85351a6fc882f0adce44278df1a8a Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 10 Aug 2017 21:46:42 +0800 Subject: [PATCH 52/76] Add a todo in the nnpack function, when groups > 1 and batchSize > 1. --- paddle/function/nnpack/NNPACKConvOp.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index 00d048eb21..c9f1ddcd92 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -196,10 +196,13 @@ public: CHECK_EQ(status, nnp_status_success); } } else { + // only supports stride = 1 + CHECK_EQ(strideH(), 1); + CHECK_EQ(strideW(), 1); + + // TODO(hedaoyuan): There has some bug when batchSize > 1 and groups_ > 1. + CHECK_EQ(groups_, (size_t)1); for (size_t g = 0; g < groups_; g++) { - // only supports stride = 1 - CHECK_EQ(strideH(), 1); - CHECK_EQ(strideW(), 1); nnp_status status = nnp_convolution_output(algorithm_, batchSize, From 55ce5a82d4942a693f0a54a5b41a2e7b4dc02ab0 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 10 Aug 2017 17:04:15 -0700 Subject: [PATCH 53/76] Docker build: create deb file and build Go by default. Otherwise the production image generation will fail due to deb not found, or executable built from Go not found. --- paddle/scripts/docker/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index f70583c641..2f0205b770 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -31,7 +31,7 @@ Configuring cmake in /paddle/build ... 
-DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_AVX=${WITH_AVX:-OFF} - -DWITH_GOLANG=${WITH_GOLANG:-OFF} + -DWITH_GOLANG=${WITH_GOLANG:-ON} -DWITH_SWIG_PY=ON -DWITH_C_API=${WITH_C_API:-OFF} -DWITH_PYTHON=${WITH_PYTHON:-ON} @@ -51,7 +51,7 @@ cmake .. \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ - -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ + -DWITH_GOLANG=${WITH_GOLANG:-ON} \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ -DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \ @@ -130,7 +130,7 @@ fi # generate deb package for current build # FIXME(typhoonzero): should we remove paddle/scripts/deb ? -if [[ ${WITH_DEB:-OFF} == "ON" ]]; then +if [[ ${WITH_DEB:-ON} == "ON" ]]; then cat < Date: Fri, 11 Aug 2017 10:38:43 +0800 Subject: [PATCH 54/76] add TODO comment --- Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c9bda6c2f7..41b6729124 100644 --- a/Dockerfile +++ b/Dockerfile @@ -71,7 +71,10 @@ RUN pip install --upgrade pip && \ RUN apt-get install -y libssl-dev libffi-dev RUN pip install certifi urllib3[secure] -# ln -sf to gcc4.8 +# TODO(qijun) The template library Eigen doesn't work well with GCC 5 +# coming with the default Docker image, so we switch to use GCC 4.8 +# by default. And I will check Eigen library later. + RUN ln -sf gcc-4.8 /usr/bin/gcc && \ ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ From f485a9bc501e743b5284132a6c06ad8bc365b065 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 11 Aug 2017 13:44:39 +0800 Subject: [PATCH 55/76] add auto gradient check design doc --- doc/design/auto_gradient_check.md | 146 ++++++++++++++++++ .../v2/framework/tests/gradient_checker.py | 16 +- 2 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 doc/design/auto_gradient_check.md diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md new file mode 100644 index 0000000000..0303d6fbc0 --- /dev/null +++ b/doc/design/auto_gradient_check.md @@ -0,0 +1,146 @@ +## auto gradient check Design + +## Backgraound: +- Operator forward computing is easy to check if the result is right because it has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right: + - **Firstly** you should get the right backpropagation formula according to the forward computation. + - **Secondly** you should implement it right in CPP. + - **Thirdly** it's difficult to prepare test data. + +- Auto gradient check gets a numeric gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages: + - **Firstly** numeric gradient checker only need forward operator. + - **Secondly** user only need to prepare the input data for forward Operator. + +## mathematical theory +The following two document from stanford has a detailed explanation of how to get numeric gradient and why it's useful. + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) + + +## Numeric Gradient Implementation +### Interface +```python +def get_numeric_gradient(op, + input_values, + output_name, + input_to_check, + delta=0.005, + local_scope=None): + """ + Get Numeric Gradient for an operator's input. 
+
+    :param op: C++ operator instance, could be a network.
+    :param input_values: The input variables. Should be a dictionary whose
+    key is the variable name and whose value is a numpy array.
+    :param output_name: The final output variable name.
+    :param input_to_check: The input variable whose gradient is computed.
+    :param delta: The perturbation value for the numeric gradient method. The
+    smaller delta is, the more accurate the result will be, but if delta is
+    too small, numerical stability problems may occur.
+    :param local_scope: The local scope used for get_numeric_gradient.
+    :return: The gradient array in numpy format.
+    """
+```
+
+### Explanation:
+
+1. Why need `output_name`
+   - One Operator may have multiple Outputs, and you can get an independent gradient from each Output. So the user should set one output to calculate.
+
+1. Why need `input_to_check`
+   - One operator may have multiple inputs. Gradient Op can calculate the gradients of these Inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times.
+
+
+### Core algorithm implement
+
+
+```python
+    # we only compute gradient of one element each time.
+    # we use a for loop to compute the gradient of every element.
+    for i in xrange(tensor_size):
+        # get one input element through its index i.
+        origin = tensor_to_check.get_float_element(i)
+
+        # add delta to it, run op and then get the sum of the result tensor.
+        x_pos = origin + delta
+        tensor_to_check.set_float_element(i, x_pos)
+        y_pos = get_output()
+
+        # subtract delta from this element, run op and get the sum of the result tensor.
+        x_neg = origin - delta
+        tensor_to_check.set_float_element(i, x_neg)
+        y_neg = get_output()
+
+        # restore old value
+        tensor_to_check.set_float_element(i, origin)
+
+        # compute the gradient of this element and store it into a numpy array.
+        gradient_flat[i] = (y_pos - y_neg) / delta / 2
+
+    # reshape the gradient result to the shape of the source tensor.
+    return gradient_flat.reshape(tensor_to_check.get_dims())
+```
+
+## auto check framework design
+
+Each Operator Kernel has three kinds of Gradient:
+
+- 1. Numeric Gradient
+- 2. CPU Operator Gradient
+- 3. GPU Operator Gradient (if supported)
+
+Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as the reference value.
+
+- **Firstly** calculate the numeric gradient.
+- **Secondly** calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient.
+- **Thirdly** calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient.(if support GPU)
+
+#### auto check python Interface
+
+```python
+    def check_grad(self,
+                   forward_op,
+                   input_vars,
+                   inputs_to_check,
+                   output_name,
+                   no_grad_set=None,
+                   only_cpu=False,
+                   max_relative_error=0.005):
+        """
+        :param forward_op: the forward operator, used to create the backward operator.
+        :param input_vars: numpy values of the input variables. The following
+        computation will use these variables.
+        :param inputs_to_check: the input variable names whose gradients should be checked.
+        :param output_name: the output name used to compute the numeric gradient.
+        :param max_relative_error: The relative tolerance parameter.
+        :param no_grad_set: used when creating the backward ops.
+        :param only_cpu: only compute and check the gradient on the cpu kernel.
+        :return:
+        """
+```
+
+### How two check two numpy array is close enough?
+If `abs_numeric_grad` is nearly zero, use the absolute error for `numeric_grad` instead of the relative error.
+
+```python
+numeric_grad = ...
+operator_grad = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
+
+abs_numeric_grad = numpy.abs(numeric_grad)
+# if abs_numeric_grad is nearly zero, use the absolute error for
+# numeric_grad instead of the relative error.
+abs_numeric_grad[abs_numeric_grad < 1e-3] = 1
+
+diff_mat = numpy.abs(numeric_grad - operator_grad) / abs_numeric_grad
+max_diff = numpy.max(diff_mat)
+```
+
+
+#### Notes:
+1. The input data for the auto gradient checker should be reasonable, to avoid numerical problems.
+
+
+#### refs:
+
+- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
+- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96)
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index aacc5e88fe..015e832e82 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -73,21 +73,35 @@ def get_numeric_gradient(op,
     def product(dim):
         return reduce(lambda a, b: a * b, dim, 1)
 
+    # get the input tensor whose numeric gradient we want to compute.
     tensor_to_check = local_scope.find_var(input_to_check).get_tensor()
     tensor_size = product(tensor_to_check.get_dims())
+    # prepare a numpy array to store the gradient.
     gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32')
+
+    # we only compute gradient of one element each time.
+    # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
+        # get one input element through its index i.
         origin = tensor_to_check.get_float_element(i)
+
+        # add delta to it, run op and then get the sum of the result tensor.
         x_pos = origin + delta
         tensor_to_check.set_float_element(i, x_pos)
         y_pos = get_output()
 
+        # subtract delta from this element, run op and get the sum of the result tensor.
         x_neg = origin - delta
         tensor_to_check.set_float_element(i, x_neg)
         y_neg = get_output()
 
-        tensor_to_check.set_float_element(i, origin)  # restore old value
+        # restore old value
+        tensor_to_check.set_float_element(i, origin)
+
+        # compute the gradient of this element and store it into a numpy array.
         gradient_flat[i] = (y_pos - y_neg) / delta / 2
+
+    # reshape the gradient result to the shape of the source tensor.
     return gradient_flat.reshape(tensor_to_check.get_dims())

From cac4ad44493a0242ca8bedc9b4bb675ee6af1224 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Fri, 11 Aug 2017 14:12:03 +0800
Subject: [PATCH 56/76] delete useless codes in softmax backward.
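As an aside, the central-difference rule that `get_numeric_gradient` above relies on is easy to sanity-check in isolation. A self-contained sketch with plain numpy — `f(x) = sum(x**2)` is chosen only because its analytic gradient `2*x` is known; nothing here is part of any patch:

```python
import numpy

def numeric_grad(f, x, delta=0.005):
    # Perturb one element at a time and apply (f(x+d) - f(x-d)) / (2*d).
    grad = numpy.zeros_like(x)
    x_flat, g_flat = x.reshape(-1), grad.reshape(-1)  # views into x and grad
    for i in xrange(x_flat.size):
        origin = x_flat[i]
        x_flat[i] = origin + delta
        y_pos = f(x)
        x_flat[i] = origin - delta
        y_neg = f(x)
        x_flat[i] = origin  # restore the original value
        g_flat[i] = (y_pos - y_neg) / delta / 2
    return grad

x = numpy.random.rand(8).astype("float32")
g = numeric_grad(lambda v: (v ** 2).sum(), x)
# f is quadratic, so the central difference is exact up to rounding error.
print numpy.abs(g - 2 * x).max()
```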
--- paddle/gserver/activations/ActivationFunction.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index 5de2170877..78e958e06f 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -112,7 +112,6 @@ BEGIN_DEFINE_ACTIVATION(softmax) private: MatrixPtr sftMaxSum_; MatrixPtr sftMaxDot_; -MatrixPtr one_; public: Error __must_check forward(Argument& act) { @@ -138,14 +137,6 @@ Error __must_check backward(Argument& act) { 1, /* trans */ false, useGpu(act.deviceId)); - if (!one_ || one_->getWidth() != outputG->getWidth()) { - Matrix::resizeOrCreate(one_, - 1, - outputG->getWidth(), - /* trans */ false, - useGpu(act.deviceId)); - one_->one(); - } sftMaxDot_->dotMul(*outputG, *outputV); sftMaxSum_->colMerge(*sftMaxDot_); From da616a6f2fe22b42faa9aab1caa5f2ff8c875111 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 11 Aug 2017 14:14:26 +0800 Subject: [PATCH 57/76] Fix some bugs. --- paddle/function/ConvOpTest.h | 5 +-- paddle/function/nnpack/NNPACKConvOp.cpp | 41 ++++++++++++------------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/paddle/function/ConvOpTest.h b/paddle/function/ConvOpTest.h index d8c3bb03b3..cb02a96d0d 100644 --- a/paddle/function/ConvOpTest.h +++ b/paddle/function/ConvOpTest.h @@ -202,9 +202,10 @@ void DepthwiseConvolution(const std::string& conv1, for (size_t outputChannels : {32, 64}) { for (size_t stride : {1, 2}) { for (size_t padding : {0, 1}) { - // NNPACK only supports stride = 1 if batchSize > 1 + // NNPACK only supports stride = 1 if batchSize > 1, + // and there has some bug when batchSize > 1 and groups != 1 if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && - batchSize > 1 && stride > 1) + batchSize > 1) break; size_t outputSize = diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/function/nnpack/NNPACKConvOp.cpp index c9f1ddcd92..6ccc487cf1 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/function/nnpack/NNPACKConvOp.cpp @@ -201,28 +201,25 @@ public: CHECK_EQ(strideW(), 1); // TODO(hedaoyuan): There has some bug when batchSize > 1 and groups_ > 1. 
- CHECK_EQ(groups_, (size_t)1); - for (size_t g = 0; g < groups_; g++) { - nnp_status status = - nnp_convolution_output(algorithm_, - batchSize, - inputChannels / groups_, - outputChannels / groups_, - inputSize, - padding, - kernelSize, - inputData + inputOffset * g, - filterData + filterOffset * g, - nullptr, /* bias */ - outputData + outputOffset * g, - bufferPtr, - sizePtr, - nnp_activation_identity, - nullptr, - threadpool_, /* threadpool */ - nullptr); - CHECK_EQ(status, nnp_status_success); - } + CHECK_EQ(groups_, static_cast(1)); + nnp_status status = nnp_convolution_output(algorithm_, + batchSize, + inputChannels, + outputChannels, + inputSize, + padding, + kernelSize, + inputData, + filterData, + nullptr, /* bias */ + outputData, + bufferPtr, + sizePtr, + nnp_activation_identity, + nullptr, + threadpool_, /* threadpool */ + nullptr); + CHECK_EQ(status, nnp_status_success); } } From 886e66a5ff8920d612023e3eb3091bbb1d5d21dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AD=A6=E6=AF=85?= Date: Fri, 11 Aug 2017 14:35:33 +0800 Subject: [PATCH 58/76] golang pserver use OptimizerConfig.proto (#3358) * golang pserver optimizer config for user * update * update * update * update * update by comments * fix errors * fix errors --- go/pserver/client/c/test/test_train.py | 6 +- paddle/api/ParameterUpdater.cpp | 2 +- paddle/trainer/NewRemoteParameterUpdater.cpp | 98 ++++++++++++++++---- python/paddle/v2/optimizer.py | 24 ++++- python/paddle/v2/parameters.py | 14 +++ 5 files changed, 117 insertions(+), 27 deletions(-) diff --git a/go/pserver/client/c/test/test_train.py b/go/pserver/client/c/test/test_train.py index 572a61e4cc..8d9c6b9b20 100644 --- a/go/pserver/client/c/test/test_train.py +++ b/go/pserver/client/c/test/test_train.py @@ -17,12 +17,10 @@ def main(): # network config x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) y_predict = paddle.layer.fc(input=x, - param_attr=paddle.attr.Param( - name='w', learning_rate=1e-3), + param_attr=paddle.attr.Param(name='w'), size=1, act=paddle.activation.Linear(), - bias_attr=paddle.attr.Param( - name='b', learning_rate=1e-3)) + bias_attr=paddle.attr.Param(name='b')) y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) cost = paddle.layer.mse_cost(input=y_predict, label=y) diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp index 5934cb898b..8cd73b348c 100644 --- a/paddle/api/ParameterUpdater.cpp +++ b/paddle/api/ParameterUpdater.cpp @@ -41,7 +41,7 @@ ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( config->m->getConfig(), pserverSpec, useEtcd)); return updater; #else - throw UnsupportError(); + throw UnsupportError("not compiled with WITH_GOLANG"); #endif } diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index af1dceed02..cccb7e7cdd 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -66,28 +66,92 @@ void NewRemoteParameterUpdater::init( // from parameter server if (paddle_begin_init_params(parameterClient_)) { LOG(INFO) << "paddle_begin_init_params start"; + // NOTE: convert V1 OptimizatioinConfig proto to V2 OptimizerConfig. + // This makes golang pserver compatible with handy V1 demos. 
+ // TODO: Refine or remove these ugly converting lines + OptimizerConfig optimizerConfigV2; + if (trainerConfig_.learning_method() == "momentum") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + } else if (trainerConfig_.learning_method() == "adagrad") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); + optimizerConfigV2.mutable_adagrad()->set_epsilon( + trainerConfig_.ada_epsilon()); + } else if (trainerConfig_.learning_method() == "adadelta") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); + optimizerConfigV2.mutable_adadelta()->set_epsilon( + trainerConfig_.ada_epsilon()); + optimizerConfigV2.mutable_adadelta()->set_rho(trainerConfig_.ada_rou()); + } else if (trainerConfig_.learning_method() == "adam") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adam); + optimizerConfigV2.mutable_adam()->set_beta_1(trainerConfig_.adam_beta1()); + optimizerConfigV2.mutable_adam()->set_beta_2(trainerConfig_.adam_beta2()); + optimizerConfigV2.mutable_adam()->set_epsilon( + trainerConfig_.adam_epsilon()); + } else { + LOG(ERROR) << "got unsupported v1 optimizer config: " + << trainerConfig_.learning_method(); + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + } + + if (trainerConfig_.learning_rate_schedule() == "constant") { + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); + optimizerConfigV2.mutable_const_lr()->set_learning_rate( + trainerConfig_.learning_rate()); + } else if (trainerConfig_.learning_rate_schedule() == "linear") { + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Linear); + optimizerConfigV2.mutable_linear_lr()->set_learning_rate( + trainerConfig_.learning_rate()); + optimizerConfigV2.mutable_linear_lr()->set_lr_decay_a( + trainerConfig_.learning_rate_decay_a()); + optimizerConfigV2.mutable_linear_lr()->set_lr_decay_b( + trainerConfig_.learning_rate_decay_b()); + } else { + LOG(ERROR) << "got unsupported v1 learning_rate_schedule config: " + << trainerConfig_.learning_rate_schedule() << ", set to const"; + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); + } + + // overwrite optimizerConfigV2 for per-parameter(layer) configs for (int i = 0; i < parameterSize(); ++i) { auto paramConfig = parameters_[i]->getConfig(); - LOG(INFO) << "old param config: " << paramConfig.DebugString(); - // FIXME(typhoonzero): convert old paramConfig to optimizerConfig - OptimizerConfig optimizeConfigV2; - auto sgdConfigV2 = optimizeConfigV2.mutable_sgd(); - sgdConfigV2->set_momentum(paramConfig.momentum()); - sgdConfigV2->set_decay(paramConfig.decay_rate()); - optimizeConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); - auto constlr = optimizeConfigV2.mutable_const_lr(); + if (paramConfig.has_momentum() && + trainerConfig_.learning_method() == "momentum") { + optimizerConfigV2.mutable_sgd()->set_momentum(paramConfig.momentum()); + } if (paramConfig.has_learning_rate()) { - constlr->set_learning_rate(paramConfig.learning_rate()); - } else { - constlr->set_learning_rate(trainerConfig_.learning_rate()); + switch (optimizerConfigV2.lr_policy()) { + case 0: + optimizerConfigV2.mutable_const_lr()->set_learning_rate( + paramConfig.learning_rate()); + break; + case 1: + optimizerConfigV2.mutable_linear_lr()->set_learning_rate( + paramConfig.learning_rate()); + break; + } } - if (trainerConfig_.algorithm() == "sgd") { - optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); - // FIXME: config all algorithms - } else { - 
optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + if (paramConfig.has_decay_rate()) { + switch (optimizerConfigV2.optimizer()) { + case 1: // SGD + optimizerConfigV2.mutable_sgd()->set_decay( + paramConfig.decay_rate()); + break; + case 2: // Adadelta + optimizerConfigV2.mutable_adadelta()->set_decay( + paramConfig.decay_rate()); + break; + case 3: // Adagrad + optimizerConfigV2.mutable_adagrad()->set_decay( + paramConfig.decay_rate()); + break; + case 4: // Adam + optimizerConfigV2.mutable_adam()->set_decay( + paramConfig.decay_rate()); + break; + } } - std::string bytes = optimizeConfigV2.SerializeAsString(); + // send param and config to pserver + std::string bytes = optimizerConfigV2.SerializeAsString(); const char *array = bytes.data(); int size = (int)bytes.size(); paddle_init_param( diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index ba58198033..29f0945eb4 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,13 +1,26 @@ -import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.trainer_config_helpers.optimizers as v1_optimizers +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Optimizers(update equation) for SGD method. -TODO(zhihong) : create new optimizer with proto config, add new optimizer here - TODO(yuyang18): Complete comments. """ +import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils +import paddle.trainer_config_helpers.optimizers as v1_optimizers +from paddle.proto.OptimizerConfig_pb2 import OptimizerConfig + __all__ = [ 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', 'RMSProp', 'ModelAverage', 'L2Regularization' @@ -70,7 +83,8 @@ class Optimizer(object): gradient_machine.prefetch(in_args) parameter_updater.getParametersRemote() - :param pserver_spec: pserver location, eg: localhost:3000 + :param pserver_spec: pserver location, eg: localhost:3000, if use etcd, + pserver_spec should be the etcd endpoints, eg: http://localhost:2379 :return: parameter_updater """ if is_local: diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index a9cba8ca0b..364306d674 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
 import numpy as np
 from paddle.proto.ParameterConfig_pb2 import ParameterConfig
 import paddle.trainer.config_parser as cp

From 01e9e44348ddea11e2e6041ff63e98e28bba4905 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=AD=A6=E6=AF=85?=
Date: Fri, 11 Aug 2017 14:40:58 +0800
Subject: [PATCH 59/76] able to print gradients in event_handler (#3085)

---
 python/paddle/v2/parameters.py | 39 ++++++++++++++++++++++++----------
 python/paddle/v2/trainer.py    |  4 ++--
 2 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py
index 364306d674..8d8012e5d5 100644
--- a/python/paddle/v2/parameters.py
+++ b/python/paddle/v2/parameters.py
@@ -127,16 +127,7 @@ class Parameters(object):
         """
         return iter(self.__param_conf__)
 
-    def __getitem__(self, key):
-        """
-        Get parameter by parameter name. It uses Python dict syntax.
-
-        :note: It will always copy the parameter from C++ side.
-        :param key: Parameter name
-        :type key: basestring
-        :return: parameter value
-        :rtype: np.ndarray
-        """
+    def __getter_inner(self, key, param_type):
         import py_paddle.swig_paddle as api
         shape = self.get_shape(key)
 
@@ -152,7 +143,7 @@ class Parameters(object):
                 each_gradient_machine, key)
             # for simplify implementation now, we always copy from C++
             assert isinstance(param, api.Parameter)
-            val = param.getBuf(api.PARAMETER_VALUE)
+            val = param.getBuf(param_type)
             assert isinstance(val, api.Vector)
             val = val.copyToNumpyArray()
             return val
@@ -160,6 +151,19 @@ class Parameters(object):
 
         raise RuntimeError("Unexpected branch")
 
+    def __getitem__(self, key):
+        """
+        Get parameter by parameter name. It uses Python dict syntax.
+
+        :note: It will always copy the parameter from C++ side.
+        :param key: Parameter name
+        :type key: basestring
+        :return: parameter value
+        :rtype: np.ndarray
+        """
+        import py_paddle.swig_paddle as api
+        return self.__getter_inner(key, api.PARAMETER_VALUE)
+
     def get_shape(self, key):
         """
         get shape of the parameter.
@@ -216,6 +220,19 @@ class Parameters(object):
         """
         return self.__getitem__(key=parameter_name)
 
+    def get_grad(self, key):
+        """
+        Get gradient by parameter name.
+
+        :note: It will always copy the parameter from C++ side.
+        :param key: parameter name
+        :type key: basestring
+        :return: The gradient matrix.
+        :rtype: np.ndarray
+        """
+        import py_paddle.swig_paddle as api
+        return self.__getter_inner(key, api.PARAMETER_GRADIENT)
+
     def set(self, parameter_name, value):
         """
         Set parameter by parameter name & matrix.
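Combined with the `trainer.py` reordering below — `EndIteration` now fires before the updater's `finishBatch()`, while gradient buffers still hold the current batch — the new accessor makes handlers like the following possible. A sketch only: the `parameters` object and the parameter name `'w'` (as in the `test_train.py` network earlier in this series) are assumed surrounding context, not part of this patch:

```python
def event_handler(event):
    # Gradients of the current batch are still live here after this patch,
    # because finishBatch() now runs after the event is handled.
    if isinstance(event, paddle.event.EndIteration) and event.batch_id % 100 == 0:
        w_grad = parameters.get_grad('w')
        print "pass %d, batch %d, max |grad(w)| = %f" % (
            event.pass_id, event.batch_id, abs(w_grad).max())
```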
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 76bae0bb12..9c4dd5f250 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -161,14 +161,14 @@ class SGD(object): self.__parameter_updater__.update(each_param) cost_sum = out_args.sum() cost = cost_sum / len(data_batch) - self.__parameter_updater__.finishBatch(cost) - batch_evaluator.finish() event_handler( v2_event.EndIteration( pass_id=pass_id, batch_id=batch_id, cost=cost, evaluator=batch_evaluator)) + self.__parameter_updater__.finishBatch(cost) + batch_evaluator.finish() self.__parameter_updater__.finishPass() pass_evaluator.finish() From 138646fa19547a4add13de44ecd9c1bf5b35196a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 11 Aug 2017 14:58:34 +0800 Subject: [PATCH 60/76] Fix compile on develop branch --- paddle/framework/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index db7c874741..9ac87acdac 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -36,8 +36,8 @@ py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc. add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) add_custom_command(TARGET framework_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PROJ_ROOT}/python/paddle/v2/framework/proto - COMMAND cp *.py ${PROJ_ROOT}/python/paddle/v2/framework/proto/ + COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto + COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto/ COMMENT "Copy generated python proto into directory paddle/v2/framework/proto." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) From e7822dcdc999e8b97d908803926811baf60e67bd Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 11 Aug 2017 15:56:08 +0800 Subject: [PATCH 61/76] Capitalize the first character of some title --- doc/design/auto_gradient_check.md | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md index 0303d6fbc0..1f4d4ec16f 100644 --- a/doc/design/auto_gradient_check.md +++ b/doc/design/auto_gradient_check.md @@ -1,16 +1,16 @@ -## auto gradient check Design +## Auto Gradient Checker Design ## Backgraound: - Operator forward computing is easy to check if the result is right because it has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right: - - **Firstly** you should get the right backpropagation formula according to the forward computation. - - **Secondly** you should implement it right in CPP. - - **Thirdly** it's difficult to prepare test data. + - 1. you should get the right backpropagation formula according to the forward computation. + - 2. you should implement it right in CPP. + - 3. it's difficult to prepare test data. - Auto gradient check gets a numeric gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages: - - **Firstly** numeric gradient checker only need forward operator. - - **Secondly** user only need to prepare the input data for forward Operator. + - 1. numeric gradient checker only need forward operator. + - 2. user only need to prepare the input data for forward Operator. 
-## mathematical theory
+## Mathematical Theory
 The following two document from stanford has a detailed explanation of how to get numeric gradient and why it's useful.
 
 - [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
@@ -18,7 +18,7 @@ The following two document from stanford has a detailed explanation of how to ge
 ## Numeric Gradient Implementation
-### Interface
+### Python Interface
 ```python
 def get_numeric_gradient(op,
                          input_values,
@@ -44,14 +44,14 @@ def get_numeric_gradient(op,
 
 ### Explanation:
 
-1. Why need `output_name`
+- Why need `output_name`
    - One Operator may have multiple Outputs, and you can get an independent gradient from each Output. So the user should set one output to calculate.
 
-1. Why need `input_to_check`
+- Why need `input_to_check`
    - One operator may have multiple inputs. Gradient Op can calculate the gradients of these Inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times.
 
 
-### Core algorithm implement
+### Core Algorithm Implementation
 
 
 ```python
@@ -81,7 +81,7 @@ def get_numeric_gradient(op,
  return gradient_flat.reshape(tensor_to_check.get_dims())
 ```
 
-## auto check framework design
+## Auto Gradient Checker Framework
 
 Each Operator Kernel has three kinds of Gradient:
 
@@ -91,11 +91,11 @@ Each Operator Kernel has three kinds of Gradient:
 
 Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as the reference value.
 
-- **Firstly** calculate the numeric gradient.
-- **Secondly** calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient.
-- **Thirdly** calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient.(if support GPU)
+- 1. calculate the numeric gradient.
+- 2. calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient.
+- 3. calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient (if GPU is supported).
 
-#### auto check python Interface
+#### Python Interface
 
 ```python
     def check_grad(self,
@@ -119,7 +119,7 @@ Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as
     """
 ```
 
-### How two check two numpy array is close enough?
+### How to check if two numpy arrays are close enough?
 If `abs_numeric_grad` is nearly zero, use the absolute error for `numeric_grad` instead of the relative error.
 
 ```python
@@ -140,7 +140,7 @@ max_diff = numpy.max(diff_mat)
 
 
 #### Notes:
 1. The input data for the auto gradient checker should be reasonable, to avoid numerical problems.
-#### refs: +#### Refs: - [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) - [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) From 9a592ec3aacb96d68fe80b0bb21968b7873b3093 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 11 Aug 2017 16:03:16 +0800 Subject: [PATCH 62/76] Remove FC Op, since it should be added in Python side --- paddle/framework/CMakeLists.txt | 5 +- paddle/framework/pybind.cc | 1 - paddle/operators/CMakeLists.txt | 3 - paddle/operators/fc_op.cc | 76 ------------------- .../paddle/v2/framework/tests/CMakeLists.txt | 1 - .../paddle/v2/framework/tests/test_fc_op.py | 45 ----------- python/paddle/v2/framework/tests/test_net.py | 21 +++-- 7 files changed, 19 insertions(+), 133 deletions(-) delete mode 100644 paddle/operators/fc_op.cc delete mode 100644 python/paddle/v2/framework/tests/test_fc_op.py diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 9ac87acdac..9e98afb311 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -48,9 +48,12 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward - fc_op sgd_op add_op + mul_op + rowwise_add_op + sigmoid_op + softmax_op mean_op cross_entropy_op recurrent_op diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 5fd6754e56..7f47b38900 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -31,7 +31,6 @@ namespace py = pybind11; USE_OP(add_two); USE_OP_CPU(onehot_cross_entropy); -USE_OP_WITHOUT_KERNEL(fc); USE_OP(sgd); USE_OP(mul); USE_OP(mean); diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index b3399aaf0f..c181bd7b88 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -61,9 +61,6 @@ op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) -op_library(fc_op - SRCS fc_op.cc - DEPS mul_op rowwise_add_op sigmoid_op softmax_op net_op) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS op_desc tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) diff --git a/paddle/operators/fc_op.cc b/paddle/operators/fc_op.cc deleted file mode 100644 index 01a1a81206..0000000000 --- a/paddle/operators/fc_op.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ - -#include "paddle/operators/net_op.h" - -#include "paddle/framework/eigen.h" -#include "paddle/framework/op_registry.h" - -namespace paddle { -namespace operators { - -using OpRegistry = framework::OpRegistry; - -class FullyConnectedOp : public NetOp { - public: - void Init() override { - AddOp(OpRegistry::CreateOp("mul", - { - Input("X"), Input("W"), - }, - {Output("before_act")}, {})); - auto b = Input("b"); - if (b != framework::kEmptyVarName) { - AddOp(OpRegistry::CreateOp("rowwise_add", - {Output("before_act"), Input("b")}, - {Output("before_act")}, {})); - } - - auto activation = GetAttr("activation"); - AddOp(OpRegistry::CreateOp(activation, {Output("before_act")}, - {Output("Y")}, {})); - CompleteAddOp(false); - } -}; - -class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker { - public: - FullyConnectedOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "the input of fc operator"); - AddInput("W", "the weight of fc operator"); - AddInput("b", "the bias of fc operator"); - - AddOutput("Y", "the output of fc operator"); - AddOutput("before_act", "the before activation output of fc operator") - .SetTemporary(); - AddAttr("activation", "The activation key for fc layer") - .SetDefault("sigmoid") - .InEnum({"sigmoid", "softmax"}); - - //! TODO(yuyang18): Complete comment; - AddComment("FullyConnected Operator"); - } -}; -} // namespace operators -} // namespace paddle - -USE_OP(mul); -USE_OP(rowwise_add); -USE_OP(sigmoid); -USE_OP(softmax); - -namespace ops = paddle::operators; -REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker); diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 55ed724e8f..b76c05dc81 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -1,6 +1,5 @@ py_test(test_net SRCS test_net.py) -py_test(test_fc_op SRCS test_fc_op.py) py_test(test_scope SRCS test_scope.py) py_test(test_tensor SRCS test_tensor.py) diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py deleted file mode 100644 index e24435839d..0000000000 --- a/python/paddle/v2/framework/tests/test_fc_op.py +++ /dev/null @@ -1,45 +0,0 @@ -import paddle.v2.framework.core as core -import unittest -import numpy -from paddle.v2.framework.op import Operator - - -class TestFc(unittest.TestCase): - def test_fc(self): - scope = core.Scope() - place = core.CPUPlace() - x = scope.new_var("X") - - x_tensor = x.get_tensor() - x_tensor.set_dims([1000, 784]) - x_tensor.alloc_float(place) - - w = scope.new_var("W") - w_tensor = w.get_tensor() - w_tensor.set_dims([784, 100]) - w_tensor.alloc_float(place) - - w_tensor.set(numpy.random.random((784, 100)).astype("float32"), place) - - # Set a real numpy array here. - # x_tensor.set(numpy.array([])) - - op = Operator("fc", X="X", Y="Y", W="W") - - for out in op.outputs(): - if scope.find_var(out) is None: - scope.new_var(out).get_tensor() - - tensor = scope.find_var("Y").get_tensor() - op.infer_shape(scope) - self.assertEqual([1000, 100], tensor.shape()) - - ctx = core.DeviceContext.create(place) - - op.run(scope, ctx) - - # After complete all ops, check Y is expect or not. 
- - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index b30896553d..cc7f09e715 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -3,6 +3,15 @@ from paddle.v2.framework.op import Operator import unittest +def fc(X, W, Y): + ret_v = core.Net.create() + + ret_v.add_op(Operator("mul", X="X", Y="W", Out="pre_activation")) + ret_v.add_op(Operator("sigmoid", X="pre_activation", Y=Y)) + ret_v.complete_add_op(True) + return ret_v + + class TestNet(unittest.TestCase): def test_net_all(self): net = core.Net.create() @@ -10,18 +19,18 @@ class TestNet(unittest.TestCase): net.add_op(op1) net2 = core.Net.create() - net2.add_op(Operator("fc", X="X", W="w", Y="fc.out")) + net2.add_op(fc(X="X", W="w", Y="fc.out")) net2.complete_add_op(True) net.add_op(net2) net.complete_add_op(True) expected = ''' -Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, Out, fc.out). +Op(plain_net), inputs:(W, X, Y), outputs:(Out, fc.out, pre_activation). Op(add_two), inputs:(X, Y), outputs:(Out). - Op(plain_net), inputs:(@EMPTY@, X, w), outputs:(@TEMP@fc@0, fc.out). - Op(fc), inputs:(X, w, @EMPTY@), outputs:(fc.out, @TEMP@fc@0). - Op(mul), inputs:(X, w), outputs:(@TEMP@fc@0). - Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc.out). + Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation). + Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation). + Op(mul), inputs:(X, W), outputs:(pre_activation). + Op(sigmoid), inputs:(pre_activation), outputs:(fc.out). ''' self.assertEqual(expected, "\n" + str(net)) From 7e8c337a6805f7c97ec94dcd628fb525b58d10f2 Mon Sep 17 00:00:00 2001 From: Yancey Date: Fri, 11 Aug 2017 16:15:42 +0800 Subject: [PATCH 63/76] Fix save large parameters failed (#3397) Fix save large parameters crashed --- python/paddle/v2/parameters.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index 8d8012e5d5..b8af5abaea 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -281,7 +281,13 @@ class Parameters(object): size = reduce(lambda a, b: a * b, param.shape) f.write(struct.pack("IIQ", 0, 4, size)) param = param.astype(np.float32) - f.write(param.tostring()) + s = param.tostring() + wrote_size = 0 + buf = buffer(s, wrote_size, 65535) + while buf: # f.write crashes with big data blog. + f.write(buf) + wrote_size += 65535 + buf = buffer(s, wrote_size, 65535) def deserialize(self, name, f): """ From d08b95382d6674a03b5d182a267c82ee20a0eea3 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 10:50:37 -0700 Subject: [PATCH 64/76] Add OperatorBase accessors --- paddle/framework/operator.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 50fc6d1013..f5d167a16e 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -95,16 +95,21 @@ class OperatorBase { //! Get a input with argument's name described in `op_proto` const std::string& Input(const std::string& name) const; - //! Get a input which has multiple variables. //! TODO add a vector_view to prevent memory copy. std::vector Inputs(const std::string& name) const; + //! Get a output with argument's name described in `op_proto` const std::string& Output(const std::string& name) const; //! Get an output which has multiple variables. //! 
TODO add a vector_view to prevent memory copy. std::vector Outputs(const std::string& name) const; + const std::string Type() const { return type_; } + const std::vector Inputs() const { return inputs_; } + const std::vector Outputs() const { return outputs_; } + const AttributeMap& Attrs() const { return attrs_; } + public: std::string type_; // NOTE: in case of OpGrad, inputs_ contains: From f83876a015a779ca5b9575e80a67d4a08ac94284 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 11:31:10 -0700 Subject: [PATCH 65/76] Add constructors to OperatorBase and all sub-classes --- paddle/framework/backward_test.cc | 5 +++++ paddle/framework/grad_op_builder_test.cc | 5 +++++ paddle/framework/op_registry_test.cc | 10 ++++++++++ paddle/framework/operator.h | 20 ++++++++++++++++++++ paddle/framework/operator_test.cc | 14 ++++++++++++++ 5 files changed, 54 insertions(+) diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 1677a3ed4c..b930b86ed6 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -30,6 +30,11 @@ using DeviceContext = platform::DeviceContext; class EmptyOp : public OperatorBase { public: + EmptyOp(const std::string &type, const std::vector &inputs, + const std::vector &outputs, const AttributeMap &attrs, + std::unordered_map *in_out_idxs) + : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {} }; diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index f1ebbae52f..c3ce69a344 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -10,6 +10,11 @@ namespace framework { class NOP : public OperatorBase { public: + NOP(const std::string &type, const std::vector &inputs, + const std::vector &outputs, const AttributeMap &attrs, + std::unordered_map *in_out_idxs) + : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, const platform::DeviceContext &dev_ctx) const override {} diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 9894928a7a..de3435ad35 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -7,6 +7,11 @@ namespace paddle { namespace framework { class CosineOp : public OperatorBase { public: + CosineOp(const std::string& type, const std::vector& inputs, + const std::vector& outputs, const AttributeMap& attrs, + std::unordered_map* in_out_idxs) + : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} void InferShape(const Scope& scope) const override {} @@ -27,6 +32,11 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOp : public OperatorBase { public: + MyTestOp(const std::string& type, const std::vector& inputs, + const std::vector& outputs, const AttributeMap& attrs, + std::unordered_map* in_out_idxs) + : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index f5d167a16e..8b7f743671 100644 --- a/paddle/framework/operator.h +++ 
b/paddle/framework/operator.h @@ -63,6 +63,16 @@ class ExecutionContext; */ class OperatorBase { public: + OperatorBase(const std::string& type, const std::vector& inputs, + const std::vector& outputs, + const AttributeMap& attrs, + std::unordered_map* in_out_idxs) + : type_(type), + inputs_(input), + outputs_(output), + attrs_(attrs), + in_out_idxs_(in_out_idxs) {} + virtual ~OperatorBase() {} template @@ -109,6 +119,9 @@ class OperatorBase { const std::vector Inputs() const { return inputs_; } const std::vector Outputs() const { return outputs_; } const AttributeMap& Attrs() const { return attrs_; } + const std::unordered_map* InOutIdx() const { + return in_out_idxs_.get(); + } public: std::string type_; @@ -286,6 +299,13 @@ class OpKernel { class OperatorWithKernel : public OperatorBase { public: + OperatorWithKernel(const std::string& type, + const std::vector& inputs, + const std::vector& outputs, + const AttributeMap& attrs, + std::unordered_map* in_out_idxs) + : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + struct OpKernelKey { platform::Place place_; diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 387aada749..a538abe7fe 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -23,6 +23,13 @@ static int op_run_num = 0; class OpWithoutKernelTest : public OperatorBase { public: + OpWithoutKernelTest(const std::string& type, + const std::vector& inputs, + const std::vector& outputs, + const AttributeMap& attrs, + std::unordered_map* in_out_idxs) + : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + void Init() override { x = 1; } void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, @@ -116,6 +123,13 @@ class CPUKernelTest : public OpKernel { // multiple inputs test class OperatorMultiInputsTest : public OperatorBase { public: + OperatorMultiInputsTest(const std::string& type, + const std::vector& inputs, + const std::vector& outputs, + const AttributeMap& attrs, + std::unordered_map* in_out_idxs) + : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + void Init() override { x = 1; } void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, From 08e1b40aa8ab254e90932fbb50bc7eb42bdd0982 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 11:35:21 -0700 Subject: [PATCH 66/76] Fix minor bugs --- paddle/framework/operator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 8b7f743671..10034c58e9 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -68,8 +68,8 @@ class OperatorBase { const AttributeMap& attrs, std::unordered_map* in_out_idxs) : type_(type), - inputs_(input), - outputs_(output), + inputs_(inputs), + outputs_(outputs), attrs_(attrs), in_out_idxs_(in_out_idxs) {} From 89ba59e24f62d4837590329f4cd2702c38ffc239 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 11:38:46 -0700 Subject: [PATCH 67/76] Add a temporary anonymous constructor to OperatorBAse --- paddle/framework/operator.h | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 10034c58e9..5f44972dd6 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -63,6 +63,7 @@ class ExecutionContext; */ class OperatorBase { public: + OperatorBase() {} // TODO(yi): This constructor is to be removed. 
OperatorBase(const std::string& type, const std::vector& inputs, const std::vector& outputs, const AttributeMap& attrs, From 65bd7c77e4c867bece7bb4a59e83c821991887fd Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 12:38:40 -0700 Subject: [PATCH 68/76] Update --- paddle/framework/backward_test.cc | 5 +---- paddle/framework/grad_op_builder_test.cc | 5 +---- paddle/framework/op_registry_test.cc | 10 ++-------- paddle/framework/operator.h | 11 +++++++++++ paddle/framework/operator_test.cc | 16 ++++------------ paddle/operators/add_op.cc | 2 ++ paddle/operators/cross_entropy_op.cc | 3 +++ paddle/operators/fill_zeros_like_op.cc | 1 + paddle/operators/gaussian_random_op.cc | 1 + paddle/operators/mean_op.cc | 2 ++ paddle/operators/mul_op.cc | 2 ++ paddle/operators/net_op.h | 2 ++ paddle/operators/net_op_test.cc | 4 ++++ paddle/operators/recurrent_op.h | 1 + paddle/operators/rowwise_add_op.cc | 1 + paddle/operators/sgd_op.cc | 1 + paddle/operators/sigmoid_op.cc | 2 ++ paddle/operators/softmax_op.cc | 2 ++ paddle/operators/uniform_random_op.cc | 1 + 19 files changed, 44 insertions(+), 28 deletions(-) diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index b930b86ed6..da3b9c8bed 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -30,10 +30,7 @@ using DeviceContext = platform::DeviceContext; class EmptyOp : public OperatorBase { public: - EmptyOp(const std::string &type, const std::vector &inputs, - const std::vector &outputs, const AttributeMap &attrs, - std::unordered_map *in_out_idxs) - : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + DEFINE_OPERATOR_CTOR(EmptyOp, OperatorBase) void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {} diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index c3ce69a344..19e552b745 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -10,10 +10,7 @@ namespace framework { class NOP : public OperatorBase { public: - NOP(const std::string &type, const std::vector &inputs, - const std::vector &outputs, const AttributeMap &attrs, - std::unordered_map *in_out_idxs) - : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + DEFINE_OPERATOR_CTOR(NOP, OperatorBase) void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index de3435ad35..e64126c709 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -7,10 +7,7 @@ namespace paddle { namespace framework { class CosineOp : public OperatorBase { public: - CosineOp(const std::string& type, const std::vector& inputs, - const std::vector& outputs, const AttributeMap& attrs, - std::unordered_map* in_out_idxs) - : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + DEFINE_OPERATOR_CTOR(CosineOp, OperatorBase) void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} @@ -32,10 +29,7 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOp : public OperatorBase { public: - MyTestOp(const std::string& type, const std::vector& inputs, - const std::vector& outputs, const AttributeMap& attrs, - std::unordered_map* in_out_idxs) - : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + DEFINE_OPERATOR_CTOR(MyTestOp, OperatorBase) void 
InferShape(const Scope& scope) const override {} void Run(const Scope& scope, diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 5f44972dd6..68e7fedcd6 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -300,6 +300,7 @@ class OpKernel { class OperatorWithKernel : public OperatorBase { public: + OperatorWithKernel() {} // TODO(yi): This constructor is to be removed. OperatorWithKernel(const std::string& type, const std::vector& inputs, const std::vector& outputs, @@ -356,5 +357,15 @@ class OperatorWithKernel : public OperatorBase { virtual void InferShape(const InferShapeContext& ctx) const = 0; }; +#define DEFINE_OPERATOR_CTOR(Class, ParentClass) \ + public: \ + Class() { /* TODO(yi): This constructor is to be removed. */ \ + } \ + Class(const std::string& type, const std::vector& inputs, \ + const std::vector& outputs, \ + const ::paddle::framework::AttributeMap& attrs, \ + std::unordered_map* in_out_idxs) \ + : ParentClass(type, inputs, outputs, attrs, in_out_idxs) {} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index a538abe7fe..7dbd5b14ab 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -23,12 +23,7 @@ static int op_run_num = 0; class OpWithoutKernelTest : public OperatorBase { public: - OpWithoutKernelTest(const std::string& type, - const std::vector& inputs, - const std::vector& outputs, - const AttributeMap& attrs, - std::unordered_map* in_out_idxs) - : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + DEFINE_OPERATOR_CTOR(OpWithoutKernelTest, OperatorBase) void Init() override { x = 1; } void InferShape(const Scope& scope) const override {} @@ -104,6 +99,8 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { static int cpu_kernel_run_num = 0; class OpWithKernelTest : public OperatorWithKernel { + public: + DEFINE_OPERATOR_CTOR(OpWithKernelTest, OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext& ctx) const override {} }; @@ -123,12 +120,7 @@ class CPUKernelTest : public OpKernel { // multiple inputs test class OperatorMultiInputsTest : public OperatorBase { public: - OperatorMultiInputsTest(const std::string& type, - const std::vector& inputs, - const std::vector& outputs, - const AttributeMap& attrs, - std::unordered_map* in_out_idxs) - : OperatorBase(type, inputs, outputs, attrs, in_out_idxs) {} + DEFINE_OPERATOR_CTOR(OperatorMultiInputsTest, OperatorBase) void Init() override { x = 1; } void InferShape(const Scope& scope) const override {} diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 086245ef62..b886ded9bb 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class AddOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(AddOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2); @@ -47,6 +48,7 @@ The equation is: Out = X + Y }; class AddOpGrad : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(AddOpGrad, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override {} }; diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index c813d54e17..09aa589d3c 100644 --- a/paddle/operators/cross_entropy_op.cc +++ 
b/paddle/operators/cross_entropy_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class OnehotCrossEntropyOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(OnehotCrossEntropyOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, @@ -38,6 +39,8 @@ class OnehotCrossEntropyOp : public framework::OperatorWithKernel { }; class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(OnehotCrossEntropyGradientOp, + framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { auto X_grad = ctx.Output(framework::GradVarName("X")); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 3759a88678..eda23a0ccf 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class FillZerosLikeOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(FillZerosLikeOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index ef417ae2f0..893cf56e5c 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -43,6 +43,7 @@ class GaussianRandomKernel : public framework::OpKernel { }; class GaussianRandomOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(GaussianRandomOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext& context) const override { auto* tensor = context.Output(0); diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index 2ea049cb36..f6abba7ab4 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class MeanOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(MeanOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one"); @@ -39,6 +40,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker { }; class MeanGradOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(MeanGradOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output(framework::GradVarName("X")) diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index db81fd555d..6115a3f333 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class MulOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(MulOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs"); @@ -53,6 +54,7 @@ The equation is: Out = X * Y }; class MulOpGrad : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(MulOpGrad, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override {} std::string DebugString() const override { diff --git a/paddle/operators/net_op.h 
b/paddle/operators/net_op.h index 792b336675..24c9e61c66 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -35,6 +35,8 @@ namespace operators { */ class NetOp : public framework::OperatorBase { public: + DEFINE_OPERATOR_CTOR(NetOp, framework::OperatorBase) + /** * Infer all the operators' input and output variables' shapes, will be called * before every mini-batch diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index 76bf79f9b5..0d5c3de798 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -12,6 +12,8 @@ static int run_cnt = 0; class TestOp : public framework::OperatorBase { public: + DEFINE_OPERATOR_CTOR(TestOp, framework::OperatorBase) + void InferShape(const Scope& scope) const override { ++infer_shape_cnt; } void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override { @@ -21,6 +23,8 @@ class TestOp : public framework::OperatorBase { class EmptyOp : public framework::OperatorBase { public: + DEFINE_OPERATOR_CTOR(EmptyOp, framework::OperatorBase) + void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const DeviceContext& dev_ctx) const override {} }; diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index d1e60fed9c..fdd9d00537 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -100,6 +100,7 @@ class RecurrentGradientAlgorithm { }; class RecurrentOp final : public framework::OperatorBase { + DEFINE_OPERATOR_CTOR(RecurrentOp, framework::OperatorBase) public: void Init() override; diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index 55ed1c2f4c..402f6340a0 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class RowWiseAddOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(RowWiseAddOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 2UL, diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index f9a28ff8a6..5b8093f0f7 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class SGDOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SGDOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two"); diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index bc5e0bbb18..a02e2dc39e 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class SigmoidOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SigmoidOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 1, "Sigmoid Op only have one input"); @@ -38,6 +39,7 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { }; class SigmoidOpGrad : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SigmoidOpGrad, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { ctx.Output(0)->Resize(ctx.Input(0)->dims()); diff --git a/paddle/operators/softmax_op.cc 
b/paddle/operators/softmax_op.cc index 3dd4e86918..9b6a679642 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { class SoftmaxOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SoftmaxOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, @@ -42,6 +43,7 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { }; class SoftmaxOpGrad : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(SoftmaxOpGrad, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL, diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 405b84b76d..ea81ec053f 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -46,6 +46,7 @@ class CPUUniformRandomKernel : public framework::OpKernel { }; class UniformRandomOp : public framework::OperatorWithKernel { + DEFINE_OPERATOR_CTOR(UniformRandomOp, framework::OperatorWithKernel) protected: void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), From 8e0bf6d9337b3a615c0203639f0a6755c51dfd6e Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 13:45:51 -0700 Subject: [PATCH 69/76] Update --- paddle/framework/grad_op_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 6d032fb78f..0121d99961 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -76,7 +76,7 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, } OperatorBase* BuildGradOp(const OperatorBase* op) { - std::string grad_op_type = OpRegistry::grad_ops().at(op->type_); + const std::string& grad_op_type = OpRegistry::grad_ops().at(op->Type()); OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); grad_op->type_ = grad_op_type; grad_op->attrs_ = op->attrs_; From f40988af0aee507f806b54b0b0b22eeb1d95644e Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 13:52:33 -0700 Subject: [PATCH 70/76] Correct the use of protobuf generated methods --- paddle/framework/op_registry.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 84bf325fed..cb9164eec1 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -69,18 +69,18 @@ class OpProtoAndCheckerMaker { VariableBuilder AddInput(const std::string& name, const std::string& comment) { - auto input = proto_->mutable_inputs()->Add(); - *input->mutable_name() = name; - *input->mutable_comment() = comment; + VarProto* input = proto_->add_inputs(); + input->set_name(name); + input->set_comment(comment); return VariableBuilder{input, [=] { this->SetHasMultipleInput(); }, nullptr}; } VariableBuilder AddOutput(const std::string& name, const std::string& comment) { - auto output = proto_->mutable_outputs()->Add(); - *output->mutable_name() = name; - *output->mutable_comment() = comment; + VarProto* output = proto_->add_outputs(); + output->set_name(name); + output->set_comment(comment); return VariableBuilder{output, [=] { this->SetHasMultipleOutput(); }, [=] { 
this->SetHasTemporaryOutput(); }}; } @@ -89,17 +89,15 @@ class OpProtoAndCheckerMaker { TypedAttrChecker& AddAttr(const std::string& name, const std::string& comment, bool generated = false) { - auto attr = proto_->mutable_attrs()->Add(); - *attr->mutable_name() = name; - *attr->mutable_comment() = comment; + AttrProto* attr = proto_->add_attrs(); + attr->set_name(name); + attr->set_comment(comment); attr->set_generated(generated); attr->set_type(AttrTypeID()); return op_checker_->AddAttrChecker(name); } - void AddComment(const std::string& comment) { - *(proto_->mutable_comment()) = comment; - } + void AddComment(const std::string& comment) { proto_->set_comment(comment); } private: void SetHasMultiple(const std::string& in_out, bool* flag) { @@ -187,7 +185,7 @@ class OpRegistry { OpProto& op_proto = protos()[op_type]; auto maker = ProtoMakerType(&op_proto, &op_checker); maker.Validate(); - *op_proto.mutable_type() = op_type; + op_proto.set_type(op_type); PADDLE_ENFORCE( op_proto.IsInitialized(), "Fail to initialize %s's OpProto, because %s is not initialized", From 717fe5495e413eef0852dbd01689385d263aa256 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 15:02:25 -0700 Subject: [PATCH 71/76] UPdate grad_op_builder.cc --- paddle/framework/grad_op_builder.cc | 83 ++++++++++++++++------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 0121d99961..cbfc1bfab0 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -19,45 +19,46 @@ permissions and limitations under the License. */ namespace paddle { namespace framework { -class OpRegistry; - using VarIndexMap = std::unordered_map; +typedef std::vector Ints; + enum class OpArgType { IN, OUT }; -static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_format"; - return op->attrs_.count(key) - ? &boost::get>(op->attrs_.at(key)) - : nullptr; +const Ints* AttrFormat(const AttributeMap& attrs, const std::string& key) { + return (attrs.count(key) > 0) ? &boost::get(attrs.at(key)) : nullptr; } -static const std::vector* GetOpFormat(const OperatorBase* op, - const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_format"; - return op->attrs_.count(key) - ? &boost::get>(op->attrs_.at(key)) - : nullptr; +Ints* AttrFormat(AttributeMap& attrs, const std::string& key) { + return (attrs.count(key) > 0) ? &boost::get(attrs.at(key)) : nullptr; } -static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, - const OpArgType& src_type, const OpArgType& dst_type, +static void TransOpArg(const OperatorBase* src_op, + std::vector& grad_inputs, + std::vector& grad_outputs, + AttributeMap& grad_attrs, + std::unordered_map& grad_idxs, + const std::string& src_type, const std::string& dst_type, int& idx, bool is_grad) { const std::vector& src_inout = - src_type == OpArgType::IN ? src_op->inputs_ : src_op->outputs_; - const std::vector* src_format = GetOpFormat(src_op, src_type); + (src_type == "input_format") ? src_op->inputs_ : src_op->outputs_; + + const std::vector* src_format = AttrFormat(src_op->Attrs(), src_type); std::vector& dst_inout = - dst_type == OpArgType::IN ? dst_op->inputs_ : dst_op->outputs_; - std::vector* dst_format = GetOpFormat(dst_op, dst_type); + (dst_type == "input_format") ? 
grad_inputs : grad_outputs; + + std::vector* dst_format = AttrFormat(grad_attrs, dst_type); + const OpProto& proto = OpRegistry::protos().at(src_op->type_); + const auto& src_arg_list = - src_type == OpArgType::IN ? proto.inputs() : proto.outputs(); + (src_type == "input_format") ? proto.inputs() : proto.outputs(); for (const auto& arg : src_arg_list) { std::string src_name = arg.name(); std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name; - (*dst_op->in_out_idxs_)[dst_name] = idx++; + grad_idxs[dst_name] = idx++; int src_arg_idx = src_op->in_out_idxs_->at(src_name); int src_begin = src_format == nullptr ? src_arg_idx : src_format->at(src_arg_idx); @@ -77,25 +78,35 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, OperatorBase* BuildGradOp(const OperatorBase* op) { const std::string& grad_op_type = OpRegistry::grad_ops().at(op->Type()); - OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); - grad_op->type_ = grad_op_type; - grad_op->attrs_ = op->attrs_; - grad_op->attrs_.erase("input_format"); - grad_op->attrs_.erase("output_format"); - if (GetOpFormat(op, OpArgType::IN) != nullptr) { - grad_op->attrs_["output_format"] = std::vector({0}); + + AttributeMap grad_attrs(op->Attrs()); + grad_attrs.erase("input_format"); + grad_attrs.erase("output_format"); + if (op->Attrs().count("input_format") > 0) { + grad_attrs["output_format"] = std::vector({0}); } - if (GetOpFormat(op, OpArgType::IN) != nullptr || - GetOpFormat(op, OpArgType::OUT) != nullptr) { - grad_op->attrs_["input_format"] = std::vector({0}); + if (op->Attrs().count("input_format") > 0 || + op->Attrs().count("output_format") > 0) { + grad_attrs["input_format"] = std::vector({0}); } - grad_op->in_out_idxs_.reset(new VarIndexMap()); + + std::vector grad_inputs, grad_outputs; + std::unordered_map grad_idxs; int in_idx = 0; int out_idx = 0; - TransOpArg(op, grad_op, OpArgType::IN, OpArgType::IN, in_idx, false); // I - TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, false); // G - TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, true); // OG - TransOpArg(op, grad_op, OpArgType::IN, OpArgType::OUT, out_idx, true); // IG + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + "input_format", "input_format", in_idx, false); // I + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + "output_format", "input_format", in_idx, false); // G + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + "output_format", "input_format", in_idx, true); // OG + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + "input_format", "output_format", out_idx, true); // IG + + OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); + + // TODO(yi): Set data member of grad_op. + return grad_op; } From 5381a6eef8f1313c46105fe019a60eb753e0b75c Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 15:08:57 -0700 Subject: [PATCH 72/76] Update --- paddle/framework/grad_op_builder.cc | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index cbfc1bfab0..8bd2bc5902 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -19,8 +19,6 @@ permissions and limitations under the License. 
*/ namespace paddle { namespace framework { -using VarIndexMap = std::unordered_map; - typedef std::vector Ints; enum class OpArgType { IN, OUT }; @@ -91,21 +89,27 @@ OperatorBase* BuildGradOp(const OperatorBase* op) { } std::vector grad_inputs, grad_outputs; - std::unordered_map grad_idxs; + + using VarIndexMap = std::unordered_map; + VarIndexMap* grad_idxs = new VarIndexMap; int in_idx = 0; int out_idx = 0; - TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, "input_format", "input_format", in_idx, false); // I - TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, "output_format", "input_format", in_idx, false); // G - TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, "output_format", "input_format", in_idx, true); // OG - TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, "input_format", "output_format", out_idx, true); // IG OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); - // TODO(yi): Set data member of grad_op. + grad_op->type_ = grad_op_type; + grad_op->inputs_ = grad_inputs; + grad_op->outputs_ = grad_outputs; + grad_op->attrs_ = grad_attrs; + grad_op->in_out_idxs_.reset(grad_idxs); return grad_op; } From 37c2a23884524e6cf76b83eb981638f58d30d22d Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 11 Aug 2017 22:12:44 +0000 Subject: [PATCH 73/76] fix cpplint error --- paddle/trainer/NewRemoteParameterUpdater.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index cccb7e7cdd..35dcb235e7 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -68,7 +68,7 @@ void NewRemoteParameterUpdater::init( LOG(INFO) << "paddle_begin_init_params start"; // NOTE: convert V1 OptimizatioinConfig proto to V2 OptimizerConfig. // This makes golang pserver compatible with handy V1 demos. 
- // TODO: Refine or remove these ugly converting lines + // TODO(wuyi): Refine or remove these ugly converting lines OptimizerConfig optimizerConfigV2; if (trainerConfig_.learning_method() == "momentum") { optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); From 610a25844fa33e0a0c028c4bc9e56a57db60d90e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 12 Aug 2017 12:38:23 +0800 Subject: [PATCH 74/76] Fix all unit tests in Python --- paddle/framework/pybind.cc | 7 +++- .../v2/framework/tests/gradient_checker.py | 34 ++++++++++++------- .../framework/tests/test_cross_entropy_op.py | 23 +++++++------ python/paddle/v2/framework/tests/test_net.py | 12 +++---- .../v2/framework/tests/test_protobuf.py | 7 ++-- .../v2/framework/tests/test_softmax_op.py | 11 +++--- 6 files changed, 54 insertions(+), 40 deletions(-) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 57d8d3b2e5..05ed603e1a 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -60,7 +60,12 @@ void ExposeOperator(ClassType &m) { -> std::unordered_map> { return op.outputs_; }) - .def("__str__", &ClassType::type::DebugString); + .def("inputs", + [](const typename ClassType::type &op) { return op.inputs_; }) + .def("__str__", &ClassType::type::DebugString) + .def("no_intermediate_outputs", [](const typename ClassType::type &op) { + return op.OutputVars(false); + }); } static size_t UniqueIntegerGenerator() { diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 015e832e82..501cf6110f 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -53,15 +53,18 @@ def get_numeric_gradient(op, tensor.set(input_values[var_name], core.CPUPlace()) # Create all output variable in local_scope - for output in op.outputs(): - if local_scope.find_var(output) is None: - local_scope.new_var(output).get_tensor() - + opts = op.outputs() + for key in opts: + for output in opts[key]: + if local_scope.find_var(output) is None: + local_scope.new_var(output).get_tensor() op.infer_shape(local_scope) # allocate output memory - for output in op.outputs(): - local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace()) + for key in opts: + for output in opts[key]: + local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace( + )) # TODO(yuyang18): Only CPU is support now. 
cpu_ctx = core.DeviceContext.create(core.CPUPlace()) @@ -150,19 +153,24 @@ class GradientChecker(unittest.TestCase): if no_grad_set is None: no_grad_set = set() - tmp_outs = forward_op.temp_outputs() - no_tmp_out = filter(lambda name: name not in tmp_outs, - forward_op.outputs()) + no_tmp_out = forward_op.no_intermediate_outputs() if len(no_tmp_out) != 1: raise ValueError("non temp out_names should be 1") - in_names = forward_op.inputs() + inputs = forward_op.inputs() + in_names = [item for k in inputs for item in inputs[k]] + outputs = forward_op.outputs() + out_names = [item for k in outputs for item in outputs[k]] + for no_grad in no_grad_set: if no_grad not in in_names: raise ValueError("no_grad should be in in_names") backward_op = core.Operator.backward(forward_op, no_grad_set) + bwd_outputs = backward_op.outputs() + bwd_out_names = [item for k in bwd_outputs for item in bwd_outputs[k]] + places = [core.CPUPlace()] if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): places.append(core.GPUPlace(0)) @@ -188,7 +196,7 @@ class GradientChecker(unittest.TestCase): var.set(value, place) # create output var - for out_name in forward_op.outputs(): + for out_name in out_names: scope.new_var(out_name).get_tensor() # infer the shape of output var and compute/set value of output var @@ -198,7 +206,7 @@ class GradientChecker(unittest.TestCase): # create output grad var # set shape as the output var # set value of this grad to ones - for name in forward_op.outputs(): + for name in out_names: out_tensor = scope.find_var(name).get_tensor() grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() grad_tensor.set_dims(out_tensor.shape()) @@ -206,7 +214,7 @@ class GradientChecker(unittest.TestCase): grad_tensor.set(data, place) # create input grad var - for name in backward_op.outputs(): + for name in bwd_out_names: scope.new_var(name).get_tensor() # infer the shape of input gradient var and compute/set it's value diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index fe89bf8e2c..4815192e25 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -21,17 +21,18 @@ class TestCrossEntropy(unittest.TestCase): self.outputs = {'Y': numpy.array(Y).astype("float32")} -# class CrossEntropyGradOpTest(GradientChecker): -# def test_softmax_grad(self): -# op = create_op("onehot_cross_entropy") -# batch_size = 100 -# class_num = 10 -# inputs = { -# "X": numpy.random.uniform( -# 0.1, 1.0, [batch_size, class_num]).astype("float32"), -# "label": (class_num / 2) * numpy.ones(batch_size).astype("int32") -# } -# self.check_grad(op, inputs, set("X"), "Y") +class CrossEntropyGradOpTest(GradientChecker): + def test_softmax_grad(self): + op = create_op("onehot_cross_entropy") + batch_size = 100 + class_num = 10 + inputs = { + "X": numpy.random.uniform( + 0.1, 1.0, [batch_size, class_num]).astype("float32"), + "label": (class_num / 2) * numpy.ones(batch_size).astype("int32") + } + self.check_grad(op, inputs, set("X"), "Y") + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index cc7f09e715..b42cadd11a 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -25,12 +25,12 @@ class TestNet(unittest.TestCase): net.complete_add_op(True) expected = ''' -Op(plain_net), inputs:(W, X, Y), 
outputs:(Out, fc.out, pre_activation). - Op(add_two), inputs:(X, Y), outputs:(Out). - Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation). - Op(plain_net), inputs:(W, X), outputs:(fc.out, pre_activation). - Op(mul), inputs:(X, W), outputs:(pre_activation). - Op(sigmoid), inputs:(pre_activation), outputs:(fc.out). +Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}. + Op(add_two), inputs:{X[X], Y[Y]}, outputs:{Out[Out]}. + Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. + Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. + Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}. + Op(sigmoid), inputs:{X[pre_activation]}, outputs:{Y[fc.out]}. ''' self.assertEqual(expected, "\n" + str(net)) diff --git a/python/paddle/v2/framework/tests/test_protobuf.py b/python/paddle/v2/framework/tests/test_protobuf.py index 69e98e2f25..848a396b3b 100644 --- a/python/paddle/v2/framework/tests/test_protobuf.py +++ b/python/paddle/v2/framework/tests/test_protobuf.py @@ -1,11 +1,10 @@ -import paddle.v2.framework.proto.op_proto_pb2 as op_proto_lib -import paddle.v2.framework.proto.attribute_pb2 as attr_type_lib +import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import unittest class TestFrameworkProto(unittest.TestCase): def test_all(self): - op_proto = op_proto_lib.OpProto() + op_proto = framework_pb2.OpProto() ipt0 = op_proto.inputs.add() ipt0.name = "a" ipt0.comment = "the input of cosine op" @@ -19,7 +18,7 @@ class TestFrameworkProto(unittest.TestCase): attr = op_proto.attrs.add() attr.name = "scale" attr.comment = "scale of cosine op" - attr.type = attr_type_lib.FLOAT + attr.type = framework_pb2.FLOAT op_proto.type = "cos" self.assertTrue(op_proto.IsInitialized()) diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index 3c6b229f94..e670d93653 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -24,11 +24,12 @@ class TestSoftmaxOp(unittest.TestCase): } -# class SoftmaxGradOpTest(GradientChecker): -# def test_softmax(self): -# op = create_op("softmax") -# inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")} -# self.check_grad(op, inputs, set("X"), "Y") +class SoftmaxGradOpTest(GradientChecker): + def test_softmax(self): + op = create_op("softmax") + inputs = {"X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")} + self.check_grad(op, inputs, set("X"), "Y") + if __name__ == '__main__': unittest.main() From 509d3209dbe407ebf8be798af4caee4850f5c417 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 12 Aug 2017 14:42:58 +0800 Subject: [PATCH 75/76] Fix CI and style --- paddle/framework/backward.cc | 2 +- paddle/framework/grad_op_builder_test.cc | 14 ++++++-------- paddle/framework/op_registry.h | 2 +- paddle/framework/operator.h | 4 ++-- paddle/framework/pybind.cc | 10 ++++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 36cc616358..315bdde76d 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -31,7 +31,7 @@ static void ForEachVarName(Map& names, T callback) { } static bool AllInSet( - const std::unordered_map>& names, + const std::map>& names, const std::string& suffix, const std::unordered_set& set) { bool all_in_set = true; ForEachVarName(names, [&all_in_set, &set, &suffix](const std::string& n) { diff --git 
a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index 85e745322b..f54a66110f 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -68,10 +68,9 @@ REGISTER_GRADIENT_OP(io_ignored, io_ignored_grad, f::NOP); TEST(GradOpBuilder, MutiInOut) { std::shared_ptr test_op(f::OpRegistry::CreateOp( - "mult_io", - {{"In1", {"in1"}}, - {"In2_mult", {"in2_1", "in2_2", "in2_3"}}, - {"In3", {"in3"}}}, + "mult_io", {{"In1", {"in1"}}, + {"In2_mult", {"in2_1", "in2_2", "in2_3"}}, + {"In3", {"in3"}}}, {{"Out1", {"out1"}}, {"Out2_mult", {"out2_1", "out2_2"}}}, {})); std::shared_ptr grad_test_op = f::OpRegistry::CreateGradOp(*test_op); @@ -101,10 +100,9 @@ TEST(GradOpBuilder, MutiInOut) { TEST(GradOpBuilder, IOIgnoredInGradient) { std::shared_ptr test_op(f::OpRegistry::CreateOp( - "io_ignored", - {{"In1", {"in1"}}, - {"In2_mult", {"in2_1", "in2_2"}}, - {"In3_mult", {"in3_1", "in3_2"}}}, + "io_ignored", {{"In1", {"in1"}}, + {"In2_mult", {"in2_1", "in2_2"}}, + {"In3_mult", {"in3_1", "in3_2"}}}, {{"Out1_mult", {"out1_1", "out1_2"}}, {"Out2", {"out2"}}}, {})); std::shared_ptr grad_test_op = f::OpRegistry::CreateGradOp(*test_op); diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index f2236e60d8..f6b71a4efd 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -118,7 +118,7 @@ class OpProtoAndCheckerMaker { class OpRegistry { using OpCreator = std::function; - using VarNameMap = std::unordered_map>; + using VarNameMap = std::map>; public: template diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 6dc331b2f0..5ed199adc6 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -143,11 +143,11 @@ class OperatorBase { // I (Inputs) // O (Outputs) // OG (Output Gradients) - std::unordered_map> inputs_; + std::map> inputs_; // NOTE: in case of OpGrad, outputs_ contains // IG (Inputs Gradients) - std::unordered_map> outputs_; + std::map> outputs_; AttributeMap attrs_; }; diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 05ed603e1a..07b42c8371 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -57,15 +57,17 @@ void ExposeOperator(ClassType &m) { }) .def("outputs", [](const typename ClassType::type &op) - -> std::unordered_map> { + -> std::map> { return op.outputs_; }) .def("inputs", [](const typename ClassType::type &op) { return op.inputs_; }) .def("__str__", &ClassType::type::DebugString) - .def("no_intermediate_outputs", [](const typename ClassType::type &op) { - return op.OutputVars(false); - }); + .def("no_intermediate_outputs", + [](const typename ClassType::type &op) { + return op.OutputVars(false); + }) + .def("support_gpu", &ClassType::type::SupportGPU); } static size_t UniqueIntegerGenerator() { From 0b1052fca5392b9cf9d0e5c534869bad949224ea Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 12 Aug 2017 20:19:21 +0800 Subject: [PATCH 76/76] Get `DEFINE_OPERATOR_CTOR` Back to code --- paddle/framework/backward_test.cc | 2 ++ paddle/framework/grad_op_builder_test.cc | 1 + paddle/framework/op_registry_test.cc | 2 ++ paddle/framework/operator.h | 22 ++++++++++++++++++++++ paddle/framework/operator_test.cc | 3 +++ paddle/operators/add_op.cc | 3 +++ paddle/operators/cross_entropy_op.cc | 3 +++ paddle/operators/fill_zeros_like_op.cc | 2 ++ paddle/operators/gaussian_random_op.cc | 2 ++ paddle/operators/mean_op.cc | 2 ++ paddle/operators/mul_op.cc | 3 +++ paddle/operators/net_op.h | 1 + 
paddle/operators/net_op_test.cc | 2 ++ paddle/operators/recurrent_op.h | 3 +++ paddle/operators/rowwise_add_op.cc | 1 + paddle/operators/sgd_op.cc | 1 + paddle/operators/sigmoid_op.cc | 2 ++ paddle/operators/softmax_op.cc | 2 ++ paddle/operators/uniform_random_op.cc | 1 + 19 files changed, 58 insertions(+) diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index c6e91e243e..dc09f095b9 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -30,6 +30,7 @@ using DeviceContext = platform::DeviceContext; class EmptyOp : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(EmptyOp, OperatorBase); void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {} }; @@ -78,6 +79,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { class FcOp : public operators::NetOp { public: + DEFINE_OPERATOR_CTOR(FcOp, operators::NetOp) void Init() override { AddOp(OpRegistry::CreateOp("mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}}, diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index f54a66110f..c95583c0af 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -10,6 +10,7 @@ namespace framework { class NOP : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(NOP, OperatorBase); void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, const platform::DeviceContext &dev_ctx) const override {} diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 3e0df6909f..456a967629 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -7,6 +7,7 @@ namespace paddle { namespace framework { class CosineOp : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(CosineOp, OperatorBase); void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} void InferShape(const Scope& scope) const override {} @@ -27,6 +28,7 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOp : public OperatorBase { public: + DEFINE_OPERATOR_CTOR(MyTestOp, OperatorBase); void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 5ed199adc6..b5a409a23e 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -64,6 +64,17 @@ class ExecutionContext; */ class OperatorBase { public: + using VarNameMap = std::map>; + + OperatorBase() = default; + OperatorBase(const std::string& type, const VarNameMap& inputs, + const VarNameMap& outputs, const AttributeMap& attrs) + : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) {} + + OperatorBase(const OperatorBase& o) = delete; + OperatorBase& operator=(const OperatorBase& o) = delete; + OperatorBase(OperatorBase&& o) = delete; + virtual ~OperatorBase() {} template @@ -151,6 +162,15 @@ class OperatorBase { AttributeMap attrs_; }; +#define DEFINE_OPERATOR_CTOR(Class, ParentClass) \ + public: \ + Class() : ParentClass() { /* TODO(yi): This constructor is to be removed. 
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index 5ed199adc6..b5a409a23e 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -64,6 +64,17 @@ class ExecutionContext;
  */
 class OperatorBase {
  public:
+  using VarNameMap = std::map<std::string, std::vector<std::string>>;
+
+  OperatorBase() = default;
+  OperatorBase(const std::string& type, const VarNameMap& inputs,
+               const VarNameMap& outputs, const AttributeMap& attrs)
+      : type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) {}
+
+  OperatorBase(const OperatorBase& o) = delete;
+  OperatorBase& operator=(const OperatorBase& o) = delete;
+  OperatorBase(OperatorBase&& o) = delete;
+
   virtual ~OperatorBase() {}
 
   template <typename T>
@@ -151,6 +162,15 @@ class OperatorBase {
   AttributeMap attrs_;
 };
 
+#define DEFINE_OPERATOR_CTOR(Class, ParentClass)                              \
+ public:                                                                      \
+  Class() : ParentClass() { /* TODO(yi): This constructor is to be removed. */ \
+  }                                                                           \
+  Class(const std::string& type, const VarNameMap& inputs,                    \
+        const VarNameMap& outputs,                                            \
+        const paddle::framework::AttributeMap& attrs)                         \
+      : ParentClass(type, inputs, outputs, attrs) {}
+
 class InferShapeContext {
  public:
   InferShapeContext(const OperatorBase& op, const Scope& scope)
@@ -290,6 +310,8 @@ class OpKernel {
 
 class OperatorWithKernel : public OperatorBase {
  public:
+  DEFINE_OPERATOR_CTOR(OperatorWithKernel, OperatorBase)
+
   struct OpKernelKey {
     platform::Place place_;
diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc
index 6cfcdd161e..5fdb6bca02 100644
--- a/paddle/framework/operator_test.cc
+++ b/paddle/framework/operator_test.cc
@@ -22,6 +22,8 @@ namespace framework {
 static int op_run_num = 0;
 
 class OpWithoutKernelTest : public OperatorBase {
+  DEFINE_OPERATOR_CTOR(OpWithoutKernelTest, framework::OperatorBase)
+
  public:
   void Init() override { x = 1; }
   void InferShape(const Scope& scope) const override {}
@@ -102,6 +104,7 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
 static int cpu_kernel_run_num = 0;
 
 class OpWithKernelTest : public OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(OpWithKernelTest, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext& ctx) const override {}
 };
diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc
index adb1c4f041..bf0982e095 100644
--- a/paddle/operators/add_op.cc
+++ b/paddle/operators/add_op.cc
@@ -18,6 +18,8 @@ namespace paddle {
 namespace operators {
 
 class AddOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(AddOp, framework::OperatorWithKernel)
+
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims(),
@@ -43,6 +45,7 @@ The equation is: Out = X + Y
 };
 
 class AddOpGrad : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(AddOpGrad, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {}
 };
diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc
index 7cb2aa4e78..e40351a1c1 100644
--- a/paddle/operators/cross_entropy_op.cc
+++ b/paddle/operators/cross_entropy_op.cc
@@ -18,6 +18,7 @@ namespace paddle {
 namespace operators {
 
 class OnehotCrossEntropyOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(OnehotCrossEntropyOp, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     auto *X = ctx.Input<Tensor>("X");
@@ -31,6 +32,8 @@ class OnehotCrossEntropyOp : public framework::OperatorWithKernel {
 };
 
 class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(OnehotCrossEntropyGradientOp,
+                       framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     auto X_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc
index 04a820b616..881d4128bb 100644
--- a/paddle/operators/fill_zeros_like_op.cc
+++ b/paddle/operators/fill_zeros_like_op.cc
@@ -18,6 +18,8 @@ namespace paddle {
 namespace operators {
 
 class FillZerosLikeOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(FillZerosLikeOp, framework::OperatorWithKernel);
+
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     ctx.Output<framework::Tensor>("Dst")->Resize(
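Hand-expanding the macro for one class makes the mechanics plain. DEFINE_OPERATOR_CTOR(OperatorWithKernel, OperatorBase) produces, roughly:

     public:
      OperatorWithKernel() : OperatorBase() { /* TODO(yi): ... */
      }
      OperatorWithKernel(const std::string& type, const VarNameMap& inputs,
                         const VarNameMap& outputs,
                         const paddle::framework::AttributeMap& attrs)
          : OperatorBase(type, inputs, outputs, attrs) {}

Two consequences follow. First, the macro flips the class into public access, which is why many call sites in these hunks immediately re-declare protected: after invoking it. Second, C++11's inheriting constructors (using ParentClass::ParentClass;) could arguably replace the forwarding constructor, but the macro also has to supply the transitional default constructor flagged by the TODO, so keeping both behind one name is a reasonable stopgap until that constructor is removed.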
diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc
index ef417ae2f0..9a4d4addd4 100644
--- a/paddle/operators/gaussian_random_op.cc
+++ b/paddle/operators/gaussian_random_op.cc
@@ -43,6 +43,8 @@ class GaussianRandomKernel : public framework::OpKernel {
 };
 
 class GaussianRandomOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(GaussianRandomOp, framework::OperatorWithKernel);
+
 protected:
   void InferShape(const framework::InferShapeContext& context) const override {
     auto* tensor = context.Output<framework::Tensor>(0);
diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc
index 2787ac46b7..99e27a11a8 100644
--- a/paddle/operators/mean_op.cc
+++ b/paddle/operators/mean_op.cc
@@ -18,6 +18,7 @@ namespace paddle {
 namespace operators {
 
 class MeanOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(MeanOp, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
@@ -37,6 +38,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
 };
 
 class MeanGradOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(MeanGradOp, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     ctx.Output<Tensor>(framework::GradVarName("X"))
diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc
index 9c570cff28..ae924375c2 100644
--- a/paddle/operators/mul_op.cc
+++ b/paddle/operators/mul_op.cc
@@ -18,6 +18,8 @@ namespace paddle {
 namespace operators {
 
 class MulOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(MulOp, framework::OperatorWithKernel);
+
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     auto dim0 = ctx.Input<Tensor>("X")->dims();
@@ -51,6 +53,7 @@ The equation is: Out = X * Y
 };
 
 class MulOpGrad : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(MulOpGrad, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {}
   std::string DebugString() const override {
diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h
index 61f6187aec..4560578121 100644
--- a/paddle/operators/net_op.h
+++ b/paddle/operators/net_op.h
@@ -37,6 +37,7 @@ namespace operators {
 class NetOp : public framework::OperatorBase {
  public:
   static const char kAll[];
+  DEFINE_OPERATOR_CTOR(NetOp, framework::OperatorBase);
 
   /**
    * Infer all the operators' input and output variables' shapes, will be called
diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc
index c167f90824..8872c8d92b 100644
--- a/paddle/operators/net_op_test.cc
+++ b/paddle/operators/net_op_test.cc
@@ -12,6 +12,7 @@ static int run_cnt = 0;
 
 class TestOp : public framework::OperatorBase {
  public:
+  DEFINE_OPERATOR_CTOR(TestOp, framework::OperatorBase);
   void InferShape(const Scope& scope) const override { ++infer_shape_cnt; }
   void Run(const Scope& scope,
            const platform::DeviceContext& dev_ctx) const override {
@@ -21,6 +22,7 @@ class TestOp : public framework::OperatorBase {
 
 class EmptyOp : public framework::OperatorBase {
  public:
+  DEFINE_OPERATOR_CTOR(EmptyOp, framework::OperatorBase);
   void InferShape(const Scope& scope) const override {}
   void Run(const Scope& scope, const DeviceContext& dev_ctx) const override {}
 };
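Several of the gradient operators here locate their output tensors through framework::GradVarName rather than a hard-coded name. The convention assumed in these hunks is that a variable's gradient carries the forward variable's name plus a reserved suffix, spelled "@GRAD" in the framework of this era; a one-function sketch of that convention (the suffix spelling is an assumption here, not part of the patch):

    #include <string>

    // GradVarName("X") -> "X@GRAD", so MeanGradOp above resizes the tensor
    // named "X@GRAD" to match the dims of the forward input "X".
    inline std::string GradVarName(const std::string& name) {
      return name + "@GRAD";
    }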
diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h
index d1e60fed9c..b22ac0ddc9 100644
--- a/paddle/operators/recurrent_op.h
+++ b/paddle/operators/recurrent_op.h
@@ -101,6 +101,8 @@ class RecurrentGradientAlgorithm {
 
 class RecurrentOp final : public framework::OperatorBase {
  public:
+  DEFINE_OPERATOR_CTOR(RecurrentOp, framework::OperatorBase);
+
   void Init() override;
 
   /**
@@ -123,6 +125,7 @@ class RecurrentOp final : public framework::OperatorBase {
 
 class RecurrentGradientOp final : public framework::OperatorBase {
  public:
+  DEFINE_OPERATOR_CTOR(RecurrentGradientOp, framework::OperatorBase)
   void Init() override;
 
   /**
diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc
index 28b56a6934..fcc6e16364 100644
--- a/paddle/operators/rowwise_add_op.cc
+++ b/paddle/operators/rowwise_add_op.cc
@@ -18,6 +18,7 @@ namespace paddle {
 namespace operators {
 
 class RowWiseAddOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(RowWiseAddOp, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     auto dim0 = ctx.Input<Tensor>("X")->dims();
diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc
index 30fe6fd491..29a6a77006 100644
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@@ -18,6 +18,7 @@ namespace paddle {
 namespace operators {
 
 class SGDOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(SGDOp, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     PADDLE_ENFORCE(
diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc
index 315887d8c4..40a8ba12d7 100644
--- a/paddle/operators/sigmoid_op.cc
+++ b/paddle/operators/sigmoid_op.cc
@@ -18,6 +18,7 @@ namespace paddle {
 namespace operators {
 
 class SigmoidOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(SigmoidOp, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
@@ -36,6 +37,7 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
 };
 
 class SigmoidOpGrad : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(SigmoidOpGrad, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc
index 962787fffd..16351b4bbd 100644
--- a/paddle/operators/softmax_op.cc
+++ b/paddle/operators/softmax_op.cc
@@ -18,6 +18,7 @@ namespace paddle {
 namespace operators {
 
 class SoftmaxOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(SoftmaxOp, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL,
@@ -38,6 +39,7 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
 };
 
 class SoftmaxOpGrad : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(SoftmaxOpGrad, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
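A small stylistic wrinkle runs through these hunks: some DEFINE_OPERATOR_CTOR invocations end with a semicolon (GaussianRandomOp, MulOp, NetOp) and some do not (MeanOp, SoftmaxOp). Both compile, because the macro expansion ends in a constructor body, so a trailing semicolon is merely an empty member declaration, which C++11 tolerates. A standalone miniature (illustrative macro, not Paddle code):

    // A macro whose expansion ends in a function body tolerates an
    // optional trailing semicolon at the call site.
    #define DEFINE_CTOR(Class) \
     public:                   \
      Class() {}

    class A { DEFINE_CTOR(A) };   // no semicolon: fine
    class B { DEFINE_CTOR(B); };  // extra ';' is an empty member declaration

    int main() {
      A a;
      B b;
      return 0;
    }

Picking one spelling (the semicolon-free form is the more common one across this patch) would make a later cleanup sweep mechanical.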
diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc
index 57db9a5099..8c40eed9d4 100644
--- a/paddle/operators/uniform_random_op.cc
+++ b/paddle/operators/uniform_random_op.cc
@@ -46,6 +46,7 @@ class CPUUniformRandomKernel : public framework::OpKernel {
 };
 
 class UniformRandomOp : public framework::OperatorWithKernel {
+  DEFINE_OPERATOR_CTOR(UniformRandomOp, framework::OperatorWithKernel)
 protected:
   void InferShape(const framework::InferShapeContext& ctx) const override {
     PADDLE_ENFORCE(GetAttr<float>("min") < GetAttr<float>("max"),