From 8266fcc3be869cfef42ab0ec597b2b4ce08dd37d Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Tue, 16 Jan 2018 21:31:08 +0800
Subject: [PATCH 001/105] Add python wrapper for row conv operator.

---
 doc/api/v2/fluid/layers.rst         |  5 +++
 python/paddle/v2/fluid/layers/nn.py | 65 ++++++++++++++++++++++++++---
 2 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index 62c154e65d..ad3c70a6f1 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -493,3 +493,8 @@ swish
 ------
 .. autofunction:: paddle.v2.fluid.layers.swish
     :noindex:
+
+row_conv
+--------
+.. autofunction:: paddle.v2.fluid.layers.row_conv
+    :noindex:
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 4e8fd407c9..7c694ed777 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -50,6 +50,7 @@ __all__ = [
     'sequence_last_step',
     'dropout',
     'split',
+    'row_conv',
 ]
 
 
@@ -1547,13 +1548,13 @@ def split(input, num_or_sections, dim=-1):
 
     Args:
         input (Variable): The input variable which is a Tensor or LoDTensor.
-        num_or_sections (int|list): If :attr:`num_or_sections` is an integer, 
-            then the integer indicates the number of equal sized sub-tensors 
-            that the tensor will be divided into. If :attr:`num_or_sections` 
-            is a list of integers, the length of list indicates the number of 
-            sub-tensors and the integers indicate the sizes of sub-tensors' 
+        num_or_sections (int|list): If :attr:`num_or_sections` is an integer,
+            then the integer indicates the number of equal sized sub-tensors
+            that the tensor will be divided into. If :attr:`num_or_sections`
+            is a list of integers, the length of list indicates the number of
+            sub-tensors and the integers indicate the sizes of sub-tensors'
             :attr:`dim` dimension orderly.
-        dim (int): The dimension along which to split. If :math:`dim < 0`, the 
+        dim (int): The dimension along which to split. If :math:`dim < 0`, the
             dimension to split along is :math:`rank(input) + dim`.
 
     Returns:
@@ -1597,3 +1598,55 @@ def split(input, num_or_sections, dim=-1):
             'axis': dim
         })
     return outs
+
+
+def row_conv(input, future_context_size, param_attr=None, act=None):
+    """Row Conv Operator. This layer applies lookahead convolution to
+    **input**. The input variable should be a 2D LoDTensor with shape [T, D].
+    Parameters with shape [future_context_size + 1, D] will be created. The
+    math equation of row convolution is as follows:
+
+    .. math::
+        Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{j - i}
+
+    In the above equation:
+
+    * :math:`Out_{i}`: The i-th row of output variable with shape [1, D].
+    * :math:`\\tau`: Future context size.
+    * :math:`X_{j}`: The j-th row of input variable with shape [1, D].
+    * :math:`W_{j-i}`: The (j-i)-th row of parameters with shape [1, D].
+
+    For more details about row_conv, please refer to the paper \
+    (http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and
+    the design document \
+    (https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645).
+
+    Args:
+        input (Variable): Input variable, a 2D LoDTensor with shape [T, D].
+        future_context_size (int): Future context size.
+        param_attr (ParamAttr): Attributes of parameters, including
+            name, initializer etc.
+        act (str): Non-linear activation to be applied to output variable.
+
+    Returns:
+        Variable: The output tensor with the same shape as the input tensor.
+
+    Examples:
+        .. code-block:: python
+
+            x = fluid.layers.data(name='x', shape=[16],
+                                  dtype='float32', lod_level=1)
+            out = fluid.layers.row_conv(input=x, future_context_size=2)
+    """
+    helper = LayerHelper('row_conv', **locals())
+    dtype = helper.input_dtype()
+    filter_shape = [future_context_size + 1, input.shape[1]]
+    filter_param = helper.create_parameter(
+        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
+    out = helper.create_tmp_variable(dtype)
+    helper.append_op(
+        type='row_conv',
+        inputs={'X': [input],
+                'Filter': [filter_param]},
+        outputs={'Out': [out]})
+    return out
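[Editor's note: to make the lookahead semantics of the patch above concrete, here is a minimal standalone C++ sketch of the computation the docstring describes. It is written directly against the equation, not taken from Paddle's actual row_conv kernel; the function name, plain-vector layout, and truncation at the sequence tail are illustrative assumptions.]

    #include <vector>

    // Row (lookahead) convolution over a [T, D] input:
    //   out[i] = sum over t in [0, tau] of w[t] (elementwise *) x[i + t],
    // where rows past the end of the sequence contribute nothing.
    std::vector<std::vector<float>> RowConvReference(
        const std::vector<std::vector<float>>& x,    // input, shape [T][D]
        const std::vector<std::vector<float>>& w) {  // weights, shape [tau + 1][D]
      const size_t T = x.size();
      const size_t D = T > 0 ? x[0].size() : 0;
      std::vector<std::vector<float>> out(T, std::vector<float>(D, 0.0f));
      for (size_t i = 0; i < T; ++i) {
        for (size_t t = 0; t < w.size() && i + t < T; ++t) {
          for (size_t d = 0; d < D; ++d) {
            out[i][d] += w[t][d] * x[i + t][d];  // weight row index is j - i = t
          }
        }
      }
      return out;
    }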
From 2a0a576130b3b04f3555479f1850fd91dbba4d9a Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Tue, 16 Jan 2018 21:40:34 +0800
Subject: [PATCH 002/105] Add non-linear activation.

---
 python/paddle/v2/fluid/layers/nn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 7c694ed777..4546616d1a 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -1649,4 +1649,4 @@ def row_conv(input, future_context_size, param_attr=None, act=None):
         inputs={'X': [input],
                 'Filter': [filter_param]},
         outputs={'Out': [out]})
-    return out
+    return helper.append_activation(out)

From d2a70243f1179654fd7224a4114cff5d984d424e Mon Sep 17 00:00:00 2001
From: dangqingqing
Date: Tue, 16 Jan 2018 13:33:13 +0800
Subject: [PATCH 003/105] Refine profiler and expose to Python.

---
 cmake/external/pybind11.cmake                 |  2 +-
 paddle/framework/CMakeLists.txt               |  3 +-
 paddle/framework/executor.cc                  |  6 ++
 paddle/platform/profiler.cc                   | 37 +++++++---
 paddle/platform/profiler.h                    | 22 ++++--
 paddle/platform/profiler_test.cc              | 10 ++-
 paddle/pybind/CMakeLists.txt                  |  2 +-
 paddle/pybind/protobuf.cc                     | 70 +++---------------
 paddle/pybind/protobuf.h                      |  1 +
 paddle/pybind/pybind.cc                       | 27 ++++++-
 python/paddle/v2/fluid/profiler.py            | 45 ++++++++++++
 python/paddle/v2/fluid/tests/test_profiler.py | 37 +++++++++-
 12 files changed, 171 insertions(+), 91 deletions(-)

diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake
index 4e87dc49d8..ab23663695 100644
--- a/cmake/external/pybind11.cmake
+++ b/cmake/external/pybind11.cmake
@@ -26,7 +26,7 @@ ExternalProject_Add(
     extern_pybind
     ${EXTERNAL_PROJECT_LOG_ARGS}
     GIT_REPOSITORY  "https://github.com/pybind/pybind11.git"
-    GIT_TAG         "v2.1.1"
+    GIT_TAG         "v2.2.1"
     PREFIX          ${PYBIND_SOURCE_DIR}
     UPDATE_COMMAND  ""
     CONFIGURE_COMMAND ""
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 597ea959f2..9bf712250d 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -68,7 +68,8 @@ cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
 cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
 
-cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table)
+cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
+framework_proto backward glog lod_rank_table profiler)
 
 cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index c0418c9266..d7233882e7 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/framework/lod_tensor_array.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/platform/place.h"
+#include "paddle/platform/profiler.h"
 
 DEFINE_bool(check_nan_inf, false,
             "Checking whether operator produce NAN/INF or not. It will be "
@@ -116,6 +117,11 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
   for (auto& op_desc : block.AllOps()) {
     auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
     VLOG(3) << op->DebugStringEx(local_scope);
+
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto dev_ctx = const_cast<platform::DeviceContext*>(pool.Get(place_));
+    platform::RecordEvent record_event(op->Type(), dev_ctx);
+
     op->Run(*local_scope, place_);
     if (FLAGS_check_nan_inf) {
       for (auto& vname : op->OutputVars(true)) {
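[Editor's note: the five lines added to Executor::Run above rely on RecordEvent being an RAII guard: constructing it pushes a named event, and destroying it pops the event, so each operator's run is bracketed automatically. A hedged sketch of the same pattern outside the executor follows; the function and event name here are invented for illustration.]

    #include "paddle/platform/profiler.h"

    namespace platform = paddle::platform;

    // Times an arbitrary region of work. The event is pushed when record_event
    // is constructed and popped when it goes out of scope, so the elapsed time
    // is attributed to the name "forward_pass" in the profiling report.
    void TimedForwardPass(platform::DeviceContext* dev_ctx) {
      platform::RecordEvent record_event("forward_pass", dev_ctx);
      // ... run the forward pass ...
    }  // record_event destroyed here; the event is popped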
diff --git a/paddle/platform/profiler.cc b/paddle/platform/profiler.cc
index 7e2e2d968e..8175b827c3 100644
--- a/paddle/platform/profiler.cc
+++ b/paddle/platform/profiler.cc
@@ -163,14 +163,17 @@ void EnableProfiler(ProfilerState state) {
   Mark("_start_profiler_", nullptr);
 }
 
-std::vector<std::vector<Event>> DisableProfiler() {
-  PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
-                 "Can't disable profiling, since it's not starting.");
-  // Mark the profiling stop.
-  Mark("_stop_profiler_", nullptr);
-  g_state = ProfilerState::kDisabled;
-  std::vector<std::vector<Event>> result;
+void ResetProfiler() {
   std::lock_guard<std::mutex> guard(g_all_event_lists_mutex);
+  for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
+       ++it) {
+    (*it)->Clear();
+  }
+}
+
+std::vector<std::vector<Event>> GetAllEvents() {
+  std::lock_guard<std::mutex> guard(g_all_event_lists_mutex);
+  std::vector<std::vector<Event>> result;
   for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
        ++it) {
     result.emplace_back((*it)->Reduce());
@@ -178,6 +181,18 @@ std::vector<std::vector<Event>> DisableProfiler() {
   return result;
 }
 
+void DisableProfiler(EventSortingKey sorted_key) {
+  PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
+                 "Can't disable profiling, since it's not starting.");
+  // Mark the profiling stop.
+  Mark("_stop_profiler_", nullptr);
+  g_state = ProfilerState::kDisabled;
+
+  std::vector<std::vector<Event>> all_events = GetAllEvents();
+  ParseEvents(all_events, sorted_key);
+  ResetProfiler();
+}
+
 void ParseEvents(std::vector<std::vector<Event>>& events,
                  EventSortingKey sorted_by) {
   if (g_profiler_place == "") return;
@@ -291,12 +306,12 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
   }
 
   // Print report
-  PrintProfilingReport(events_table, sorted_domain, max_name_width + 4, 12);
+  PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12);
 }
 
-void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table,
-                          std::string& sorted_domain, const size_t name_width,
-                          const size_t data_width) {
+void PrintProfiler(std::vector<std::vector<EventItem>>& events_table,
+                   std::string& sorted_domain, const size_t name_width,
+                   const size_t data_width) {
   // Output header information
   std::cout << "\n------------------------->"
             << "     Profiling Report     "
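[Editor's note: after this refactoring the profiler has a clear lifecycle: EnableProfiler starts collection, RecordEvent/PushEvent/PopEvent produce events, and DisableProfiler now parses, prints, and resets in one call. A minimal sketch of a whole session, assuming ProfilerState exposes a kCPU member as the CPU/GPU place handling suggests:]

    #include "paddle/platform/device_context.h"
    #include "paddle/platform/profiler.h"

    namespace platform = paddle::platform;

    int main() {
      // Begin collecting events for host-side (CPU) execution.
      platform::EnableProfiler(platform::ProfilerState::kCPU);

      auto& pool = platform::DeviceContextPool::Instance();
      auto* dev_ctx = const_cast<platform::DeviceContext*>(
          pool.Get(platform::CPUPlace()));
      {
        platform::RecordEvent record_event("work", dev_ctx);
        // ... the code being measured ...
      }

      // Parses the collected events, prints the report sorted by total time,
      // and clears all per-thread event lists via ResetProfiler().
      platform::DisableProfiler(platform::EventSortingKey::kTotal);
      return 0;
    }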
diff --git a/paddle/platform/profiler.h b/paddle/platform/profiler.h
index 6df48ef880..85823af1d7 100644
--- a/paddle/platform/profiler.h
+++ b/paddle/platform/profiler.h
@@ -84,6 +84,8 @@ struct EventList {
     return result;
   }
 
+  void Clear() { event_blocks.clear(); }
+
   std::forward_list<std::vector<Event>> event_blocks;
 };
 
@@ -110,12 +112,9 @@ struct RecordEvent {
   std::string name_;
 };
 
-// Enable the profiling function.
-void EnableProfiler(ProfilerState state);
-
 // Return the event list of all threads. Asummed the returned value calls
 // event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
-std::vector<std::vector<Event>> DisableProfiler();
+std::vector<std::vector<Event>> GetAllEvents();
 
 // The information of each event given in the profiling report
 struct EventItem {
@@ -130,13 +129,22 @@ struct EventItem {
 // Candidate keys to sort the profiling report
 enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve };
 
+// Enable the profiling function.
+void EnableProfiler(ProfilerState state);
+
+// Clear the g_all_event_lists, which is total event lists of all threads.
+void ResetProfiler();
+
+void DisableProfiler(EventSortingKey sorted_key);
+
 // Parse the event list and output the profiling report
 void ParseEvents(std::vector<std::vector<Event>>&,
                  EventSortingKey sorted_by = EventSortingKey::kDefault);
 
 // Print results
-void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table,
-                          std::string& sorted_domain, const size_t name_width,
-                          const size_t data_width);
+void PrintProfiler(std::vector<std::vector<EventItem>>& events_table,
+                   std::string& sorted_domain, const size_t name_width,
+                   const size_t data_width);
+
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/platform/profiler_test.cc b/paddle/platform/profiler_test.cc
index 13dea713c7..81f10c9134 100644
--- a/paddle/platform/profiler_test.cc
+++ b/paddle/platform/profiler_test.cc
@@ -103,18 +103,14 @@ TEST(RecordEvent, RecordEvent) {
   // Bad Usage:
   PushEvent("event_without_pop", dev_ctx);
   PopEvent("event_without_push", dev_ctx);
-  std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler();
-  // Will remove parsing-related code from test later
-  ParseEvents(events, EventSortingKey::kTotal);
+  std::vector<std::vector<Event>> events = paddle::platform::GetAllEvents();
 
   int cuda_startup_count = 0;
   int start_profiler_count = 0;
-  int stop_profiler_count = 0;
   for (size_t i = 0; i < events.size(); ++i) {
     for (size_t j = 0; j < events[i].size(); ++j) {
       if (events[i][j].name() == "_cuda_startup_") ++cuda_startup_count;
       if (events[i][j].name() == "_start_profiler_") ++start_profiler_count;
-      if (events[i][j].name() == "_stop_profiler_") ++stop_profiler_count;
       if (events[i][j].name() == "push") {
         EXPECT_EQ(events[i][j + 1].name(), "pop");
 #ifdef PADDLE_WITH_CUDA
@@ -127,5 +123,7 @@ TEST(RecordEvent, RecordEvent) {
   }
   EXPECT_EQ(cuda_startup_count % 5, 0);
   EXPECT_EQ(start_profiler_count, 1);
-  EXPECT_EQ(stop_profiler_count, 1);
+
+  // Will remove parsing-related code from test later
+  DisableProfiler(EventSortingKey::kTotal);
 }
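[Editor's note: the test above drives the lower-level PushEvent/PopEvent API directly, including a deliberately unmatched pair under "// Bad Usage:". For contrast, correct manual pairing looks like the sketch below; where a scope is available, the RAII RecordEvent wrapper shown earlier is preferable.]

    #include "paddle/platform/profiler.h"

    namespace platform = paddle::platform;

    void ManualEventPair(platform::DeviceContext* dev_ctx) {
      platform::PushEvent("custom_block", dev_ctx);
      // ... the code being measured ...
      platform::PopEvent("custom_block", dev_ctx);  // must mirror the push
    }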
diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt
index 7b37430707..e78673e0ba 100644
--- a/paddle/pybind/CMakeLists.txt
+++ b/paddle/pybind/CMakeLists.txt
@@ -1,7 +1,7 @@
 if(WITH_PYTHON)
   cc_library(paddle_pybind SHARED
     SRCS pybind.cc exception.cc protobuf.cc const_value.cc
-    DEPS pybind python backward proto_desc paddle_memory executor prune init
+    DEPS pybind python backward proto_desc paddle_memory executor prune init profiler
     ${GLOB_OP_LIB})
   if(NOT APPLE AND NOT ANDROID)
     target_link_libraries(paddle_pybind rt)
diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc
index 4f95948153..d80f6b71e9 100644
--- a/paddle/pybind/protobuf.cc
+++ b/paddle/pybind/protobuf.cc
@@ -21,74 +21,24 @@ limitations under the License. */
 #include "paddle/framework/program_desc.h"
 #include "paddle/framework/var_desc.h"
 
-// Cast boost::variant for PyBind.
-// Copy from
-// https://github.com/pybind/pybind11/issues/576#issuecomment-269563199
+using boost::variant;
+
 namespace pybind11 {
 namespace detail {
 
-// Can be replaced by a generic lambda in C++14
-struct variant_caster_visitor : public boost::static_visitor<handle> {
-  return_value_policy policy;
-  handle parent;
-
-  variant_caster_visitor(return_value_policy policy, handle parent)
-      : policy(policy), parent(parent) {}
-
-  template <class T>
-  handle operator()(T const &src) const {
-    return make_caster<T>::cast(src, policy, parent);
-  }
-};
-
-template <class Variant>
-struct variant_caster;
-
-template
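[Editor's note: for context on what supersedes the hand-rolled caster being deleted above: pybind11 2.2 (the version the cmake change in this patch moves to) ships a generic variant_caster in pybind11/stl.h, so boost::variant support reduces to two small specializations. The sketch below follows the pybind11 documentation and is not necessarily the exact code this patch adds in the truncated remainder of the diff.]

    #include <pybind11/stl.h>
    #include <boost/variant.hpp>

    namespace pybind11 {
    namespace detail {

    // Route boost::variant through pybind11's built-in generic variant_caster.
    template <typename... Ts>
    struct type_caster<boost::variant<Ts...>>
        : variant_caster<boost::variant<Ts...>> {};

    // Tell pybind11 how to visit a boost::variant (apply_visitor, not std::visit).
    template <>
    struct visit_helper<boost::variant> {
      template <typename... Args>
      static auto call(Args &&... args) -> decltype(boost::apply_visitor(args...)) {
        return boost::apply_visitor(args...);
      }
    };

    }  // namespace detail
    }  // namespace pybind11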