commit db2daefe50 by sneaxiy, 6 years ago
branch: revert-15207-remove_op_handle_lock_and_fix_var

merge develop

test=develop

@@ -54,7 +54,7 @@ option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
 option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
 option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
 option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
-option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
+option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
 option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
 option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
 option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
@@ -254,6 +254,12 @@ elseif()
     set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in MKL only now." FORCE)
 endif()
 
+if (WITH_PROFILER)
+  find_package(Gperftools REQUIRED)
+  include_directories(${GPERFTOOLS_INCLUDE_DIR})
+  add_definitions(-DWITH_GPERFTOOLS)
+endif()
+
 include(generic)  # simplify cmake module
 include(package)  # set paddle packages
 include(ccache)   # set ccache for compilation

@@ -2,8 +2,8 @@
 [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
-[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.1/getstarted/index_en.html)
-[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html)
 [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
 [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
-### Latest PaddlePaddle Release: [Fluid 1.1.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.1)
+### Latest PaddlePaddle Release: [Fluid 1.2.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.2)
 ### Install Latest Stable Release:
 ```
 # Linux CPU
@@ -27,9 +27,9 @@ pip install paddlepaddle
 # Linux GPU cuda9cudnn7
 pip install paddlepaddle-gpu
 # Linux GPU cuda8cudnn7
-pip install paddlepaddle-gpu==1.1.0.post87
+pip install paddlepaddle-gpu==1.2.0.post87
 # Linux GPU cuda8cudnn5
-pip install paddlepaddle-gpu==1.1.0.post85
+pip install paddlepaddle-gpu==1.2.0.post85
 # For installation on other platform, refer to http://paddlepaddle.org/
 ```
@@ -76,26 +76,26 @@ pip install paddlepaddle-gpu==1.1.0.post85
 ## Installation
-It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html) on our website.
+It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html) on our website.
 ## Documentation
-We provide [English](http://paddlepaddle.org/documentation/docs/en/1.1/getstarted/index_en.html) and
-[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html) documentation.
+We provide [English](http://paddlepaddle.org/documentation/docs/en/1.2/getstarted/index_en.html) and
+[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/index.html) documentation.
 - [Deep Learning 101](https://github.com/PaddlePaddle/book)
   You might want to start from this online interactive book that can run in a Jupyter Notebook.
-- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.1/user_guides/howto/training/cluster_howto.html)
+- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.2/user_guides/howto/training/cluster_howto.html)
   You can run distributed training jobs on MPI clusters.
-- [Python API](http://paddlepaddle.org/documentation/api/zh/1.1/fluid.html)
+- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.2/api_cn/index_cn.html)
   Our new API enables much shorter programs.
-- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.1/advanced_usage/development/contribute_to_paddle.html)
+- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.2/advanced_usage/development/contribute_to_paddle/index_cn.html)
   We appreciate your contributions!

@@ -0,0 +1,63 @@
+# Tries to find Gperftools.
+#
+# Usage of this module as follows:
+#
+#  find_package(Gperftools)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  Gperftools_ROOT_DIR  Set this variable to the root installation of
+#                       Gperftools if the module has problems finding
+#                       the proper installation path.
+#
+# Variables defined by this module:
+#
+#  GPERFTOOLS_FOUND        System has Gperftools libs/headers
+#  GPERFTOOLS_LIBRARIES    The Gperftools libraries (tcmalloc & profiler)
+#  GPERFTOOLS_INCLUDE_DIR  The location of Gperftools headers
+
+find_library(GPERFTOOLS_TCMALLOC
+  NAMES tcmalloc
+  HINTS ${Gperftools_ROOT_DIR}/lib)
+
+find_library(GPERFTOOLS_PROFILER
+  NAMES profiler
+  HINTS ${Gperftools_ROOT_DIR}/lib)
+
+find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER
+  NAMES tcmalloc_and_profiler
+  HINTS ${Gperftools_ROOT_DIR}/lib)
+
+find_path(GPERFTOOLS_INCLUDE_DIR
+  NAMES gperftools/heap-profiler.h
+  HINTS ${Gperftools_ROOT_DIR}/include)
+
+set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+  Gperftools
+  DEFAULT_MSG
+  GPERFTOOLS_LIBRARIES
+  GPERFTOOLS_INCLUDE_DIR)
+
+mark_as_advanced(
+  Gperftools_ROOT_DIR
+  GPERFTOOLS_TCMALLOC
+  GPERFTOOLS_PROFILER
+  GPERFTOOLS_TCMALLOC_AND_PROFILER
+  GPERFTOOLS_LIBRARIES
+  GPERFTOOLS_INCLUDE_DIR)
+
+# create IMPORTED targets
+if (Gperftools_FOUND AND NOT TARGET gperftools::tcmalloc)
+  add_library(gperftools::tcmalloc UNKNOWN IMPORTED)
+  set_target_properties(gperftools::tcmalloc PROPERTIES
+    IMPORTED_LOCATION ${GPERFTOOLS_TCMALLOC}
+    INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}")
+  add_library(gperftools::profiler UNKNOWN IMPORTED)
+  set_target_properties(gperftools::profiler PROPERTIES
+    IMPORTED_LOCATION ${GPERFTOOLS_PROFILER}
+    INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}")
+endif()
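
Reviewer note, not part of the diff: a target linked against the `gperftools::profiler` target this module defines gets direct access to the gperftools CPU-profiler API. A minimal sketch, assuming gperftools is installed; the file path and workload below are made up:

```cpp
// Hypothetical standalone example: profile a hot loop with gperftools.
// Build with: g++ example.cc -lprofiler (or link gperftools::profiler in CMake).
#include <gperftools/profiler.h>

#include <cstdint>

static volatile std::uint64_t sink = 0;

// Deliberately slow work so the sampling profiler has something to record.
static void BusyLoop() {
  for (std::uint64_t i = 0; i < 200000000ULL; ++i) sink += i;
}

int main() {
  ProfilerStart("/tmp/example.prof");  // begin writing CPU samples to this file
  BusyLoop();
  ProfilerStop();                      // stop sampling and flush the profile
  return 0;
}
```

The resulting profile file can then be inspected with the `pprof` tool that ships with gperftools.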

@@ -86,6 +86,7 @@ endif(NOT WITH_GOLANG)
 
 if(WITH_GPU)
   add_definitions(-DPADDLE_WITH_CUDA)
+  add_definitions(-DEIGEN_USE_GPU)
 
   FIND_PACKAGE(CUDA REQUIRED)

@@ -110,6 +110,14 @@ function(find_fluid_modules TARGET_NAME)
   endif()
 endfunction(find_fluid_modules)
 
+function(common_link TARGET_NAME)
+  if (WITH_PROFILER)
+    target_link_libraries(${TARGET_NAME} gperftools::profiler)
+  endif()
+endfunction()
+
 # find all third_party modules is used for paddle static library
 # for reduce the dependency when building the inference libs.
 set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY)
@@ -274,6 +282,7 @@ function(cc_library TARGET_NAME)
     endif()
     target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
     add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
+    common_link(${TARGET_NAME})
   endif()
 
   # cpplint code style
@@ -340,6 +349,7 @@ function(cc_binary TARGET_NAME)
   if(cc_binary_DEPS)
     target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
     add_dependencies(${TARGET_NAME} ${cc_binary_DEPS})
+    common_link(${TARGET_NAME})
   endif()
 endfunction(cc_binary)
@@ -362,6 +372,7 @@ function(cc_test TARGET_NAME)
       target_link_libraries(${TARGET_NAME} ${win32_deps})
     endif(WIN32)
     add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
+    common_link(${TARGET_NAME})
     add_test(NAME ${TARGET_NAME}
              COMMAND ${TARGET_NAME} ${cc_test_ARGS}
              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
@@ -420,6 +431,7 @@ function(nv_binary TARGET_NAME)
     if(nv_binary_DEPS)
       target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
      add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
+      common_link(${TARGET_NAME})
     endif()
   endif()
 endfunction(nv_binary)
@@ -433,6 +445,7 @@ function(nv_test TARGET_NAME)
     cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
     target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
     add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
+    common_link(${TARGET_NAME})
     add_test(${TARGET_NAME} ${TARGET_NAME})
     if (nv_test_SERIAL)
       set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
@@ -499,6 +512,7 @@ function(hip_binary TARGET_NAME)
     if(hip_binary_DEPS)
       target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS})
       add_dependencies(${TARGET_NAME} ${hip_binary_DEPS})
+      common_link(${TARGET_NAME})
     endif()
   endif()
 endfunction(hip_binary)
@@ -518,6 +532,7 @@ function(hip_test TARGET_NAME)
     set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
     target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags)
     add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags)
+    common_link(${TARGET_NAME})
     add_test(${TARGET_NAME} ${TARGET_NAME})
   endif()
 endfunction(hip_test)
@@ -560,6 +575,7 @@ function(go_library TARGET_NAME)
   endif()
 
   if(go_library_DEPS)
     add_dependencies(${TARGET_NAME} ${go_library_DEPS})
+    common_link(${TARGET_NAME})
   endif(go_library_DEPS)
 
   # The "source file" of the library is `${dummyfile}` which never

@@ -66,6 +66,7 @@ paddle.fluid.layers.linear_chain_crf ArgSpec(args=['input', 'label', 'param_attr
 paddle.fluid.layers.crf_decoding ArgSpec(args=['input', 'param_attr', 'label'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.cos_sim ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.cross_entropy ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100))
+paddle.fluid.layers.bpr_loss ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.square_error_cost ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.chunk_eval ArgSpec(args=['input', 'label', 'chunk_scheme', 'num_chunk_types', 'excluded_chunk_types'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_conv ArgSpec(args=['input', 'num_filters', 'filter_size', 'filter_stride', 'padding', 'bias_attr', 'param_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(3, 1, None, None, None, None, None))

@@ -1,6 +1,7 @@
 add_subdirectory(memory)
 add_subdirectory(platform)
 add_subdirectory(framework)
+add_subdirectory(imperative)
 add_subdirectory(operators)
 add_subdirectory(string)
 add_subdirectory(recordio)

@@ -129,11 +129,13 @@ cc_test(version_test SRCS version_test.cc DEPS version)
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
 
-if(NOT WIN32)
-  cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
-  cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
-    shape_inference data_transform lod_tensor profiler)
-endif(NOT WIN32)
+if(WITH_NGRAPH)
+  if(NOT WIN32)
+    cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
+    cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
+      shape_inference data_transform lod_tensor profiler ngraph)
+  endif(NOT WIN32)
+endif(WITH_NGRAPH)
 
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
@@ -169,11 +171,15 @@ if(WITH_DISTRIBUTE)
   set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
   set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 else()
-  if(NOT WIN32)
-    cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph_operator variable_helper)
-  else(NOT WIN32)
+  if(WITH_NGRAPH)
+    if(NOT WIN32)
+      cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass ngraph ngraph_operator variable_helper)
+    else(NOT WIN32)
+      cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
+    endif(NOT WIN32)
+  else(WITH_NGRAPH)
     cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper)
-  endif(NOT WIN32)
+  endif(WITH_NGRAPH)
   cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
 endif()

@@ -151,19 +151,22 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
   auto out_format =
       platform::MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
 
-  void* in_data = GetDataFromTensor(in, in_type);
-
   // output tensor has the same dims as input. Reorder doesn't change dims
   out->Resize(in.dims());
 
-  auto out_data = out->mutable_data(expected_kernel_type.place_, in.type());
-
-  auto in_memory = memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data);
-  auto out_memory =
-      memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data);
-
-  platform::Reorder(in_memory, out_memory);
+  if (in_format != out_format) {
+    void* in_data = GetDataFromTensor(in, in_type);
+    auto out_data = out->mutable_data(expected_kernel_type.place_, in.type());
+
+    auto in_memory =
+        memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data);
+    auto out_memory =
+        memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data);
+
+    platform::Reorder(in_memory, out_memory);
+  } else {
+    out->ShareDataWith(in);
+  }
 
   out->set_layout(out_layout);
   // reset format since the out tensor will be fed to non-MKLDNN OPkernel
   out->set_format(memory::format::format_undef);
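
Reviewer note: the change above skips the MKL-DNN reorder entirely when the source and destination formats already match, aliasing the buffer (`ShareDataWith`) instead of copying it. A rough, framework-free sketch of the same decision; all types here are hypothetical stand-ins, not Paddle's:

```cpp
// Hypothetical sketch of the copy-vs-alias decision introduced above.
// Buffer stands in for a tensor; Format for an MKL-DNN memory format.
#include <memory>
#include <vector>

enum class Format { nchw, nhwc };

struct Buffer {
  Format format;
  std::shared_ptr<std::vector<float>> data;  // shared, so aliasing is cheap
};

Buffer ToFormat(const Buffer& in, Format want) {
  if (in.format == want) {
    // Formats already agree: share the storage, no copy (ShareDataWith).
    return Buffer{want, in.data};
  }
  // Formats differ: allocate a new buffer and reorder into it. The plain
  // copy here stands in for the format-aware platform::Reorder call.
  auto out = std::make_shared<std::vector<float>>(*in.data);
  return Buffer{want, out};
}
```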

@@ -17,7 +17,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
-#include "paddle/fluid/framework/ngraph_operator.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
@@ -26,6 +25,10 @@ limitations under the License. */
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
 
+#ifdef PADDLE_WITH_NGRAPH
+#include "paddle/fluid/framework/ngraph_operator.h"
+#endif
+
 DECLARE_bool(benchmark);
 DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
 DEFINE_bool(use_ngraph, false, "Use NGRAPH to run");
@@ -88,11 +91,11 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
 static void EnableFusedOp(ExecutorPrepareContext* ctx) {
 #ifdef PADDLE_WITH_NGRAPH
   VLOG(3) << "use_ngraph=True";
-  auto intervals = FusedOperator::FusedOpIntervals(&ctx->ops_);
+  auto intervals = NgraphOperator::NgraphOpIntervals(&ctx->ops_);
   for (auto& interval : intervals) {
-    auto* fused_op = new FusedOperator(ctx->prog_, ctx->block_id_,
-                                       interval.at(0), interval.at(1));
-    *interval[0] = std::unique_ptr<OperatorBase>(fused_op);
+    auto* ng_op = new NgraphOperator(ctx->prog_, ctx->block_id_, interval.at(0),
+                                     interval.at(1));
+    *interval[0] = std::unique_ptr<OperatorBase>(ng_op);
   }
   for (auto it = intervals.rbegin(); it != intervals.rend(); ++it) {
     ctx->ops_.erase(it->at(0) + 1, it->at(1));
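
For intuition (our illustration, not the PR's code): `NgraphOpIntervals` yields iterator ranges of consecutive nGraph-convertible ops; each range is collapsed by writing one fused operator into its first slot and erasing the rest, and the erase loop walks the intervals in reverse so that iterators for earlier intervals stay valid. The same idea with plain ints:

```cpp
// Hypothetical illustration of the collapse-intervals idea: each interval of
// "fusable" elements is replaced by a single sentinel value (-1).
#include <iostream>
#include <utility>
#include <vector>

int main() {
  std::vector<int> ops = {1, 2, 3, 9, 4, 5};  // 9 marks a "not fusable" op
  using It = std::vector<int>::iterator;
  std::vector<std::pair<It, It>> intervals = {
      {ops.begin(), ops.begin() + 3},          // {1, 2, 3}
      {ops.begin() + 4, ops.begin() + 6}};     // {4, 5}

  for (auto& iv : intervals) *iv.first = -1;   // write the fused op in place

  // Erase tails in reverse so earlier interval iterators stay valid.
  for (auto it = intervals.rbegin(); it != intervals.rend(); ++it) {
    ops.erase(it->first + 1, it->second);
  }
  for (int v : ops) std::cout << v << ' ';     // prints: -1 9 -1
}
```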

@@ -16,7 +16,9 @@ limitations under the License. */
 #include <string>
 #include <vector>
 #include "glog/logging.h"
+#include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/platform/place.h"
 
 namespace paddle {
 namespace framework {
@@ -53,5 +55,12 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
   return tensor;
 }
 
+LoDTensor& GetVariableTensor(const Scope& scope, const std::string& var_name) {
+  Variable* var = scope.FindVar(var_name);
+  PADDLE_ENFORCE(var, "%s is not in scope", var_name);
+  PADDLE_ENFORCE(var->IsType<LoDTensor>(), "Only support lod tensor now.");
+  return *var->GetMutable<LoDTensor>();
+}
+
 }  // namespace framework
 }  // namespace paddle
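
Reviewer note: a minimal usage sketch for the new helper, not from the commit; the variable name is made up. The sketch creates the variable first, since the two `PADDLE_ENFORCE` checks require the name to exist in the scope and hold a `LoDTensor`:

```cpp
// Hypothetical usage sketch for GetVariableTensor (names are illustrative).
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"

void Example() {
  paddle::framework::Scope scope;
  // Create the variable and make it hold a LoDTensor.
  scope.Var("fc_0.w_0")->GetMutable<paddle::framework::LoDTensor>();

  // The helper enforces that the name exists and holds a LoDTensor,
  // then hands back a mutable reference into the scope.
  paddle::framework::LoDTensor& t =
      paddle::framework::GetVariableTensor(scope, "fc_0.w_0");
  t.Resize(paddle::framework::make_ddim({2, 3}));  // shape and fill in place
}
```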

@@ -27,5 +27,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
 LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
                             size_t index);
 
+LoDTensor& GetVariableTensor(const Scope& scope, const std::string& var_name);
+
 }  // namespace framework
 }  // namespace paddle

@@ -38,9 +38,8 @@ void CheckProgram(const ProgramDesc &program) {
     switch (role_id) {
       case _INT(OpRole::kForward):
         if (visit.find(_INT(OpRole::kBackward)) != visit.end()) {
-          LOG(ERROR)
-              << "Cannot add backward operator before forward operator %s."
-              << op->Type();
+          LOG(ERROR) << "Cannot add backward operator before forward operator "
+                     << op->Type();
         }
         break;
       case _INT(OpRole::kBackward):

@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> IsTestPass::ApplyImpl(
   for (const Node* n : graph->Nodes()) {
     if (n->IsOp()) {
       auto* op = n->Op();
-      if (n->RuntimeHasAttr("is_test")) {
+      if (op->HasAttr("is_test") || op->HasProtoAttr("is_test")) {
         op->SetAttr("is_test", true);
       } else if (std::find(begin(op_list), end(op_list), op->Type()) !=
                  end(op_list)) {

@@ -104,9 +104,9 @@ TEST(IsTestPass, basic) {
       auto* op = node->Op();
       auto op_name = boost::get<std::string>(op->GetAttr("name"));
       if (op_name == "conv3") {
-        ASSERT_FALSE(node->RuntimeHasAttr("is_test"));
+        ASSERT_FALSE(op->HasAttr("is_test"));
       } else {
-        ASSERT_TRUE(node->RuntimeHasAttr("is_test"));
+        ASSERT_TRUE(op->HasAttr("is_test"));
         EXPECT_TRUE(boost::get<bool>(op->GetAttr("is_test")));
       }
     }

@@ -25,12 +25,15 @@ std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
   const auto& op_types_list =
       Get<std::unordered_set<std::string>>("mkldnn_enabled_op_types");
   for (const Node* n : graph->Nodes()) {
-    if (n->IsOp() && n->RuntimeHasAttr("use_mkldnn")) {
-      if (op_types_list.empty()) {
-        n->Op()->SetAttr("use_mkldnn", true);
-      } else if (std::find(op_types_list.begin(), op_types_list.end(),
-                           n->Name()) != op_types_list.end()) {
-        n->Op()->SetAttr("use_mkldnn", true);
+    if (n->IsOp()) {
+      auto* op = n->Op();
+      if (op->HasAttr("use_mkldnn") || op->HasProtoAttr("use_mkldnn")) {
+        if (op_types_list.empty()) {
+          op->SetAttr("use_mkldnn", true);
+        } else if (std::find(op_types_list.begin(), op_types_list.end(),
+                             n->Name()) != op_types_list.end()) {
+          op->SetAttr("use_mkldnn", true);
+        }
       }
     }
   }

@@ -30,28 +30,6 @@ std::unique_ptr<Node> CreateNodeForTest(const std::string &name,
   return std::unique_ptr<Node>(new Node(name, type));
 }
 
-bool Node::RuntimeHasAttr(const std::string &name) const {
-  if (Op()->HasAttr(name)) {
-    return true;
-  } else {
-    auto &op_info = OpInfoMap::Instance();
-    auto op_type = Op()->Type();
-    if (op_info.Has(op_type)) {
-      auto op_info_ptr = op_info.Get(op_type);
-      if (op_info_ptr.HasOpProtoAndChecker()) {
-        const proto::OpProto &proto = op_info_ptr.Proto();
-        for (int i = 0; i != proto.attrs_size(); ++i) {
-          const proto::OpProto::Attr &attr = proto.attrs(i);
-          if (attr.name() == name) {
-            return true;
-          }
-        }
-      }
-    }
-  }
-  return false;
-}
-
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle

@@ -108,18 +108,6 @@ class Node {
            Name().find(ir::Node::kControlDepVarName) != std::string::npos;
   }
 
-  // RuntimeHasAttr is different with HasAttr now.
-  // 1. For Op()->HasAttr(), it judges whether a stored program_desc_ has attr,
-  //    thus, if stored program_desc_ are old which don't have an attr, a new
-  //    library which adds the attr already will fail on this function.
-  //    Details:
-  //    https://github.com/PaddlePaddle/Paddle/pull/14608#issuecomment-442309087
-  // 2. For Op()->RuntimeHasAttr, it judges the attr in runtime to avoid above
-  //    problem.
-  // TODO(luotao): Maybe we should enhance HasAttr later, instead of adding
-  //    RuntimeHasAttr.
-  bool RuntimeHasAttr(const std::string& name) const;
-
   std::vector<Node*> inputs;
   std::vector<Node*> outputs;

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#ifdef PADDLE_WITH_NGRAPH
 #include <algorithm>
 #include <functional>
 #include <vector>
@@ -27,14 +26,15 @@
 namespace framework {
 
 static std::shared_ptr<ngraph::Node> GetNode(
-    const std::shared_ptr<OperatorBase>& op, const std::string prm,
+    const std::shared_ptr<OperatorBase>& op, const std::string name,
     const VariableNameMap& var_map,
     std::shared_ptr<
         std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
         ngb_node_map) {
-  auto& var_names = var_map.at(prm);
+  auto& var_names = var_map.at(name);
   PADDLE_ENFORCE_EQ(var_names.size(), 1,
-                    "op %s prm %s expects one associated var", op->Type(), prm);
+                    "op %s name %s expects one associated var", op->Type(),
+                    name);
   if (ngb_node_map->find(var_names[0]) != ngb_node_map->end()) {
     return (*ngb_node_map)[var_names[0]];
   } else {
@@ -43,42 +43,42 @@ static std::shared_ptr<ngraph::Node> GetNode(
 }
 
 static std::shared_ptr<ngraph::Node> GetInputNode(
-    const std::shared_ptr<OperatorBase>& op, const std::string prm,
+    const std::shared_ptr<OperatorBase>& op, const std::string name,
     std::shared_ptr<
         std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
-  return GetNode(op, prm, op->Inputs(), ngb_node_map);
+  return GetNode(op, name, op->Inputs(), ngb_node_map);
 }
 
 static std::shared_ptr<ngraph::Node> GetOutputNode(
-    const std::shared_ptr<OperatorBase>& op, const std::string prm,
+    const std::shared_ptr<OperatorBase>& op, const std::string name,
     std::shared_ptr<
         std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
         ngb_node_map) {
-  return GetNode(op, prm, op->Outputs(), ngb_node_map);
+  return GetNode(op, name, op->Outputs(), ngb_node_map);
 }
 
 static void SetOutputNode(
-    const std::shared_ptr<OperatorBase>& op, const std::string prm,
+    const std::shared_ptr<OperatorBase>& op, const std::string name,
    std::shared_ptr<ngraph::Node> node,
     std::shared_ptr<
         std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
-  auto& var_names = op->Outputs().at(prm);
+  auto& var_names = op->Outputs().at(name);
   if (var_names.size() == 1) {
     (*ngb_node_map)[var_names[0]] = node;
   } else if (var_names.size() == 0) {
     (*ngb_node_map)[""] = node;
   } else {
-    PADDLE_THROW("prm %s has more than 1 var_names.", prm);
+    PADDLE_THROW("name %s has more than 1 var_names.", name);
   }
 }
 
 static bool HasOutput(const std::shared_ptr<OperatorBase>& op,
-                      const std::string prm) {
+                      const std::string name) {
   auto& outputs = op->Outputs();
-  if (outputs.find(prm) == outputs.end()) return false;
-  return outputs.at(prm).size() > 0;
+  if (outputs.find(name) == outputs.end()) return false;
+  return outputs.at(name).size() > 0;
 }
 
 template <typename T>
@@ -118,4 +118,3 @@ void NgraphBridge::BuildNgNode(const std::shared_ptr<OperatorBase>& op) {
 
 }  // namespace framework
 }  // namespace paddle
-#endif

@@ -14,8 +14,6 @@ limitations under the License. */
 
 #pragma once
 
-#ifdef PADDLE_WITH_NGRAPH
-
 #include <algorithm>
 #include <map>
 #include <string>
@@ -53,4 +51,3 @@ class NgraphBridge {
 
 }  // namespace framework
 }  // namespace paddle
-#endif

@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#ifdef PADDLE_WITH_NGRAPH
 #include <glog/logging.h>
 
 #include <algorithm>
@@ -58,16 +57,16 @@ typedef enum { /* nGraph support state on ops */
 } op_state;
 
 // perform graph build through bridge and execute computation
-class NgraphOperator {
+class NgraphEngine {
  public:
-  explicit NgraphOperator(const Scope& scope, const platform::Place& place,
-                          const std::vector<std::shared_ptr<OperatorBase>>& ops,
-                          const std::unordered_map<
-                              std::string, ngraph::element::Type>& var_type_map,
-                          const std::unordered_set<std::string>& persist,
-                          const std::unordered_set<std::string>& fetches,
-                          const std::unordered_set<std::string>& post_op_inputs,
-                          op_state ng_op_state)
+  explicit NgraphEngine(const Scope& scope, const platform::Place& place,
+                        const std::vector<std::shared_ptr<OperatorBase>>& ops,
+                        const std::unordered_map<
+                            std::string, ngraph::element::Type>& var_type_map,
+                        const std::unordered_set<std::string>& persist,
+                        const std::unordered_set<std::string>& fetches,
+                        const std::unordered_set<std::string>& post_op_inputs,
+                        op_state ng_op_state)
       : scope_(scope),
         place_(place),
         fused_ops_(ops),
@@ -132,7 +131,7 @@ class NgraphOperator {
 };
 
 std::vector<std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
-FusedOperator::FusedOpIntervals(
+NgraphOperator::NgraphOpIntervals(
     std::vector<std::unique_ptr<paddle::framework::OperatorBase>>* ops) {
   std::vector<std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
       intervals;
@@ -185,7 +184,7 @@ FusedOperator::FusedOpIntervals(
   return intervals;
 }
 
-FusedOperator::FusedOperator(
+NgraphOperator::NgraphOperator(
     const ProgramDesc& prog, size_t block_id,
     std::vector<std::unique_ptr<OperatorBase>>::iterator start,
     std::vector<std::unique_ptr<OperatorBase>>::iterator end,
@@ -215,7 +214,7 @@ FusedOperator::FusedOperator(
   Process();
 }
 
-void FusedOperator::Process() {
+void NgraphOperator::Process() {
   auto& bdesc = pdesc_.Block(block_);
   for (auto& var : bdesc.AllVars()) {
     if (!(var->GetType() == proto::VarType::SELECTED_ROWS ||
@@ -251,8 +250,8 @@ void FusedOperator::Process() {
   }
 }
 
-void FusedOperator::RunImpl(const Scope& scope,
-                            const platform::Place& place) const {
+void NgraphOperator::RunImpl(const Scope& scope,
+                             const platform::Place& place) const {
   op_state ng_op_state = PARTIAL_TEST;
   auto& bdesc = pdesc_.Block(block_);
   for (auto* op : bdesc.AllOps()) {
@@ -266,19 +265,19 @@ void FusedOperator::RunImpl(const Scope& scope,
     ng_op_state = ng_op_state == PARTIAL_TEST ? FULL_TEST : FULL_TRAIN;
   }
 
-  NgraphOperator ngraph_op(scope, place, fused_ops_, var_type_map_,
-                           persistables_, fetches_, post_op_inputs_,
-                           ng_op_state);
-  ngraph_op.Run(scope, place);
+  NgraphEngine ngraph_engine(scope, place, fused_ops_, var_type_map_,
+                             persistables_, fetches_, post_op_inputs_,
+                             ng_op_state);
+  ngraph_engine.Run(scope, place);
 }
 
 std::unordered_map<std::string, std::shared_ptr<ngraph::Function>>
-    NgraphOperator::func_cache_ = {};
+    NgraphEngine::func_cache_ = {};
 
-std::shared_ptr<ngraph::runtime::Backend> NgraphOperator::backend_ =
+std::shared_ptr<ngraph::runtime::Backend> NgraphEngine::backend_ =
     ngraph::runtime::Backend::create("CPU");
 
-void NgraphOperator::GetNgInputShape(std::shared_ptr<OperatorBase> op) {
+void NgraphEngine::GetNgInputShape(std::shared_ptr<OperatorBase> op) {
   op->RuntimeInferShape(scope_, place_);
   for (auto& var_name_item : op->Inputs()) {
     for (auto& var_name : var_name_item.second) {
@@ -301,7 +300,7 @@ void NgraphOperator::GetNgInputShape(std::shared_ptr<OperatorBase> op) {
   }
 }
 
-void NgraphOperator::BuildNgNodes() {
+void NgraphEngine::BuildNgNodes() {
   for (auto& var_name : var_out_) {
     if (var_node_map_->find(var_name) == var_node_map_->end()) {
       auto* var = scope_.FindVar(var_name);
@@ -323,7 +322,7 @@ void NgraphOperator::BuildNgNodes() {
   }
 }
 
-void NgraphOperator::BuildNgIO() {
+void NgraphEngine::BuildNgIO() {
   std::unordered_set<std::string> inputs;
   std::unordered_set<std::string> outputs;
 
@@ -395,7 +394,7 @@ void NgraphOperator::BuildNgIO() {
   }
 }
 
-void NgraphOperator::BuildNgFunction() {
+void NgraphEngine::BuildNgFunction() {
   BuildNgNodes();
   ngraph_function_ = nullptr;
   ngraph::NodeVector func_outputs;
@@ -416,7 +415,7 @@ void NgraphOperator::BuildNgFunction() {
     std::make_shared<ngraph::Function>(func_outputs, func_inputs);
 }
 
-std::shared_ptr<std::string> NgraphOperator::GetCacheKey() {
+std::shared_ptr<std::string> NgraphEngine::GetCacheKey() {
   auto cache_key = std::make_shared<std::string>("");
   *cache_key += std::to_string(fused_ops_.size());
   for (auto& op : fused_ops_) {
@@ -444,7 +443,7 @@ std::shared_ptr<std::string> NgraphOperator::GetCacheKey() {
   return cache_key;
 }
 
-void NgraphOperator::GetNgFunction() {
+void NgraphEngine::GetNgFunction() {
   bool cache_on = true;
   if (cache_on) {
     std::string cache_key_val = *GetCacheKey();
@@ -459,8 +458,7 @@ void NgraphOperator::GetNgFunction() {
   }
 }
 
-void NgraphOperator::Run(const Scope& scope,
-                         const platform::Place& place) const {
+void NgraphEngine::Run(const Scope& scope, const platform::Place& place) const {
   std::vector<std::shared_ptr<ngraph::runtime::Tensor>> t_in;
   std::vector<std::shared_ptr<ngraph::runtime::Tensor>> t_out;
 
@@ -545,7 +543,6 @@ void NgraphOperator::Run(const Scope& scope,
   }
   backend_->call(ngraph_function_, t_out, t_in);
-}  // NgraphOperator::RunImpl
+}  // NgraphEngine::RunImpl
 
 }  // namespace framework
 }  // namespace paddle
-#endif

@@ -14,8 +14,6 @@ limitations under the License. */
 
 #pragma once
 
-#ifdef PADDLE_WITH_NGRAPH
-
 #include <algorithm>
 #include <string>
 #include <unordered_map>
@@ -34,14 +32,14 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-class FusedOperator : public OperatorBase {
+class NgraphOperator : public OperatorBase {
  public:
   static std::vector<
       std::vector<std::vector<std::unique_ptr<OperatorBase>>::iterator>>
-  FusedOpIntervals(
+  NgraphOpIntervals(
       std::vector<std::unique_ptr<paddle::framework::OperatorBase>>* ops);
 
-  explicit FusedOperator(
+  explicit NgraphOperator(
       const ProgramDesc& prog, size_t block_id,
       std::vector<std::unique_ptr<OperatorBase>>::iterator start,
       std::vector<std::unique_ptr<OperatorBase>>::iterator end,
@@ -64,4 +62,3 @@ class FusedOperator : public OperatorBase {
 };
 
 }  // namespace framework
 }  // namespace paddle
-#endif

@@ -239,6 +239,23 @@ void OpDesc::SetOutput(const std::string &param_name,
   this->outputs_[param_name] = args;
 }
 
+bool OpDesc::HasProtoAttr(const std::string &name) const {
+  auto &op_info = OpInfoMap::Instance();
+  if (op_info.Has(desc_.type())) {
+    auto op_info_ptr = op_info.Get(desc_.type());
+    if (op_info_ptr.HasOpProtoAndChecker()) {
+      const proto::OpProto &proto = op_info_ptr.Proto();
+      for (int i = 0; i != proto.attrs_size(); ++i) {
+        const proto::OpProto::Attr &attr = proto.attrs(i);
+        if (attr.name() == name) {
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
 proto::AttrType OpDesc::GetAttrType(const std::string &name) const {
   auto it = attrs_.find(name);
   PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
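
The distinction matters for models serialized by an older Paddle: `HasAttr` only sees attributes stored in the program desc, while the new `HasProtoAttr` also consults the registered `OpProto`, so an attribute added to an operator after the model was saved is still discoverable. A small sketch of the call pattern the passes above now use (the helper name is ours, not the PR's):

```cpp
// Hypothetical helper mirroring the is_test_pass / mkldnn_placement_pass
// pattern: accept the attr if it is either stored in the desc (new models)
// or declared in the registered proto (old models, new library).
#include <string>

#include "paddle/fluid/framework/op_desc.h"

bool OpSupportsAttr(const paddle::framework::OpDesc &op,
                    const std::string &attr_name) {
  return op.HasAttr(attr_name) || op.HasProtoAttr(attr_name);
}
```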

@@ -65,6 +65,8 @@ class OpDesc {
     return attrs_.find(name) != attrs_.end();
   }
 
+  bool HasProtoAttr(const std::string &name) const;
+
   proto::AttrType GetAttrType(const std::string &name) const;
 
   std::vector<std::string> AttrNames() const;

@@ -30,13 +30,36 @@ limitations under the License. */
 #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
 #include "paddle/fluid/platform/profiler.h"
 
+#ifdef WITH_GPERFTOOLS
+#include "gperftools/profiler.h"
+#endif
+DEFINE_string(pe_profile_fname, "",
+              "Profiler filename for PE, which is generated by gperftools. "
+              "Only valid when compiled with WITH_PROFILER=ON. Empty if disabled.");
+
 namespace paddle {
 namespace framework {
 
+static std::once_flag gProfileOnce;
+#ifdef WITH_GPERFTOOLS
+static bool gProfileStarted = false;
+#endif
+
 class ParallelExecutorPrivate {
  public:
   explicit ParallelExecutorPrivate(const std::vector<platform::Place> &places)
-      : places_(places) {}
+      : places_(places) {
+    if (!FLAGS_pe_profile_fname.empty()) {
+      std::call_once(gProfileOnce, [] {
+#ifdef WITH_GPERFTOOLS
+        ProfilerStart(FLAGS_pe_profile_fname.c_str());
+        gProfileStarted = true;
+#else
+        LOG(WARNING) << "Paddle is not compiled with gperftools. "
+                        "FLAGS_pe_profile_fname will be ignored";
+#endif
+      });
+    }
+  }
 
   ~ParallelExecutorPrivate() {
     if (own_local_scope_) {
@@ -270,6 +293,12 @@ void ParallelExecutor::BCastParamsToDevices(
 
 void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
                            const std::string &fetched_var_name) {
+#ifdef WITH_GPERFTOOLS
+  if (gProfileStarted) {
+    ProfilerFlush();
+  }
+#endif
+
   platform::RecordBlock b(0);
 #ifdef PADDLE_WITH_CUDA
   if (!gcs_.empty()) {
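
The design here: the profiler is started at most once per process (guarded by `std::call_once`), and every call to `Run` invokes `ProfilerFlush` so samples reach disk even if the process never exits cleanly. A stripped-down, hypothetical version of the same pattern outside Paddle:

```cpp
// Minimal sketch (assumes -DWITH_GPERFTOOLS and linking against libprofiler):
// start gperftools once on first use, then flush on every step.
#include <mutex>
#include <string>

#ifdef WITH_GPERFTOOLS
#include <gperftools/profiler.h>
#endif

static std::once_flag g_profile_once;
static bool g_profile_started = false;

void StepWithProfiling(const std::string &profile_path) {
  std::call_once(g_profile_once, [&] {
#ifdef WITH_GPERFTOOLS
    if (!profile_path.empty()) {
      ProfilerStart(profile_path.c_str());  // begin sampling this process
      g_profile_started = true;
    }
#endif
  });

  // ... do one step of real work here ...

#ifdef WITH_GPERFTOOLS
  if (g_profile_started) {
    ProfilerFlush();  // persist samples so a crash doesn't lose the profile
  }
#endif
}
```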

Some files were not shown because too many files have changed in this diff.
