Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into data_reader

commit b858c10319 by weixing02

@@ -1,6 +1,6 @@
 # A image for building paddle binaries
 # Use cuda devel base image for both cpu and gpu environment
-FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04
+FROM nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04
 MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
 ARG UBUNTU_MIRROR

@@ -62,9 +62,10 @@ endif()
 ## Then find the reference-cblas. www.netlib.org/blas/
 set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH
   "Folder contains reference-cblas")
+if(NOT CMAKE_CROSSCOMPILING)
 set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
   ${REFERENCE_CBLAS_ROOT}/include
   /usr/include
@@ -77,6 +78,11 @@ set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
   /usr/lib/blas/reference/
   /usr/lib/reference/
 )
+else()
+  # Disable the finding of reference cblas under host's system path
+  set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include)
+  set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
+endif()

 find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
   ${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})

@@ -24,16 +24,16 @@ SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
 SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
 SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
 IF(APPLE)
-  SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
+  SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
 ELSE()
-  SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin)
+  SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin)
 ENDIF()

 ExternalProject_Add(
     extern_grpc
     DEPENDS protobuf zlib
     GIT_REPOSITORY "https://github.com/grpc/grpc.git"
-    GIT_TAG "v1.8.x"
+    GIT_TAG "v1.11.x"
     PREFIX ${GRPC_SOURCES_DIR}
     UPDATE_COMMAND ""
     CONFIGURE_COMMAND ""

@@ -11,19 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

-IF(MOBILE_INFERENCE)
+if(MOBILE_INFERENCE OR RPI)
   return()
-ENDIF()
+endif()

 include (ExternalProject)

 # NOTE: snappy is needed when linking with recordio

-SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
-SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
-SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE)
+set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
+set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
+set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE)
+set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")

 ExternalProject_Add(
     extern_snappy
@@ -51,8 +52,7 @@ ExternalProject_Add(
 )

 add_library(snappy STATIC IMPORTED GLOBAL)
-set_property(TARGET snappy PROPERTY IMPORTED_LOCATION
-    "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
+set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES})

 include_directories(${SNAPPY_INCLUDE_DIR})
 add_dependencies(snappy extern_snappy)

@@ -11,9 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

-IF(MOBILE_INFERENCE)
+IF(MOBILE_INFERENCE OR RPI)
   return()
 ENDIF()
@@ -21,9 +20,11 @@ include (ExternalProject)
 # NOTE: snappy is needed when linking with recordio

-SET(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
-SET(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
-SET(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include/" CACHE PATH "snappy stream include directory." FORCE)
+set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
+set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
+set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE)
+set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")

 ExternalProject_Add(
     extern_snappystream
@@ -51,8 +52,7 @@ ExternalProject_Add(
 )

 add_library(snappystream STATIC IMPORTED GLOBAL)
-set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION
-    "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
+set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES})

 include_directories(${SNAPPYSTREAM_INCLUDE_DIR})  # For snappysteam to include its own headers.
 include_directories(${THIRD_PARTY_PATH}/install)  # For Paddle to include snappy stream headers.

@@ -195,14 +195,7 @@ function(cc_library TARGET_NAME)
       list(REMOVE_ITEM cc_library_DEPS warpctc)
       add_dependencies(${TARGET_NAME} warpctc)
     endif()
-    if("${cc_library_DEPS}" MATCHES "ARCHIVE_START")
-      # Support linking flags: --whole-archive (Linux) / -force_load (MacOS).
-      # WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
-      target_circle_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
-      list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END)
-    else()
-      target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
-    endif()
+    target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
     add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
   endif()
@@ -243,11 +236,7 @@ function(cc_test TARGET_NAME)
     set(multiValueArgs SRCS DEPS ARGS)
     cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     add_executable(${TARGET_NAME} ${cc_test_SRCS})
-    # Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
-    target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
-    if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
-      list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
-    endif()
+    target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
     add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
     add_test(NAME ${TARGET_NAME}
              COMMAND ${TARGET_NAME} ${cc_test_ARGS}

@@ -1,7 +1,22 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 set_property(GLOBAL PROPERTY FLUID_MODULES "")
 # find all fluid modules is used for paddle fluid static library
 function(find_fluid_modules TARGET_NAME)
   get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
+  string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
   string(FIND "${__target_path}" "fluid" pos)
   if(pos GREATER 1)
     get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
@@ -77,6 +92,23 @@ elseif (WITH_MKLML)
   )
 endif()

+if(NOT MOBILE_INFERENCE AND NOT RPI)
+  set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
+  copy(snappy_lib
+    SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
+    DSTS ${dst_dir} ${dst_dir}/lib)
+
+  set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream")
+  copy(snappystream_lib
+    SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
+    DSTS ${dst_dir} ${dst_dir}/lib)
+
+  set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib")
+  copy(zlib_lib
+    SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
+    DSTS ${dst_dir} ${dst_dir}/lib)
+endif()
+
 # paddle fluid module
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
 set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid")

@@ -119,7 +119,7 @@ An actual Fluid example is described [here](https://github.com/PaddlePaddle/Pad
 From the example, the Fluid programs look very similar to their PyTorch equivalent programs, except that Fluid's loop structure, wrapped with Python's `with` statement, could run much faster than just a Python loop.

-We have more examples of the [`if-then-else`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/if_else_op.md) structure of Fluid.
+We have more examples of the [`if-then-else`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/execution/if_else_op.md) structure of Fluid.

 ## Turing Completeness

@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
 endif()

 add_subdirectory(testing)
-if(NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS)
+if(NOT MOBILE_INFERENCE AND NOT RPI)
   add_subdirectory(fluid)
 endif()

@@ -3,6 +3,7 @@ add_subdirectory(platform)
 add_subdirectory(framework)
 add_subdirectory(operators)
 add_subdirectory(pybind)
-add_subdirectory(inference)
 add_subdirectory(string)
 add_subdirectory(recordio)
+# NOTE: please add subdirectory inference at last.
+add_subdirectory(inference)

@@ -92,7 +92,7 @@ class BlockDesc {
   /*
    * Remove Op and its input/output variables.
-   * Note that for either input or ouput variable, if it is also an input or
+   * Note that for either input or output variable, if it is also an input or
    * output variable of other ops, we should remain it.
    */
   void RemoveOp(size_t s, size_t e);

@@ -14,6 +14,8 @@
 #include "paddle/fluid/framework/details/computation_op_handle.h"

+#include <string>
+
 namespace paddle {
 namespace framework {
 namespace details {
@@ -33,7 +35,7 @@ void ComputationOpHandle::RunImpl() {
     }
   }

-  op_->Run(*scope_->FindVar("@TMP_SCOPE@")->Get<Scope *>(), place_);
+  op_->Run(*scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(), place_);
 }

 std::string ComputationOpHandle::Name() const { return op_->Type(); }

@@ -14,6 +14,9 @@
 #include "paddle/fluid/framework/details/fetch_op_handle.h"

+#include <string>
+#include <vector>
+
 namespace paddle {
 namespace framework {
 namespace details {
@@ -57,7 +60,10 @@ void FetchOpHandle::RunImpl() {
   for (size_t i = 0; i < scopes.size(); ++i) {
     auto &scope = scopes[i];
-    auto &t = scope->FindVar(var_name)->Get<framework::LoDTensor>();
+    auto &t = scope->FindVar(kLocalExecScopeName)
+                  ->Get<Scope *>()
+                  ->FindVar(var_name)
+                  ->Get<framework::LoDTensor>();
     if (platform::is_gpu_place(var->place_)) {
 #ifdef PADDLE_WITH_CUDA
       TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]);

@@ -24,6 +24,8 @@ namespace paddle {
 namespace framework {
 namespace details {

+constexpr char kLocalExecScopeName[] = "@LCOAL_SCOPE@";
+
 class OpHandleBase {
  private:
   DISABLE_COPY_AND_ASSIGN(OpHandleBase);

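The constant above gives every op handle a well-known key under which the per-run local scope is published inside its parent scope. A minimal standalone sketch of that indirection, with toy types standing in for Paddle's Scope and Variable (illustration only, not the real API; the corrected spelling "@LOCAL_SCOPE@" is used here):

    #include <cassert>
    #include <map>
    #include <memory>
    #include <string>
    #include <vector>

    // Toy scope: named slots, each holding an opaque pointer.
    struct Scope {
      std::map<std::string, void *> vars;
      std::vector<std::unique_ptr<Scope>> kids;
      Scope *NewScope() {
        kids.emplace_back(new Scope);
        return kids.back().get();
      }
    };

    constexpr char kLocalExecScopeName[] = "@LOCAL_SCOPE@";

    int main() {
      Scope outer;
      // Publish the per-run scope under the well-known key...
      Scope *local = outer.NewScope();
      outer.vars[kLocalExecScopeName] = local;
      // ...so any op holding only `outer` can reach the current run's scope.
      auto *run_scope = static_cast<Scope *>(outer.vars[kLocalExecScopeName]);
      assert(run_scope == local);
    }
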
@@ -15,13 +15,15 @@
 #pragma once

 #include <memory>
+#include <string>
+#include <vector>
 #include "paddle/fluid/framework/details/ssa_graph.h"
 #include "paddle/fluid/framework/feed_fetch_type.h"

 namespace paddle {
 namespace framework {
 namespace details {

 class SSAGraphExecutor {
   DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor);

@@ -136,12 +136,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
     ready_ops.clear();
   };

-  // Create local scopes.
-  for (auto &scope : local_scopes_) {
-    auto &local_scope = scope->NewScope();
-    *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>() = &local_scope;
-  }
-
   // Step 3. Execution
   while (!pending_vars.empty() || !ready_ops.empty() || !delayed_ops.empty()) {
     // 1. Run All Ready ops
@@ -189,34 +183,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
   PADDLE_ENFORCE(ready_ops.empty());
   PADDLE_ENFORCE(delayed_ops.empty());
   PADDLE_ENFORCE(blocked_by_delayed_ops.empty());
-  ++computation_count_;
-
-  auto sync_computation = [&] {
-    computation_count_ = 0;
-    // Wait All computational streams
-    for (auto p : this->places_) {
-      platform::DeviceContextPool::Instance().Get(p)->Wait();
-    }
-    for (auto &scope : local_scopes_) {
-      scope->DropKids();
-    }
-  };

   // Wait FetchOps.
   if (!fetch_ops.empty()) {
     fetch_ops.clear();
-    sync_computation();
-  }
-
-  if (computation_count_ == max_async_computation) {
-    sync_computation();
-  }
-
-  // NOTE: the temp scope can be dropped lazily if needed.
-  // Drop tmp scopes;
-  for (auto &scope : local_scopes_) {
-    auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>();
-    kid = nullptr;
   }

   return fetch_data;

@@ -99,9 +99,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
   std::unique_ptr<platform::EnforceNotMet> exception_;
   std::atomic<int> running_ops_;
   bool allow_op_delay_;
-
-  size_t computation_count_{0};
-  size_t max_async_computation{100};
 };

 }  // namespace details

@@ -83,8 +83,8 @@ static void CheckTensorNANOrInf(const std::string& name,
   if (tensor.memory_size() == 0) {
     return;
   }
-  if (tensor.type().hash_code() != typeid(float).hash_code() &&
-      tensor.type().hash_code() != typeid(double).hash_code()) {
+  if (tensor.type().hash_code() != typeid(float).hash_code() &&   // NOLINT
+      tensor.type().hash_code() != typeid(double).hash_code()) {  // NOLINT
     return;
   }
   PADDLE_ENFORCE(!framework::TensorContainsInf(tensor),
@@ -145,12 +145,13 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
 // Return true if the block has feed operators and holder of matching info.
 static bool has_feed_operators(
     const BlockDesc& block,
-    std::map<std::string, const LoDTensor*>& feed_targets,
+    const std::map<std::string, const LoDTensor*>& feed_targets,
     const std::string& feed_holder_name) {
   size_t feed_count = 0;
   for (auto* op : block.AllOps()) {
     if (op->Type() == kFeedOpType) {
       feed_count++;
+      // The input variable's name of feed_op should be feed_holder_name.
       PADDLE_ENFORCE_EQ(op->Input("X")[0], feed_holder_name,
                         "Input to feed op should be '%s'", feed_holder_name);
       std::string feed_target_name = op->Output("Out")[0];
@@ -166,7 +167,8 @@ static bool has_feed_operators(
       feed_count, feed_targets.size(),
       "The number of feed operators should match 'feed_targets'");

-  // When feed operator are present, so should be feed_holder
+  if (!feed_holder_name.empty()) {
+    // When feed operator are present, so should be feed_holder.
   auto var = block.FindVar(feed_holder_name);
   PADDLE_ENFORCE_NOT_NULL(var, "Block should already have a '%s' variable",
                           feed_holder_name);
@@ -174,6 +176,7 @@ static bool has_feed_operators(
                  "'%s' variable should be 'FEED_MINIBATCH' type",
                  feed_holder_name);
   }
+  }

   return feed_count > 0;
 }
@@ -185,12 +188,14 @@ static bool has_feed_operators(
 // and fetch_holder_name. Raise exception when any mismatch is found.
 // Return true if the block has fetch operators and holder of matching info.
 static bool has_fetch_operators(
-    const BlockDesc& block, std::map<std::string, LoDTensor*>& fetch_targets,
+    const BlockDesc& block,
+    const std::map<std::string, LoDTensor*>& fetch_targets,
     const std::string& fetch_holder_name) {
   size_t fetch_count = 0;
   for (auto* op : block.AllOps()) {
     if (op->Type() == kFetchOpType) {
       fetch_count++;
+      // The output variable's name of fetch_op should be fetch_holder_name.
       PADDLE_ENFORCE_EQ(op->Output("Out")[0], fetch_holder_name,
                         "Output of fetch op should be '%s'", fetch_holder_name);
       std::string fetch_target_name = op->Input("X")[0];
@@ -206,7 +211,8 @@ static bool has_fetch_operators(
       fetch_count, fetch_targets.size(),
       "The number of fetch operators should match 'fetch_targets'");

-  // When fetch operator are present, so should be fetch_holder
+  if (!fetch_holder_name.empty()) {
+    // When fetch operator are present, so should be fetch_holder.
   auto var = block.FindVar(fetch_holder_name);
   PADDLE_ENFORCE_NOT_NULL(var, "Block should already have a '%s' variable",
                           fetch_holder_name);
@@ -214,6 +220,7 @@ static bool has_fetch_operators(
                  "'%s' variable should be 'FETCH_LIST' type",
                  fetch_holder_name);
   }
+  }

   return fetch_count > 0;
 }
@@ -259,16 +266,6 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     }
   }

-  // map the data of feed_targets to feed_holder
-  for (auto* op : global_block->AllOps()) {
-    if (op->Type() == kFeedOpType) {
-      std::string feed_target_name = op->Output("Out")[0];
-      int idx = boost::get<int>(op->GetAttr("col"));
-      SetFeedVariable(scope, *feed_targets[feed_target_name], feed_holder_name,
-                      idx);
-    }
-  }
-
   if (!has_fetch_ops) {
     // create fetch_holder variable
     auto* fetch_holder = global_block->Var(fetch_holder_name);
@@ -292,17 +289,9 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     }
   }

-  Run(*copy_program, scope, 0, create_vars, create_vars);
-
-  // obtain the data of fetch_targets from fetch_holder
-  for (auto* op : global_block->AllOps()) {
-    if (op->Type() == kFetchOpType) {
-      std::string fetch_target_name = op->Input("X")[0];
-      int idx = boost::get<int>(op->GetAttr("col"));
-      *fetch_targets[fetch_target_name] =
-          GetFetchVariable(*scope, fetch_holder_name, idx);
-    }
-  }
+  auto ctx = Prepare(*copy_program, 0);
+  RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets, create_vars,
+                     feed_holder_name, fetch_holder_name);
 }
std::unique_ptr<ExecutorPrepareContext> Executor::Prepare( std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
@ -370,5 +359,42 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
} }
} }
void Executor::RunPreparedContext(
ExecutorPrepareContext* ctx, Scope* scope,
std::map<std::string, const LoDTensor*>& feed_targets,
std::map<std::string, LoDTensor*>& fetch_targets, bool create_vars,
const std::string& feed_holder_name, const std::string& fetch_holder_name) {
auto& global_block = ctx->prog_.Block(ctx->block_id_);
PADDLE_ENFORCE(
has_feed_operators(global_block, feed_targets, feed_holder_name),
"Program in ExecutorPrepareContext should has feed_ops.");
PADDLE_ENFORCE(
has_fetch_operators(global_block, fetch_targets, fetch_holder_name),
"Program in the prepared context should has fetch_ops.");
// map the data of feed_targets to feed_holder
for (auto* op : global_block.AllOps()) {
if (op->Type() == kFeedOpType) {
std::string feed_target_name = op->Output("Out")[0];
int idx = boost::get<int>(op->GetAttr("col"));
SetFeedVariable(scope, *feed_targets[feed_target_name], feed_holder_name,
idx);
}
}
RunPreparedContext(ctx, scope, create_vars, create_vars);
// obtain the data of fetch_targets from fetch_holder
for (auto* op : global_block.AllOps()) {
if (op->Type() == kFetchOpType) {
std::string fetch_target_name = op->Input("X")[0];
int idx = boost::get<int>(op->GetAttr("col"));
*fetch_targets[fetch_target_name] =
GetFetchVariable(*scope, fetch_holder_name, idx);
}
}
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

@@ -14,6 +14,9 @@ limitations under the License. */

 #pragma once

+#include <map>
+#include <string>
+#include <vector>
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
@@ -70,6 +73,13 @@ class Executor {
                           bool create_local_scope = true,
                           bool create_vars = true);

+  void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
+                          std::map<std::string, const LoDTensor*>& feed_targets,
+                          std::map<std::string, LoDTensor*>& fetch_targets,
+                          bool create_vars = true,
+                          const std::string& feed_holder_name = "feed",
+                          const std::string& fetch_holder_name = "fetch");
+
  private:
   const platform::Place place_;
 };

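Taken together, the executor changes above expose a prepare-once, run-many workflow: Prepare() caches the program analysis, and the new RunPreparedContext() overload feeds input tensors, runs, and pulls fetched tensors in one call. A hedged usage sketch based only on the signatures visible in this diff; the program, scope, place, and tensor variables are assumed to exist, and the variable names "x" and "y" are hypothetical:

    // Feed/fetch maps are keyed by the names of feed_op outputs and
    // fetch_op inputs in the program.
    std::map<std::string, const paddle::framework::LoDTensor *> feed_targets;
    std::map<std::string, paddle::framework::LoDTensor *> fetch_targets;
    feed_targets["x"] = &input_tensor;    // hypothetical feed variable
    fetch_targets["y"] = &output_tensor;  // hypothetical fetch variable

    paddle::framework::Executor executor(place);
    auto ctx = executor.Prepare(program, /*block_id=*/0);
    // Reuse `ctx` across steps instead of re-analyzing the program each run.
    executor.RunPreparedContext(ctx.get(), &scope, feed_targets, fetch_targets);
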
@@ -46,7 +46,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) {
   }
 }

-static DDim GetDims(const Scope& scope, const std::string& name) {
+static DDim GetDims(const Scope& scope, const std::string& name,
+                    bool get_actual_dim = false) {
   Variable* var = scope.FindVar(name);
   if (var == nullptr) {
     return DDim({-1});
@@ -55,7 +56,11 @@ static DDim GetDims(const Scope& scope, const std::string& name) {
   if (var->IsType<LoDTensor>()) {
     return var->Get<LoDTensor>().dims();
   } else if (var->IsType<SelectedRows>()) {
-    return var->Get<SelectedRows>().GetCompleteDims();
+    if (get_actual_dim) {
+      return var->Get<SelectedRows>().value().dims();
+    } else {
+      return var->Get<SelectedRows>().GetCompleteDims();
+    }
   } else {
     return DDim({-1});
   }
@@ -129,7 +134,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
     for (size_t i = 0; i < input.second.size(); ++i) {
       ss << input.second[i];
       if (scope) {
-        ss << "[" << GetDims(*scope, input.second[i]) << "]";
+        ss << "[" << GetDims(*scope, input.second[i], true) << "]";
         ss << "(" << GetLoD(*scope, input.second[i]) << ")";
       }
       if (i != input.second.size() - 1) {
@@ -149,7 +154,7 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
     for (size_t i = 0; i < output.second.size(); ++i) {
       ss << output.second[i];
       if (scope) {
-        ss << "[" << GetDims(*scope, output.second[i]) << "]";
+        ss << "[" << GetDims(*scope, output.second[i], true) << "]";
         ss << "(" << GetLoD(*scope, output.second[i]) << ")";
       }
       if (i != output.second.size() - 1) {

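The new get_actual_dim flag matters for SelectedRows variables, whose logical ("complete") shape differs from the shape of the rows actually stored; the debug strings above now print the latter. A simplified standalone model of the distinction (toy struct, not Paddle's real SelectedRows class):

    #include <cstdio>
    #include <vector>

    struct ToySelectedRows {
      long height = 1000;                // logical row count of the full tensor
      std::vector<long> rows = {3, 42};  // indices of rows actually stored
      long width = 8;                    // width of each stored row
    };

    int main() {
      ToySelectedRows t;
      // Complete dims: the logical tensor, height x width.
      std::printf("complete dims: %ld x %ld\n", t.height, t.width);
      // Actual dims: the stored value buffer, #rows x width.
      std::printf("actual dims:   %zu x %ld\n", t.rows.size(), t.width);
    }
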
@@ -15,6 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/parallel_executor.h"

 #include <string>
+#include <tuple>
 #include <vector>

 #ifdef PADDLE_WITH_CUDA
@@ -41,6 +42,8 @@ class ParallelExecutorPrivate {
 #ifdef PADDLE_WITH_CUDA
   std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
 #endif
+
+  std::vector<std::tuple<std::string, proto::VarType::Type, bool>> var_types_;
 };

 std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
@@ -97,14 +100,9 @@ ParallelExecutor::ParallelExecutor(
                                           allow_op_delay));

   // Step 3. Create vars in each scope;
-  for (auto *scope : member_->local_scopes_) {
-    for (auto *var : main_program.Block(0).AllVars()) {
-      if (scope->FindVar(var->Name()) != nullptr) {
-        continue;
-      }
-      InitializeVariable(scope->Var(var->Name()), var->GetType());
-    }
+  for (auto *var : main_program.Block(0).AllVars()) {
+    member_->var_types_.emplace_back(var->Name(), var->GetType(),
+                                     var->Persistable());
   }
 }
@@ -163,9 +161,42 @@ void ParallelExecutor::Run(
     const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
   platform::RecordBlock b(0);
   SplitTensorToPlaces(feed_tensors);
+
+  // Create local scopes.
+  for (auto &scope : member_->local_scopes_) {
+    Scope &local_scope = scope->NewScope();
+    *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
+        &local_scope;
+
+    for (auto &name_type_pair : member_->var_types_) {
+      if (scope->FindVar(std::get<0>(name_type_pair)) != nullptr) {
+        continue;
+      }
+
+      if (std::get<2>(name_type_pair)) {  // Persistable
+        InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
+                           std::get<1>(name_type_pair));
+      } else {
+        InitializeVariable(local_scope.Var(std::get<0>(name_type_pair)),
+                           std::get<1>(name_type_pair));
+      }
+    }
+  }
+
   auto fetch_data = member_->executor_->Run(fetch_tensors);
   *member_->global_scope_->Var(fetched_var_name)->GetMutable<FeedFetchList>() =
       fetch_data;
+
+  // Wait All computational streams
+  for (auto p : member_->places_) {
+    platform::DeviceContextPool::Instance().Get(p)->Wait();
+  }
+  for (auto &scope : member_->local_scopes_) {
+    auto &local_scope =
+        *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>();
+    scope->DeleteScope(local_scope);
+    local_scope = nullptr;
+  }
 }

 void ParallelExecutor::SplitTensorToPlaces(

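The run path above splits variable creation by persistability: persistable variables (parameters and the like) go into the long-lived device scope, everything else into a per-run child scope that is deleted once the device streams drain. A standalone sketch of that routing over the (name, type, persistable) tuples cached in var_types_ (toy types, illustration only):

    #include <iostream>
    #include <string>
    #include <tuple>
    #include <vector>

    // Mirrors var_types_: (variable name, type id, persistable flag).
    using VarInfo = std::tuple<std::string, int, bool>;

    int main() {
      std::vector<VarInfo> var_types = {
          {"fc.w", /*type=*/1, /*persistable=*/true},    // e.g. a parameter
          {"tmp_0", /*type=*/1, /*persistable=*/false},  // e.g. an activation
      };
      for (const auto &v : var_types) {
        // Persistable variables survive across runs; the rest are recreated
        // in a fresh local scope on every call to Run().
        std::cout << std::get<0>(v) << " -> "
                  << (std::get<2>(v) ? "outer (reused) scope"
                                     : "local (per-run) scope")
                  << "\n";
      }
    }
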
@@ -14,8 +14,12 @@

 #include "paddle/fluid/framework/threadpool.h"

+#include "gflags/gflags.h"
 #include "paddle/fluid/platform/enforce.h"

+DEFINE_int32(io_threadpool_size, 100,
+             "number of threads used for doing IO, default 100");
+
 namespace paddle {
 namespace framework {
@@ -91,5 +95,20 @@ void ThreadPool::TaskLoop() {
   }
 }

+std::unique_ptr<ThreadPool> ThreadPoolIO::io_threadpool_(nullptr);
+std::once_flag ThreadPoolIO::io_init_flag_;
+
+ThreadPool* ThreadPoolIO::GetInstanceIO() {
+  std::call_once(io_init_flag_, &ThreadPoolIO::InitIO);
+  return io_threadpool_.get();
+}
+
+void ThreadPoolIO::InitIO() {
+  if (io_threadpool_.get() == nullptr) {
+    // TODO(typhoonzero1986): make this configurable
+    io_threadpool_.reset(new ThreadPool(FLAGS_io_threadpool_size));
+  }
+}
+
 }  // namespace framework
 }  // namespace paddle

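GetInstanceIO() is the standard std::call_once lazy-singleton idiom, sized by the new io_threadpool_size gflag. The same idiom without the gflags or Paddle dependencies, as a runnable sketch (the pool is a stub and the size is hard-coded where the flag would be read):

    #include <iostream>
    #include <memory>
    #include <mutex>

    struct Pool {  // stand-in for ThreadPool
      explicit Pool(int threads) : threads_(threads) {}
      int threads_;
    };

    static std::unique_ptr<Pool> g_io_pool;
    static std::once_flag g_io_init;

    Pool *GetIOPool() {
      // call_once runs the initializer exactly once, even if many
      // threads hit the first call concurrently.
      std::call_once(g_io_init, [] { g_io_pool.reset(new Pool(100)); });
      return g_io_pool.get();
    }

    int main() { std::cout << GetIOPool()->threads_ << " IO threads\n"; }
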
@@ -14,12 +14,12 @@ limitations under the License. */

 #pragma once

-#include <condition_variable>
+#include <condition_variable>  // NOLINT
 #include <functional>
-#include <future>
-#include <mutex>
+#include <future>  // NOLINT
+#include <mutex>   // NOLINT
 #include <queue>
-#include <thread>
+#include <thread>  // NOLINT
 #include <vector>

 #include "glog/logging.h"
 #include "paddle/fluid/platform/enforce.h"
@@ -28,6 +28,22 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

+struct ExceptionHandler {
+  mutable std::future<std::unique_ptr<platform::EnforceNotMet>> future_;
+  explicit ExceptionHandler(
+      std::future<std::unique_ptr<platform::EnforceNotMet>>&& f)
+      : future_(std::move(f)) {}
+  void operator()() const {
+    auto ex = this->future_.get();
+    if (ex != nullptr) {
+      LOG(FATAL) << "The exception is thrown inside the thread pool. You "
+                    "should use RunAndGetException to handle the exception.\n"
+                    "The default exception handler is LOG(FATAL)."
+                 << ex->what();
+    }
+  }
+};
+
 // ThreadPool maintains a queue of tasks, and runs them using a fixed
 // number of threads.
 class ThreadPool {
@@ -87,22 +103,6 @@ class ThreadPool {
   void Wait();

  private:
-  struct ExceptionHandler {
-    mutable std::future<std::unique_ptr<platform::EnforceNotMet>> future_;
-    explicit ExceptionHandler(
-        std::future<std::unique_ptr<platform::EnforceNotMet>>&& f)
-        : future_(std::move(f)) {}
-    void operator()() const {
-      auto ex = this->future_.get();
-      if (ex != nullptr) {
-        LOG(FATAL) << "The exception is thrown inside the thread pool. You "
-                      "should use RunAndGetException to handle the exception.\n"
-                      "The default exception handler is LOG(FATAL)."
-                   << ex->what();
-      }
-    }
-  };
-
   DISABLE_COPY_AND_ASSIGN(ThreadPool);

   // If the task queue is empty and avaialbe is equal to the number of
@@ -135,6 +135,17 @@ class ThreadPool {
   std::condition_variable completed_;
 };

+class ThreadPoolIO : ThreadPool {
+ public:
+  static ThreadPool* GetInstanceIO();
+  static void InitIO();
+
+ private:
+  // NOTE: threadpool in base will be inhereted here.
+  static std::unique_ptr<ThreadPool> io_threadpool_;
+  static std::once_flag io_init_flag_;
+};
+
 // Run a function asynchronously.
 // NOTE: The function must return void. If the function need to return a value,
 // you can use lambda to capture a value pointer.
@@ -143,5 +154,10 @@ std::future<void> Async(Callback callback) {
   return ThreadPool::GetInstance()->Run(callback);
 }

+template <typename Callback>
+std::future<void> AsyncIO(Callback callback) {
+  return ThreadPoolIO::GetInstanceIO()->Run(callback);
+}
+
 }  // namespace framework
 }  // namespace paddle

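With AsyncIO in place, blocking work can be scheduled exactly like Async, but on the dedicated IO pool so it cannot starve compute tasks. A hedged usage sketch (the lambda body is hypothetical; the returned std::future<void> comes from ThreadPool::Run as shown above):

    // Inside paddle::framework:
    auto done = AsyncIO([] {
      // hypothetical IO-bound work, e.g. reading the next batch from disk
    });
    done.wait();  // block until the IO task finishes
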
@@ -1,4 +1,4 @@
-set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor prune init)
+set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init)

 cc_library(paddle_fluid_api
     SRCS io.cc
@@ -11,7 +11,7 @@ cc_library(paddle_fluid DEPS ${fluid_modules})
 # Create shared library
 cc_library(paddle_fluid_shared SHARED
     SRCS io.cc
-    DEPS ARCHIVE_START ${GLOB_OP_LIB} ${FLUID_CORE_MODULES} ARCHIVE_END)
+    DEPS ${fluid_modules})
 set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
 if(NOT APPLE)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.

@@ -17,10 +17,16 @@ limitations under the License. */
 #include <fstream>
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/feed_fetch_type.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/pybind/pybind.h"

 namespace paddle {
 namespace inference {

+// Temporarily add this function for exposing framework::InitDevices() when
+// linking the inference shared library.
+void Init(bool init_p2p) { framework::InitDevices(init_p2p); }
+
 void ReadBinaryFile(const std::string& filename, std::string& contents) {
   std::ifstream fin(filename, std::ios::in | std::ios::binary);
   PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s", filename);

Some files were not shown because too many files have changed in this diff.
