Merge branch 'develop' into core_fix_openblas_threads

wangkuiyi-patch-1
Liu Yiqun 7 years ago
commit 50ba205d79

@ -70,7 +70,7 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# specify sphinx version as 1.5.6 and remove -U option for [pip install -U # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
# sphinx-rtd-theme] since -U option will cause sphinx being updated to newest # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
# version(1.7.1 for now), which causes building documentation failed. # version(1.7.1 for now), which causes building documentation failed.
RUN pip install --upgrade pip==9.0.3 && \ RUN easy_install -U pip && \
pip install -U wheel && \ pip install -U wheel && \
pip install -U docopt PyYAML sphinx==1.5.6 && \ pip install -U docopt PyYAML sphinx==1.5.6 && \
pip install sphinx-rtd-theme==0.1.9 recommonmark pip install sphinx-rtd-theme==0.1.9 recommonmark

@ -159,6 +159,7 @@ def run_benchmark(model, args):
paddle.dataset.mnist.train(), batch_size=args.batch_size) paddle.dataset.mnist.train(), batch_size=args.batch_size)
accuracy = fluid.metrics.Accuracy() accuracy = fluid.metrics.Accuracy()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
iters, num_samples, start_time = 0, 0, time.time() iters, num_samples, start_time = 0, 0, time.time()
for pass_id in range(args.pass_num): for pass_id in range(args.pass_num):
accuracy.reset() accuracy.reset()
@ -175,17 +176,20 @@ def run_benchmark(model, args):
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([len(y_data), 1]) y_data = y_data.reshape([len(y_data), 1])
outs = exe.run( outs = train_exe.run(
fluid.default_main_program(),
feed={"pixel": img_data, feed={"pixel": img_data,
"label": y_data}, "label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor] fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
]
) # The accuracy is the accumulation of batches, but not the current batch. ) # The accuracy is the accumulation of batches, but not the current batch.
accuracy.update(value=outs[1], weight=outs[2]) accuracy.update(
value=np.array(np.mean(outs[1])),
weight=np.mean(np.array(outs[2])))
iters += 1 iters += 1
num_samples += len(y_data) num_samples += len(y_data)
loss = np.array(outs[0]) loss = np.mean(np.array(outs[0]))
acc = np.array(outs[1]) acc = np.mean(np.array(outs[1]))
train_losses.append(loss) train_losses.append(loss)
train_accs.append(acc) train_accs.append(acc)
print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" % print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %

@ -241,6 +241,7 @@ def run_benchmark(model, args):
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
accuracy = fluid.average.WeightedAverage() accuracy = fluid.average.WeightedAverage()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
if args.use_fake_data: if args.use_fake_data:
data = train_reader().next() data = train_reader().next()
image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype( image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype(
@ -264,14 +265,17 @@ def run_benchmark(model, args):
data)).astype('float32') data)).astype('float32')
label = np.array(map(lambda x: x[1], data)).astype('int64') label = np.array(map(lambda x: x[1], data)).astype('int64')
label = label.reshape([-1, 1]) label = label.reshape([-1, 1])
loss, acc, weight = exe.run( loss, acc, weight = train_exe.run(
fluid.default_main_program(),
feed={'data': image, feed={'data': image,
'label': label}, 'label': label},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]) fetch_list=[
avg_cost.name, batch_acc.name, batch_size_tensor.name
])
iters += 1 iters += 1
num_samples += len(label) num_samples += len(label)
accuracy.add(value=acc, weight=weight) accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
train_losses.append(loss) train_losses.append(loss)
train_accs.append(acc) train_accs.append(acc)
print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" % print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %

@ -169,6 +169,7 @@ def main():
iters, num_samples, start_time = 0, 0, time.time() iters, num_samples, start_time = 0, 0, time.time()
accuracy = fluid.average.WeightedAverage() accuracy = fluid.average.WeightedAverage()
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
for pass_id in range(args.pass_num): for pass_id in range(args.pass_num):
accuracy.reset() accuracy.reset()
train_accs = [] train_accs = []
@ -184,14 +185,17 @@ def main():
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
loss, acc, weight = exe.run( loss, acc, weight = train_exe.run(
fluid.default_main_program(),
feed={"pixel": img_data, feed={"pixel": img_data,
"label": y_data}, "label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]) fetch_list=[
accuracy.add(value=acc, weight=weight) avg_cost.name, batch_acc.name, batch_size_tensor.name
])
accuracy.add(value=np.array(np.mean(acc)), weight=np.mean(weight))
iters += 1 iters += 1
num_samples += len(y_data) num_samples += len(y_data)
loss = np.mean(np.array(loss))
acc = np.mean(np.array(acc))
print( print(
"Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" % "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
(pass_id, iters, loss, acc) (pass_id, iters, loss, acc)

@ -24,7 +24,7 @@ set(BOOST_PROJECT "extern_boost")
# So we use 1.41.0 here. # So we use 1.41.0 here.
set(BOOST_VER "1.41.0") set(BOOST_VER "1.41.0")
set(BOOST_TAR "boost_1_41_0") set(BOOST_TAR "boost_1_41_0")
set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz") set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz")
set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost) set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}") set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE) set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)

@ -21,11 +21,12 @@ else()
ExternalProject_Add( ExternalProject_Add(
extern_eigen3 extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" GIT_REPOSITORY "https://github.com/eigenteam/eigen-git-mirror"
# eigen on cuda9.1 missing header of math_funtions.hpp # eigen on cuda9.1 missing header of math_funtions.hpp
# https://stackoverflow.com/questions/43113508/math-functions-hpp-not-found-when-using-cuda-with-eigen # https://stackoverflow.com/questions/43113508/math-functions-hpp-not-found-when-using-cuda-with-eigen
GIT_TAG 917060c364181f33a735dc023818d5a54f60e54c GIT_TAG 917060c364181f33a735dc023818d5a54f60e54c
PREFIX ${EIGEN_SOURCE_DIR} PREFIX ${EIGEN_SOURCE_DIR}
DOWNLOAD_NAME "eigen"
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
BUILD_COMMAND "" BUILD_COMMAND ""

@ -53,11 +53,9 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS} DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
GIT_TAG "v0.14" GIT_TAG "db3424ad44901513c03a1ea31ccaacdf633fbe9f"
PREFIX ${MKLDNN_SOURCES_DIR} PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
# Patch MKLDNN to compile with gcc 4.8, the related issue is in intel/mkl-dnn#237.
PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/mkldnn.hpp ${MKLDNN_SOURCES_DIR}/src/extern_mkldnn/include/mkldnn.hpp
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
CMAKE_ARGS -DMKLROOT=${MKLML_ROOT} CMAKE_ARGS -DMKLROOT=${MKLML_ROOT}

@ -28,7 +28,7 @@ INCLUDE(ExternalProject)
SET(MKLML_PROJECT "extern_mklml") SET(MKLML_PROJECT "extern_mklml")
SET(MKLML_VER "mklml_lnx_2018.0.3.20180406") SET(MKLML_VER "mklml_lnx_2018.0.3.20180406")
SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz") SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz")
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
SET(MKLML_DST_DIR "mklml") SET(MKLML_DST_DIR "mklml")

@ -47,8 +47,6 @@ ExternalProject_Add(
-DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_COMMAND make -j8
INSTALL_COMMAND make install
) )
add_library(snappy STATIC IMPORTED GLOBAL) add_library(snappy STATIC IMPORTED GLOBAL)

@ -46,8 +46,6 @@ ExternalProject_Add(
-DCMAKE_INSTALL_PREFIX:PATH=${SNAPPYSTREAM_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPYSTREAM_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPYSTREAM_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPYSTREAM_INSTALL_DIR}/lib
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_COMMAND make -j8
INSTALL_COMMAND make install
DEPENDS snappy DEPENDS snappy
) )

@ -98,6 +98,14 @@ elseif (WITH_MKLML)
) )
endif() endif()
if(WITH_MKLDNN)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/mkldnn")
copy(mkldnn_lib
SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
DSTS ${dst_dir} ${dst_dir}/lib
)
endif()
if(NOT MOBILE_INFERENCE AND NOT RPI) if(NOT MOBILE_INFERENCE AND NOT RPI)
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy") set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
copy(snappy_lib copy(snappy_lib
@ -148,4 +156,10 @@ copy(string_lib
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat
) )
set(module "pybind")
copy(pybind_lib
SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h
DSTS ${dst_dir}/${module}
)
add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep}) add_custom_target(inference_lib_dist DEPENDS ${inference_lib_dist_dep})

@ -40,7 +40,7 @@ template <typename T>
class FCOp : public OperatorBase { class FCOp : public OperatorBase {
public: public:
void Run(...) { void Run(...) {
add(mul(Input<T>("X"), Input<T>("W")), Input<T>("b"); add(mul(Input<T>("X"), Input<T>("W")), Input<T>("b"));
} }
}; };
REGISTER_OP(FCOp, "fc"); REGISTER_OP(FCOp, "fc");

@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
endif() endif()
add_subdirectory(testing) add_subdirectory(testing)
if(NOT MOBILE_INFERENCE AND NOT RPI) if(NOT MOBILE_INFERENCE AND NOT RPI AND NOT WITH_C_API)
add_subdirectory(fluid) add_subdirectory(fluid)
endif() endif()

@ -36,9 +36,11 @@ void TransDataDevice(const Tensor& in, const platform::Place& dst_place,
VLOG(3) << "DeviceTransform in, src_place " << in.place() VLOG(3) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place; << " dst_place: " << dst_place;
auto* dev_ctx = GetDeviceContext(in.place(), dst_place); auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
dev_ctx->Wait();
TensorCopy(in, dst_place, *dev_ctx, out); TensorCopy(in, dst_place, *dev_ctx, out);
if (platform::is_gpu_place(in.place()) && platform::is_cpu_place(dst_place)) {
dev_ctx->Wait(); dev_ctx->Wait();
}
} }
} // namespace framework } // namespace framework

@ -58,6 +58,7 @@ static DataTypeMap* InitDataTypeMap() {
RegType(bool, proto::VarType::BOOL); RegType(bool, proto::VarType::BOOL);
RegType(size_t, proto::VarType::SIZE_T); RegType(size_t, proto::VarType::SIZE_T);
RegType(int16_t, proto::VarType::INT16); RegType(int16_t, proto::VarType::INT16);
RegType(uint8_t, proto::VarType::UINT8);
#undef RegType #undef RegType
return retv; return retv;

@ -47,8 +47,14 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
case proto::VarType::BOOL: case proto::VarType::BOOL:
visitor.template operator()<bool>(); visitor.template operator()<bool>();
break; break;
case proto::VarType::UINT8:
visitor.template operator()<uint8_t>();
break;
case proto::VarType::INT16:
visitor.template operator()<int16_t>();
break;
default: default:
PADDLE_THROW("Not supported"); PADDLE_THROW("Not supported %d", type);
} }
} }

@ -48,17 +48,18 @@ void FetchOpHandle::RunImpl() {
WaitInputVarGenerated(platform::CPUPlace()); WaitInputVarGenerated(platform::CPUPlace());
tensors_.resize(inputs_.size()); tensors_.resize(inputs_.size());
auto *var_handle = static_cast<VarHandle *>(inputs_[0]);
auto &var_name = var_handle->name_;
platform::CPUPlace cpu; platform::CPUPlace cpu;
auto &scopes = *local_scopes_; auto &scopes = *local_scopes_;
for (size_t i = 0; i < scopes.size(); ++i) { for (size_t i = 0; i < inputs_.size(); ++i) {
auto &scope = scopes[i]; auto *var_handle = static_cast<VarHandle *>(inputs_[i]);
auto *var = auto &scope = scopes.at(var_handle->scope_idx_);
scope->FindVar(kLocalExecScopeName)->Get<Scope *>()->FindVar(var_name); auto *var = scope->FindVar(kLocalExecScopeName)
->Get<Scope *>()
->FindVar(var_handle->name_);
PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope", PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope",
var_name); var_handle->name_);
auto &t = var->Get<framework::LoDTensor>(); auto &t = var->Get<framework::LoDTensor>();
if (platform::is_gpu_place(t.place())) { if (platform::is_gpu_place(t.place())) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA

@ -98,7 +98,7 @@ bool MultiDevSSAGraphBuilder::IsDistTrainOp(const OpDesc &op,
return false; return false;
}; };
if (op.Type() == "split") { if (op.Type() == "split" || op.Type() == "split_byref") {
return checker(op.OutputArgumentNames(), send_op->InputArgumentNames()); return checker(op.OutputArgumentNames(), send_op->InputArgumentNames());
} else if (op.Type() == "concat") { } else if (op.Type() == "concat") {
return checker(op.InputArgumentNames(), send_op->OutputArgumentNames()); return checker(op.InputArgumentNames(), send_op->OutputArgumentNames());

@ -70,6 +70,14 @@ class OpHandleBase {
const std::vector<VarHandleBase *> &Inputs() const { return inputs_; } const std::vector<VarHandleBase *> &Inputs() const { return inputs_; }
size_t NoDupInputSize() const {
std::unordered_set<VarHandleBase *> res;
for (auto *var : inputs_) {
res.emplace(var);
}
return res.size();
}
const std::vector<VarHandleBase *> &Outputs() const { return outputs_; } const std::vector<VarHandleBase *> &Outputs() const { return outputs_; }
protected: protected:

@ -174,7 +174,7 @@ void ThreadedSSAGraphExecutor::InsertFetchOps(
void ThreadedSSAGraphExecutor::InsertPendingOp( void ThreadedSSAGraphExecutor::InsertPendingOp(
std::unordered_map<OpHandleBase *, size_t> *pending_ops, std::unordered_map<OpHandleBase *, size_t> *pending_ops,
OpHandleBase *op_instance) const { OpHandleBase *op_instance) const {
pending_ops->insert({op_instance, op_instance->Inputs().size()}); pending_ops->insert({op_instance, op_instance->NoDupInputSize()});
} }
void ThreadedSSAGraphExecutor::InsertPendingVar( void ThreadedSSAGraphExecutor::InsertPendingVar(

@ -228,7 +228,8 @@ static bool has_fetch_operators(
void Executor::Run(const ProgramDesc& program, Scope* scope, void Executor::Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets, std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets, std::map<std::string, LoDTensor*>* fetch_targets,
bool create_vars, const std::string& feed_holder_name, bool create_local_scope, bool create_vars,
const std::string& feed_holder_name,
const std::string& fetch_holder_name) { const std::string& fetch_holder_name) {
platform::RecordBlock b(kProgramId); platform::RecordBlock b(kProgramId);
bool has_feed_ops = bool has_feed_ops =
@ -290,8 +291,9 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
} }
auto ctx = Prepare(*copy_program, 0); auto ctx = Prepare(*copy_program, 0);
RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets, create_vars, RunPreparedContext(ctx.get(), scope, feed_targets, fetch_targets,
feed_holder_name, fetch_holder_name); create_local_scope, create_vars, feed_holder_name,
fetch_holder_name);
} }
std::unique_ptr<ExecutorPrepareContext> Executor::Prepare( std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
@ -366,8 +368,9 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
void Executor::RunPreparedContext( void Executor::RunPreparedContext(
ExecutorPrepareContext* ctx, Scope* scope, ExecutorPrepareContext* ctx, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets, std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets, bool create_vars, std::map<std::string, LoDTensor*>* fetch_targets, bool create_local_scope,
const std::string& feed_holder_name, const std::string& fetch_holder_name) { bool create_vars, const std::string& feed_holder_name,
const std::string& fetch_holder_name) {
auto& global_block = ctx->prog_.Block(ctx->block_id_); auto& global_block = ctx->prog_.Block(ctx->block_id_);
PADDLE_ENFORCE( PADDLE_ENFORCE(
@ -387,7 +390,7 @@ void Executor::RunPreparedContext(
} }
} }
RunPreparedContext(ctx, scope, create_vars, create_vars); RunPreparedContext(ctx, scope, create_local_scope, create_vars);
// obtain the data of fetch_targets from fetch_holder // obtain the data of fetch_targets from fetch_holder
for (auto* op : global_block.AllOps()) { for (auto* op : global_block.AllOps()) {

@ -57,7 +57,7 @@ class Executor {
void Run(const ProgramDesc& program, Scope* scope, void Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets, std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets, std::map<std::string, LoDTensor*>* fetch_targets,
bool create_vars = true, bool create_local_scope = true, bool create_vars = true,
const std::string& feed_holder_name = "feed", const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch"); const std::string& fetch_holder_name = "fetch");
@ -76,6 +76,7 @@ class Executor {
void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets, std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets, std::map<std::string, LoDTensor*>* fetch_targets,
bool create_local_scope = true,
bool create_vars = true, bool create_vars = true,
const std::string& feed_holder_name = "feed", const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch"); const std::string& fetch_holder_name = "fetch");

@ -103,6 +103,7 @@ message VarType {
FP64 = 6; FP64 = 6;
// Tensor<size_t> is used in C++. // Tensor<size_t> is used in C++.
SIZE_T = 19; SIZE_T = 19;
UINT8 = 20;
// Other types that may need additional descriptions // Other types that may need additional descriptions
LOD_TENSOR = 7; LOD_TENSOR = 7;

@ -228,11 +228,12 @@ TEST(LoD, CheckAbsLoD) {
ASSERT_FALSE(CheckAbsLoD(abs_lod0)); ASSERT_FALSE(CheckAbsLoD(abs_lod0));
} }
TEST(LoDTensor, RecordIO) { template <typename T>
static void TestRecordIO() {
LoDTensor tensor; LoDTensor tensor;
int* tmp = tensor.mutable_data<int>(make_ddim({4, 5}), platform::CPUPlace()); T* tmp = tensor.mutable_data<T>(make_ddim({4, 5}), platform::CPUPlace());
for (int i = 0; i < 20; ++i) { for (int i = 0; i < 20; ++i) {
tmp[i] = i; tmp[i] = static_cast<T>(i);
} }
std::stringstream* stream = new std::stringstream(); std::stringstream* stream = new std::stringstream();
@ -247,7 +248,7 @@ TEST(LoDTensor, RecordIO) {
auto assert_tensor_ok = [](const LoDTensor& tensor) { auto assert_tensor_ok = [](const LoDTensor& tensor) {
for (int i = 0; i < 20; ++i) { for (int i = 0; i < 20; ++i) {
ASSERT_EQ(tensor.data<int>()[i], i); ASSERT_EQ(tensor.data<T>()[i], static_cast<T>(i));
} }
}; };
@ -265,5 +266,13 @@ TEST(LoDTensor, RecordIO) {
} }
} }
TEST(LoDTensor, RecordIO) {
TestRecordIO<int>();
TestRecordIO<int16_t>();
TestRecordIO<uint8_t>();
TestRecordIO<float>();
TestRecordIO<double>();
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

@ -49,7 +49,7 @@ class OpConverter {
// convert fluid block to tensorrt network // convert fluid block to tensorrt network
void ConvertBlock(const framework::proto::BlockDesc& block, void ConvertBlock(const framework::proto::BlockDesc& block,
TensorRTEngine* engine) { TensorRTEngine* engine) {
for (size_t i = 0; i < block.ops_size(); i++) { for (int i = 0; i < block.ops_size(); i++) {
const auto& op = block.ops(i); const auto& op = block.ops(i);
OpConverter::Run(op, engine); OpConverter::Run(op, engine);
} }

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save