Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/combine_open_files_and_double_buffer

guochaorong-patch-1
yuyang18 7 years ago
commit be528f9815
No known key found for this signature in database
GPG Key ID: 6DFF29878217BE5F

@ -210,7 +210,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
# generate fake:
if args.use_fake_data:
for var in feed_var_list:
v = startup_prog.global_block().clone_variable(var)
v = startup_prog.global_block()._clone_variable(var)
var.persistable = True
v.persistable = True

@ -98,13 +98,13 @@ class Block(objects):
def append_operator(self, ...):
self.ops.append(Operator(self, ...))
def prepend_operator(self, ...): # Parameter's ctor prepands initialize operators.
def _prepend_operator(self, ...): # Parameter's ctor prepands initialize operators.
self.ops.prepend(Operator(self, ...))
```
`create_parameter` is necessary because parameters are global variables, defined in the global block, but can be created in some sub-blocks. For example, an FC layer in the step block of an RNN operator.
`prepend_operator` is necessary because the constructor of `Parameter` needs to create the initialize (or load) operator of the parameter, and would like to put it in the *preamble* of the global block.
`_prepend_operator` is necessary because the constructor of `Parameter` needs to create the initialize (or load) operator of the parameter, and would like to put it in the *preamble* of the global block.
### Operator

@ -78,7 +78,7 @@ def error_clip_callback(block, context):
op_desc = block.desc.op(block.desc.op_size() - 1)
for grad_n in filter(lambda n: grad_to_var.has_key(n),
op_desc.output_arg_names()):
fwd_var = block.var_recursive(grad_to_var[grad_n])
fwd_var = block.__var_recursive(grad_to_var[grad_n])
error_clip = getattr(fwd_var, "error_clip", None)
if not (error_clip is None or isinstance(error_clip,
BaseErrorClipAttr)):

@ -35,11 +35,16 @@ PaddlePaddle需要使用Docker环境完成编译这样可以免去单独安
# 2. 可选步骤源码中构建用于编译PaddlePaddle的Docker镜像
docker build -t paddle:dev .
# 3. 执行下面的命令编译CPU-Only的二进制
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
docker run -it -v $PWD:/paddle -w /paddle -e "PYTHON_ABI=cp27-cp27mu" -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
# 4. 或者也可以使用为上述可选步骤构建的镜像必须先执行第2步
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build
注:上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。
注:
- 上述命令把当前目录(源码树根目录)映射为 container 里的 :code:`/paddle` 目录。
- 如果您使用的是 manylinux 的镜像进行编译, 那么您需要通过环境变量 :code:`PYTHON_ABI` 来指定一个 `Python ABI <https://www.python.org/dev/peps/pep-0425/#id8>`__.
PaddlePaddle目前支持的 Python ABI 有 :code:`cp27-cp27m`:code:`cp27-cp27mu`.
编译完成后会在build/python/dist目录下生成输出的whl包可以选在在当前机器安装也可以拷贝到目标机器安装

@ -36,13 +36,18 @@ If you don't wish to use dockeryou need to install several compile dependenci
# 2. Optional: build development docker image from source
docker build -t paddle:dev .
# 3. Run the following command to build a CPU-Only binaries
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
docker run -it -v $PWD:/paddle -w /paddle -e "PYTHON_ABI=cp27-cp27mu" -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 ./paddle/scripts/paddle_build.sh build
# 4. Or, use your built Docker image to build PaddlePaddle (must run step 2)
docker run -it -v $PWD:/paddle -w /paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev ./paddle/scripts/paddle_build.sh build
NOTE: The above command try to mount the current working directory (root directory of source code)
NOTE:
- The above command try to mount the current working directory (root directory of source code)
into :code:`/paddle` directory inside docker container.
- You need to pass in the required environment variable :code:`PYTHON_ABI` to specify a `Python ABI <https://www.python.org/dev/peps/pep-0425/#id8>`__.
Currently PaddlePaddle supported Python ABIs include :code:`cp27-cp27m` and :code:`cp27-cp27mu` .
When the compile finishes, you can get the output whl package under
build/python/dist, then you can choose to install the whl on local
machine or copy it to the target machine.

@ -118,7 +118,7 @@ class Float16Transpiler:
for var in self.block.vars.keys():
if var not in args:
self.block.remove_var(var)
self.block._remove_var(var)
def _modify_feed_fetch(self):
'''
@ -165,7 +165,7 @@ class Float16Transpiler:
dtype=core.VarDesc.VarType.FP16,
shape=var.shape,
persistable=var.persistable)
self.block.insert_op(
self.block._insert_op(
i + 1,
type="cast",
inputs={"X": var},
@ -188,7 +188,7 @@ class Float16Transpiler:
persistable=var.persistable)
find_op(var)
var.op.rename_output(var_name, tmp_var_name)
self.block.insert_op(
self.block._insert_op(
i,
type="cast",
inputs={"X": tmp_var},
@ -253,4 +253,4 @@ class Float16Transpiler:
# old var will be replaced by the fp16 var in program desc
self.input_map[var.name] = fp16_var_name
self.block.remove_var(var.name)
self.block._remove_var(var.name)

@ -104,7 +104,3 @@ if (WITH_ANAKIN) # only needed in CI
target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS})
endif(WITH_TESTING)
endif()
if(WITH_TESTING)
add_subdirectory(demo)
endif()

@ -1,59 +0,0 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
option(WITH_INFERENCE_DEMO "Compile with Inference demo" OFF)
if(NOT WITH_INFERENCE_DEMO)
return()
endif()
set(DEMO_INSTALL_DIR "${PADDLE_BINARY_DIR}/inference_demo")
set(URL_ROOT http://paddlemodels.bj.bcebos.com/inference-vis-demos%2F)
function(inference_download_test_demo TARGET)
if (NOT WITH_TESTING)
return()
endif()
set(options "")
set(oneValueArgs URL)
set(multiValueArgs SRCS)
cmake_parse_arguments(tests "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(test_dir "${DEMO_INSTALL_DIR}/${TARGET}")
message(STATUS "inference demo ${test_dir}")
if(NOT EXISTS "${test_dir}")
message(STATUS "Download ${TARGET} model from ${tests_URL}")
execute_process(COMMAND bash -c "mkdir -p ${test_dir}")
execute_process(COMMAND bash -c "cd ${test_dir}; wget -q ${tests_URL}")
execute_process(COMMAND bash -c "cd ${test_dir}; tar xzf *.tar.gz")
endif()
cc_test(${TARGET} SRCS "${tests_SRCS}"
DEPS paddle_inference_api paddle_fluid
ARGS --data=${test_dir}/data.txt
--modeldir=${test_dir}/model
--refer=${test_dir}/result.txt)
endfunction()
# disable mobilenet test
#inference_download_test_demo(mobilenet_inference_demo
# SRCS vis_demo.cc
# URL ${URL_ROOT}mobilenet.tar.gz)
inference_download_test_demo(se_resnext50_inference_demo
SRCS vis_demo.cc
URL ${URL_ROOT}se_resnext50.tar.gz)
inference_download_test_demo(ocr_inference_demo
SRCS vis_demo.cc
URL ${URL_ROOT}ocr.tar.gz)

@ -1,36 +0,0 @@
# Infernce Demos
Input data format:
- Each line contains a single record
- Each record's format is
```
<space splitted floats as data>\t<space splitted ints as shape>
```
Follow the C++ codes in `vis_demo.cc`.
## MobileNet
To execute the demo, simply run
```sh
./mobilenet_inference_demo --modeldir <model> --data <datafile>
```
## SE-ResNeXt-50
To execute the demo, simply run
```sh
./se_resnext50_inference_demo --modeldir <model> --data <datafile>
```
## OCR
To execute the demo, simply run
```sh
./ocr_inference_demo --modeldir <model> --data <datafile>
```

@ -52,14 +52,12 @@ else()
set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.a)
endif()
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
if(WITH_STATIC_LIB)
set(DEPS
"-Wl,--whole-archive"
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.a
"-Wl,--no-whole-archive"
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.a)
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.a
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.a)
else()
# Note: libpaddle_inference_api.so must put before libpaddle_fluid.so
set(DEPS
${PADDLE_LIB}/contrib/inference/libpaddle_inference_api.so
${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid.so)

@ -0,0 +1,26 @@
# Inference Demos
There are several demos:
- simple_on_word2vec:
- Follow the C++ codes is in `simple_on_word2vec.cc`.
- It is suitable for word2vec model.
- vis_demo:
- Follow the C++ codes is in `vis_demo.cc`.
- It is suitable for mobilenet, se_resnext50 and ocr three models.
- Input data format:
- Each line contains a single record
- Each record's format is
```
<space splitted floats as data>\t<space splitted ints as shape>
```
To build and execute the demos, simply run
```
./run.sh $PADDLE_ROOT $TURN_ON_MKL $TEST_GPU_CPU
```
- It will build and execute the demos in both static and shared library.
- `$PADDLE_ROOT`: paddle library path
- `$TURN_ON_MKL`: use MKL or Openblas
- `$TEST_GPU_CPU`: test both GPU/CPU mode or only CPU mode
- NOTE: for simple_on_word2vec, must run `ctest -R test_word2vec -R` to obtain word2vec model at first.

@ -1,34 +1,81 @@
set -x
PADDLE_ROOT=$1
WITH_MKL=$2
WITH_GPU=$3
if [ $3 == "ON" ]; then
TURN_ON_MKL=$2 # use MKL or Openblas
TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
if [ $2 == ON ]; then
# You can export yourself if move the install path
MKL_LIB=${PADDLE_ROOT}/build/fluid_install_dir/third_party/install/mklml/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${MKL_LIB}
fi
if [ $3 == ON ]; then
use_gpu_list='true false'
else
use_gpu_list='false'
fi
# download vis_demo data
function download() {
dir_name=$1
mkdir -p $dir_name
cd $dir_name
wget -q ${URL_ROOT}$dir_name.tar.gz
tar xzf *.tar.gz
cd ..
}
URL_ROOT=http://paddlemodels.bj.bcebos.com/inference-vis-demos%2F
mkdir -p data
cd data
vis_demo_list='se_resnext50 ocr mobilenet'
for vis_demo_name in $vis_demo_list; do
download $vis_demo_name
done
cd ..
# compile and test the demo
mkdir -p build
cd build
for WITH_STATIC_LIB in false; do
for WITH_STATIC_LIB in ON OFF; do
# -----simple_on_word2vec-----
rm -rf *
cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
-DWITH_MKL=$WITH_MKL \
-DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=simple_on_word2vec \
-DWITH_GPU=$WITH_GPU \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB
make
for use_gpu in $use_gpu_list; do
./simple_on_word2vec \
--dirname=${PADDLE_ROOT}/build/python/paddle/fluid/tests/book/word2vec.inference.model \
--use_gpu=$use_gpu
make -j
word2vec_model=${PADDLE_ROOT}'/build/python/paddle/fluid/tests/book/word2vec.inference.model'
if [ -d $word2vec_model ]; then
for use_gpu in $use_gpu_list; do
./simple_on_word2vec \
--dirname=$word2vec_model \
--use_gpu=$use_gpu
if [ $? -ne 0 ]; then
echo "simple_on_word2vec demo runs fail."
exit 1
fi
done
fi
# ---------vis_demo---------
rm -rf *
cmake .. -DPADDLE_LIB=${PADDLE_ROOT}/build/fluid_install_dir/ \
-DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=vis_demo \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB
make -j
for use_gpu in false; do
for vis_demo_name in $vis_demo_list; do
./vis_demo \
--modeldir=../data/$vis_demo_name/model \
--data=../data/$vis_demo_name/data.txt \
--refer=../data/$vis_demo_name/result.txt \
--use_gpu=$use_gpu
if [ $? -ne 0 ]; then
echo "vis demo $vis_demo_name runs fail."
exit 1
fi
done
done
done
if [ $? -eq 0 ]; then
exit 0
else
echo "inference demo runs fail."
exit 1
fi
set +x

@ -16,7 +16,7 @@
#include <string>
#include <vector>
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "contrib/inference/paddle_inference_api.h"
namespace paddle {
namespace demo {

@ -18,19 +18,14 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h> // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <gtest/gtest.h>
#include <fstream>
#include <iostream>
#include "paddle/contrib/inference/demo/utils.h"
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "utils.h"
#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
#endif
namespace paddle {
namespace demo {
DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(refer, "", "path to reference result for comparison.");
DEFINE_string(
@ -38,6 +33,10 @@ DEFINE_string(
"",
"path of data; each line is a record, format is "
"'<space splitted floats as data>\t<space splitted ints as shape'");
DEFINE_bool(use_gpu, false, "Whether use gpu.");
namespace paddle {
namespace demo {
struct Record {
std::vector<float> data;
@ -47,7 +46,7 @@ struct Record {
void split(const std::string& str, char sep, std::vector<std::string>* pieces);
Record ProcessALine(const std::string& line) {
LOG(INFO) << "process a line";
VLOG(3) << "process a line";
std::vector<std::string> columns;
split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL)
@ -65,8 +64,8 @@ Record ProcessALine(const std::string& line) {
for (auto& s : shape_strs) {
record.shape.push_back(std::stoi(s));
}
LOG(INFO) << "data size " << record.data.size();
LOG(INFO) << "data shape size " << record.shape.size();
VLOG(3) << "data size " << record.data.size();
VLOG(3) << "data shape size " << record.shape.size();
return record;
}
@ -78,20 +77,22 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
file.close();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
LOG(INFO) << "predictor output numel " << numel;
LOG(INFO) << "reference output numel " << refer.data.size();
EXPECT_EQ(numel, refer.data.size());
VLOG(3) << "predictor output numel " << numel;
VLOG(3) << "reference output numel " << refer.data.size();
PADDLE_ENFORCE_EQ(numel, refer.data.size());
switch (output.dtype) {
case PaddleDType::INT64: {
for (size_t i = 0; i < numel; ++i) {
EXPECT_EQ(static_cast<int64_t*>(output.data.data())[i], refer.data[i]);
PADDLE_ENFORCE_EQ(static_cast<int64_t*>(output.data.data())[i],
refer.data[i]);
}
break;
}
case PaddleDType::FLOAT32:
for (size_t i = 0; i < numel; ++i) {
EXPECT_NEAR(
static_cast<float*>(output.data.data())[i], refer.data[i], 1e-5);
PADDLE_ENFORCE_LT(
fabs(static_cast<float*>(output.data.data())[i] - refer.data[i]),
1e-5);
}
break;
}
@ -106,15 +107,15 @@ void Main(bool use_gpu) {
config.prog_file = FLAGS_modeldir + "/__model__";
config.use_gpu = use_gpu;
config.device = 0;
#ifdef PADDLE_WITH_CUDA
config.fraction_of_gpu_memory = FLAGS_fraction_of_gpu_memory_to_use;
#endif
if (FLAGS_use_gpu) {
config.fraction_of_gpu_memory = 0.1; // set by yourself
}
LOG(INFO) << "init predictor";
VLOG(3) << "init predictor";
auto predictor =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
LOG(INFO) << "begin to process data";
VLOG(3) << "begin to process data";
// Just a single batch of data.
std::string line;
std::ifstream file(FLAGS_data);
@ -129,21 +130,26 @@ void Main(bool use_gpu) {
.data = PaddleBuf(record.data.data(), record.data.size() * sizeof(float)),
.dtype = PaddleDType::FLOAT32};
LOG(INFO) << "run executor";
VLOG(3) << "run executor";
std::vector<PaddleTensor> output;
predictor->Run({input}, &output);
LOG(INFO) << "output.size " << output.size();
VLOG(3) << "output.size " << output.size();
auto& tensor = output.front();
LOG(INFO) << "output: " << SummaryTensor(tensor);
VLOG(3) << "output: " << SummaryTensor(tensor);
// compare with reference result
CheckOutput(FLAGS_refer, tensor);
}
TEST(demo, vis_demo_cpu) { Main(false /*use_gpu*/); }
#ifdef PADDLE_WITH_CUDA
TEST(demo, vis_demo_gpu) { Main(true /*use_gpu*/); }
#endif
} // namespace demo
} // namespace paddle
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
paddle::demo::Main(false /* use_gpu*/);
if (FLAGS_use_gpu) {
paddle::demo::Main(true /*use_gpu*/);
}
return 0;
}

@ -54,6 +54,7 @@ bool PaddleInferenceAnakinPredictor::Run(
LOG(ERROR) << "copy data from CPU to GPU error";
return false;
}
cudaStreamSynchronize(NULL);
}
executor_.prediction();
@ -76,6 +77,7 @@ bool PaddleInferenceAnakinPredictor::Run(
LOG(ERROR) << "copy data from GPU to CPU error";
return false;
}
cudaStreamSynchronize(NULL);
}
return true;
}

@ -104,7 +104,7 @@ ParallelExecutor::ParallelExecutor(
}
if (member_->local_scopes_.size() != 1 && local_scopes.empty()) {
BCastParamsToDevs(bcast_vars);
BCastParamsToDevices(bcast_vars);
}
// Startup Program has been run. All local scopes has correct parameters.
@ -140,7 +140,7 @@ ParallelExecutor::ParallelExecutor(
member_->places_, std::move(member_->executor_)));
}
void ParallelExecutor::BCastParamsToDevs(
void ParallelExecutor::BCastParamsToDevices(
const std::unordered_set<std::string> &vars) const {
// the initializing bcast, all vars would be bcast from device(0),
// otherwise
@ -218,7 +218,10 @@ void ParallelExecutor::BCastParamsToDevs(
auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
if (member_->use_all_reduce_ || member_->use_cuda_) {
// FIXME(zcd): LR_DECAY_COUNTER should not be shared. This is a hot fix.
if (member_->use_all_reduce_ || member_->use_cuda_ ||
var == "@LR_DECAY_COUNTER@") {
t->Resize(dims);
t->mutable_data(cpu, main_tensor.type());
paddle::framework::TensorCopy(main_tensor, cpu, t);

@ -66,7 +66,7 @@ class ParallelExecutor {
void Run(const std::vector<std::string> &fetch_tensors,
const std::string &fetched_var_name);
void BCastParamsToDevs(const std::unordered_set<std::string> &vars) const;
void BCastParamsToDevices(const std::unordered_set<std::string> &vars) const;
private:
ParallelExecutorPrivate *member_;

@ -1,3 +1,10 @@
# analysis and tensorrt must be added before creating static library,
# otherwise, there would be undefined reference to them in static library.
add_subdirectory(analysis)
if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()
set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor )
# TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
@ -7,10 +14,6 @@ cc_library(paddle_fluid_api
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
if(WITH_CONTRIB)
set(fluid_modules "${fluid_modules}" paddle_inference_api)
endif()
# Create static library
cc_library(paddle_fluid DEPS ${fluid_modules} paddle_fluid_api)
if(NOT APPLE)
@ -35,9 +38,3 @@ if(WITH_TESTING)
# both tests/book and analysis depends the models that generated by python/paddle/fluid/tests/book
add_subdirectory(tests/book)
endif()
add_subdirectory(analysis)
if (TENSORRT_FOUND)
add_subdirectory(tensorrt)
endif()

@ -35,7 +35,14 @@ class AucOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(inference_height, label_height,
"Out and Label should have same height.");
int num_thres = ctx->Attrs().Get<int>("num_thresholds");
ctx->SetOutputDim("AUC", {1});
ctx->SetOutputDim("TPOut", {num_thres});
ctx->SetOutputDim("TNOut", {num_thres});
ctx->SetOutputDim("FPOut", {num_thres});
ctx->SetOutputDim("FNOut", {num_thres});
ctx->ShareLoD("Out", /*->*/ "AUC");
}
@ -63,10 +70,18 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Label",
"A 2D int tensor indicating the label of the training data."
"The height is batch size and width is always 1.");
AddInput("TP", "True-Positive value.");
AddInput("FP", "False-Positive value.");
AddInput("TN", "True-Negative value.");
AddInput("FN", "False-Negative value.");
// TODO(typhoonzero): support weight input
AddOutput("AUC",
"A scalar representing the "
"current area-under-the-curve.");
AddOutput("TPOut", "True-Positive value.");
AddOutput("FPOut", "False-Positive value.");
AddOutput("TNOut", "True-Negative value.");
AddOutput("FNOut", "False-Negative value.");
AddAttr<std::string>("curve", "Curve type, can be 'ROC' or 'PR'.")
.SetDefault("ROC");

@ -34,6 +34,12 @@ class AucKernel : public framework::OpKernel<T> {
auto* inference = ctx.Input<Tensor>("Out");
auto* label = ctx.Input<Tensor>("Label");
auto* auc = ctx.Output<Tensor>("AUC");
// Only use output var for now, make sure it's persistable and
// not cleaned up for each batch.
auto* true_positive = ctx.Output<Tensor>("TPOut");
auto* false_positive = ctx.Output<Tensor>("FPOut");
auto* true_negative = ctx.Output<Tensor>("TNOut");
auto* false_negative = ctx.Output<Tensor>("FNOut");
float* auc_data = auc->mutable_data<float>(ctx.GetPlace());
@ -54,19 +60,10 @@ class AucKernel : public framework::OpKernel<T> {
const T* inference_data = inference->data<T>();
const int64_t* label_data = label->data<int64_t>();
// Create local tensor for storing the curve: TP, FN, TN, FP
// TODO(typhoonzero): use eigen op to caculate these values.
Tensor true_positive, false_positive, true_negative, false_negative;
true_positive.Resize({num_thresholds});
false_negative.Resize({num_thresholds});
true_negative.Resize({num_thresholds});
false_positive.Resize({num_thresholds});
int64_t* tp_data = true_positive.mutable_data<int64_t>(ctx.GetPlace());
int64_t* fn_data = false_negative.mutable_data<int64_t>(ctx.GetPlace());
int64_t* tn_data = true_negative.mutable_data<int64_t>(ctx.GetPlace());
int64_t* fp_data = false_positive.mutable_data<int64_t>(ctx.GetPlace());
auto* tp_data = true_positive->mutable_data<int64_t>(ctx.GetPlace());
auto* fn_data = false_negative->mutable_data<int64_t>(ctx.GetPlace());
auto* tn_data = true_negative->mutable_data<int64_t>(ctx.GetPlace());
auto* fp_data = false_positive->mutable_data<int64_t>(ctx.GetPlace());
for (int idx_thresh = 0; idx_thresh < num_thresholds; idx_thresh++) {
// caculate TP, FN, TN, FP for current thresh
@ -91,10 +88,10 @@ class AucKernel : public framework::OpKernel<T> {
}
}
// store rates
tp_data[idx_thresh] = tp;
fn_data[idx_thresh] = fn;
tn_data[idx_thresh] = tn;
fp_data[idx_thresh] = fp;
tp_data[idx_thresh] += tp;
fn_data[idx_thresh] += fn;
tn_data[idx_thresh] += tn;
fp_data[idx_thresh] += fp;
}
// epsilon to avoid divide by zero.
float epsilon = 1e-6;

@ -48,7 +48,7 @@ class CheckpointNotifyOp : public framework::OperatorBase {
VLOG(3) << "checkpoint notify sending lookup table: " << lookup_table_name
<< " and dir:" << dir << " to " << epmap[i];
}
rpc_client->Wait();
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
}
};

File diff suppressed because it is too large Load Diff

@ -281,9 +281,10 @@ void GRPCClient::AsyncCheckpointNotify(const std::string& ep,
req_count_++;
}
void GRPCClient::Wait() {
bool GRPCClient::Wait() {
std::unique_lock<std::mutex> lk(sync_mutex_);
sync_cond_.wait(lk, [this] { return req_count_ == 0; });
sync_cond_.wait(lk, [this] { return (req_count_ == 0 || ok_ == false); });
return ok_;
}
void GRPCClient::Proceed() {
@ -297,6 +298,14 @@ void GRPCClient::Proceed() {
if (c->status_.ok()) {
VLOG(3) << c->var_h_.String() << " process";
c->Process();
} else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
LOG(ERROR) << c->var_h_.String()
<< " meets grpc error:" << c->status_.error_message();
{
std::lock_guard<std::mutex> lk(sync_mutex_);
ok_ = false;
}
sync_cond_.notify_all();
} else {
LOG(FATAL) << c->var_h_.String()
<< " meets grpc error:" << c->status_.error_message();

@ -188,7 +188,7 @@ class CheckpointNotifyProcessor : public BaseProcessor {
class GRPCClient : public RPCClient {
public:
GRPCClient() {}
GRPCClient() : ok_(true) {}
virtual ~GRPCClient();
bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx,
@ -221,7 +221,7 @@ class GRPCClient : public RPCClient {
void AsyncSendEndPass(const std::string& ep,
int64_t time_out = FLAGS_rpc_deadline) override;
void Wait() override;
bool Wait() override;
void SendBeginPass() override;
@ -247,6 +247,7 @@ class GRPCClient : public RPCClient {
std::mutex sync_mutex_;
std::condition_variable sync_cond_;
std::atomic<int64_t> req_count_{0};
bool ok_;
// mutex for GetChannel thread safety
std::mutex chan_mutex_;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save