add Set/GetCPUNumThreads api

Branch: local_add_cudnn_lstm
luotao1, 6 years ago
parent 3fe2def1ca
commit e21edb26f6

@@ -46,6 +46,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
   prog_file = other.prog_file;
   param_file = other.param_file;
   specify_input_name = other.specify_input_name;
+  cpu_num_threads_ = other.cpu_num_threads_;
   // fields from this.
   enable_ir_optim = other.enable_ir_optim;
   use_feed_fetch_ops = other.use_feed_fetch_ops;

@@ -35,7 +35,6 @@
 #include "paddle/fluid/platform/profiler.h"
 DECLARE_bool(profile);
-DECLARE_int32(paddle_num_threads);
 namespace paddle {
@@ -67,7 +66,7 @@ bool AnalysisPredictor::Init(
 #endif
   // no matter with or without MKLDNN
-  paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
+  paddle::platform::SetNumThreads(config_.GetCPUNumThreads());
   if (!PrepareScope(parent_scope)) {
     return false;

@@ -28,7 +28,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/profiler.h"
 DEFINE_bool(profile, false, "Turn on profiler for fluid");
-DECLARE_int32(paddle_num_threads);
 namespace paddle {
 namespace {
@@ -76,7 +75,7 @@ bool NativePaddlePredictor::Init(
 #endif
   // no matter with or without MKLDNN
-  paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);
+  paddle::platform::SetNumThreads(config_.GetCPUNumThreads());
   if (config_.use_gpu) {
     place_ = paddle::platform::CUDAPlace(config_.device);

@@ -186,6 +186,15 @@ struct NativeConfig : public PaddlePredictor::Config {
   // Specify the variable's name of each input if input tensors don't follow the
   // `feeds` and `fetches` of the phase `save_inference_model`.
   bool specify_input_name{false};
+
+  // Set and get the number of cpu threads.
+  void SetCPUNumThreads(int cpu_num_threads) {
+    cpu_num_threads_ = cpu_num_threads;
+  }
+  int GetCPUNumThreads() const { return cpu_num_threads_; }
+
+ protected:
+  int cpu_num_threads_{1};  // number of cpu threads for each instance.
 };
 
 // A factory to help create different predictors.
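Note: with this API the CPU thread count travels with each config object instead of the global FLAGS_paddle_num_threads gflag, so two predictor instances can use different thread counts. A minimal usage sketch (the model path and thread value are illustrative, not from this commit):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

paddle::NativeConfig config;
config.model_dir = "./my_model";  // hypothetical model directory
config.use_gpu = false;
config.SetCPUNumThreads(4);       // this predictor instance uses 4 CPU threads

auto predictor =
    paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);
// Init() now calls platform::SetNumThreads(config.GetCPUNumThreads())
// rather than reading FLAGS_paddle_num_threads.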

@@ -27,6 +27,7 @@ void SetConfig(AnalysisConfig *cfg) {
   cfg->device = 0;
   cfg->enable_ir_optim = true;
   cfg->specify_input_name = true;
+  cfg->SetCPUNumThreads(FLAGS_paddle_num_threads);
 }
 
 void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

@@ -53,6 +53,8 @@ std::ostream &operator<<(std::ostream &os, const NativeConfig &config) {
   os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
   os << GenSpaces(num_spaces)
      << "specify_input_name: " << config.specify_input_name << "\n";
+  os << GenSpaces(num_spaces)
+     << "cpu_num_threads: " << config.GetCPUNumThreads() << "\n";
   num_spaces--;
   os << GenSpaces(num_spaces) << "}\n";
   return os;

@@ -42,6 +42,7 @@ DEFINE_bool(use_analysis, true,
             "Running the inference program in analysis mode.");
 DECLARE_bool(profile);
+DECLARE_int32(paddle_num_threads);
 namespace paddle {
 namespace inference {

@@ -17,8 +17,6 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/jit_kernel.h"
-DECLARE_int32(paddle_num_threads);
 namespace paddle {
 namespace operators {
 namespace math {
@@ -43,7 +41,7 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
           .template Get<jitkernel::VAddKernel<T>>(N);
 #ifdef PADDLE_WITH_MKLML
-#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
+#pragma omp parallel for
 #endif
   for (int i = 0; i < M; i++) {
     T* dst = Y + i * N;
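Note: dropping the `if (FLAGS_paddle_num_threads > 1)` clause means this parallel region is always entered; its effective width now comes from the thread count installed earlier through platform::SetNumThreads. A standalone sketch of why the unconditional pragma stays safe (illustrative, not the tree's code; compile with -fopenmp):

#include <omp.h>
#include <cstdio>

int main() {
  omp_set_num_threads(1);  // what SetNumThreads(1) now does for the OpenMP runtime
#pragma omp parallel for
  for (int i = 0; i < 4; i++) {
    // With one thread the loop still executes, just serially,
    // so the pragma needs no guard.
    std::printf("i=%d thread=%d\n", i, omp_get_thread_num());
  }
  return 0;
}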

@@ -41,7 +41,7 @@ void SetNumThreads(int num_threads) {
 #elif defined(PADDLE_WITH_MKLML)
   int real_num_threads = num_threads > 1 ? num_threads : 1;
   platform::dynload::MKL_Set_Num_Threads(real_num_threads);
-  omp_set_num_threads(num_threads);
+  omp_set_num_threads(real_num_threads);
 #else
   PADDLE_ENFORCE(false, "To be implemented.");
 #endif
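Note: before this fix, MKL received the clamped value while OpenMP received the raw num_threads, so a caller passing 0 or a negative value could hand omp_set_num_threads an invalid count. Both runtimes now see the same clamped value; the clamp itself is equivalent to this hypothetical helper:

#include <algorithm>

// Mirrors `num_threads > 1 ? num_threads : 1` from SetNumThreads above.
int ClampThreads(int num_threads) { return std::max(num_threads, 1); }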
