Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into quantize_transpiler_update

revert-13637-optimize-opyreader
Dang Qingqing 7 years ago
commit 605907fe85

@@ -136,10 +136,6 @@ def parse_args():
         '--no_random',
         action='store_true',
         help='If set, keep the random seed and do not shuffle the data.')
-    parser.add_argument(
-        '--use_lars',
-        action='store_true',
-        help='If set, use lars for optimizers, ONLY support resnet module.')
     parser.add_argument(
         '--reduce_strategy',
         type=str,

@@ -200,11 +200,6 @@ def get_model(args, is_train, main_prog, startup_prog):
     # configure optimize
     optimizer = None
     if is_train:
-        if args.use_lars:
-            lars_decay = 1.0
-        else:
-            lars_decay = 0.0
         total_images = 1281167 / trainer_count
         step = int(total_images / (args.batch_size * args.gpus) + 1)

@@ -224,11 +224,6 @@ def get_model(args, is_train, main_prog, startup_prog):
     # configure optimize
     optimizer = None
     if is_train:
-        if args.use_lars:
-            lars_decay = 1.0
-        else:
-            lars_decay = 0.0
         total_images = 1281167 / trainer_count
         step = int(total_images / args.batch_size + 1)

@@ -244,11 +244,6 @@ def get_model(args, is_train, main_prog, startup_prog):
     optimizer = None
     if is_train:
-        if args.use_lars:
-            lars_decay = 1.0
-        else:
-            lars_decay = 0.0
         total_images = 1281167 / trainer_count
         step = int(total_images / args.batch_size + 1)
@@ -262,8 +257,7 @@ def get_model(args, is_train, main_prog, startup_prog):
             learning_rate=fluid.layers.piecewise_decay(
                 boundaries=bd, values=lr),
             momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(1e-4),
-            LARS_weight_decay=lars_decay)
+            regularization=fluid.regularizer.L2Decay(1e-4))
         optimizer.minimize(avg_cost)
     if args.memory_optimize:

@@ -354,25 +354,25 @@ paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'fi
 paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max'))
 paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
 paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
-paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate'], varargs=None, keywords='kwargs', defaults=None)
+paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov'], varargs=None, keywords='kwargs', defaults=(False,))
+paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
 paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon'], varargs=None, keywords='kwargs', defaults=(1e-06,))
+paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, None, None))
 paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon'], varargs=None, keywords='kwargs', defaults=(0.001, 0.9, 0.999, 1e-08))
+paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
 paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon'], varargs=None, keywords='kwargs', defaults=(0.001, 0.9, 0.999, 1e-08))
+paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
 paddle.fluid.optimizer.AdamaxOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06))
+paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None))
 paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power'], varargs=None, keywords='kwargs', defaults=(0.0, 0.0, -0.5))
+paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None))
 paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0, False))
+paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None))
 paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho'], varargs=None, keywords='kwargs', defaults=(1e-06, 0.95))
+paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None))
 paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window'], varargs=None, keywords='kwargs', defaults=(10000, 10000))
+paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None))
 paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
 paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None)
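(Illustrative note, not part of the diff: a minimal Python sketch of how the updated constructor signature is used after the LARS removal. `bd`, `lr`, and `avg_cost` are assumed to be defined by the surrounding model code, as in the benchmark hunk above.)

    import paddle.fluid as fluid

    # Weight decay now goes through the `regularization` argument that every
    # optimizer constructor accepts; the old LARS_weight_decay kwarg is gone.
    optimizer = fluid.optimizer.MomentumOptimizer(
        learning_rate=fluid.layers.piecewise_decay(boundaries=bd, values=lr),
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(1e-4))
    optimizer.minimize(avg_cost)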

@ -103,108 +103,74 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
input_slots->assign({input_tensor}); input_slots->assign({input_tensor});
} }
const int64_t lac_ref_data[] = {24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, void SetConfig(AnalysisConfig *cfg) {
25, 25, 25, 25, 44, 24, 25, 25, 25, 36, 42, 43, cfg->model_dir = FLAGS_infer_model;
44, 14, 15, 44, 14, 15, 44, 14, 15, 44, 38, 39, cfg->use_gpu = false;
14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23}; cfg->device = 0;
cfg->specify_input_name = true;
void TestLACPrediction(const std::string &model_path, cfg->enable_ir_optim = true;
const std::string &data_file, const int batch_size, }
const int repeat, bool use_analysis = false) {
AnalysisConfig cfg;
cfg.model_dir = model_path;
cfg.use_gpu = false;
cfg.device = 0;
cfg.specify_input_name = true;
cfg.enable_ir_optim = true;
std::vector<PaddleTensor> input_slots, outputs_slots; void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
DataRecord data(data_file, batch_size); DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
GetOneBatch(&input_slots, &data, batch_size); std::vector<PaddleTensor> input_slots;
std::unique_ptr<PaddlePredictor> predictor; int epoch = FLAGS_test_all_data ? data.batched_datas.size() : 1;
if (use_analysis) { LOG(INFO) << "number of samples: " << epoch;
predictor = for (int bid = 0; bid < epoch; ++bid) {
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg); GetOneBatch(&input_slots, &data, FLAGS_batch_size);
} else { (*inputs).emplace_back(input_slots);
predictor =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
}
for (int i = 0; i < FLAGS_burning; i++) {
predictor->Run(input_slots, &outputs_slots);
} }
Timer timer; }
if (FLAGS_test_all_data) {
LOG(INFO) << "test all data";
std::vector<std::vector<PaddleTensor>> input_slots_all;
for (size_t bid = 0; bid < data.batched_datas.size(); ++bid) {
GetOneBatch(&input_slots, &data, batch_size);
input_slots_all.emplace_back(input_slots);
}
LOG(INFO) << "total number of samples: " << data.datasets.size();
TestPrediction(cfg, input_slots_all, &outputs_slots, FLAGS_num_threads);
return;
}
timer.tic();
for (int i = 0; i < repeat; i++) {
predictor->Run(input_slots, &outputs_slots);
}
PrintTime(batch_size, repeat, 1, 0, timer.toc() / repeat);
// check result // Easy for profiling independently.
EXPECT_EQ(outputs_slots.size(), 1UL); TEST(Analyzer_LAC, profile) {
auto &out = outputs_slots[0]; AnalysisConfig cfg;
size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, SetConfig(&cfg);
[](int a, int b) { return a * b; }); std::vector<PaddleTensor> outputs;
size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
PADDLE_ENFORCE_GT(size, 0);
EXPECT_GE(size, batch1_size);
int64_t *pdata = static_cast<int64_t *>(out.data.data());
for (size_t i = 0; i < batch1_size; ++i) {
EXPECT_EQ(pdata[i], lac_ref_data[i]);
}
if (use_analysis) { std::vector<std::vector<PaddleTensor>> input_slots_all;
// run once for comparion as reference SetInput(&input_slots_all);
auto ref_predictor = TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
std::vector<PaddleTensor> ref_outputs_slots;
ref_predictor->Run(input_slots, &ref_outputs_slots);
CompareResult(ref_outputs_slots, outputs_slots);
AnalysisPredictor *analysis_predictor = if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
dynamic_cast<AnalysisPredictor *>(predictor.get()); // the first inference result
auto &fuse_statis = analysis_predictor->analysis_argument() const int64_t lac_ref_data[] = {
.Get<std::unordered_map<std::string, int>>( 24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25,
framework::ir::kFuseStatisAttr); 44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14,
for (auto &item : fuse_statis) { 15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
LOG(INFO) << "fused " << item.first << " " << item.second; PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
} size_t size = GetSize(outputs[0]);
int num_ops = 0; size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
for (auto &node : PADDLE_ENFORCE_GE(size, batch1_size);
analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) { int64_t *pdata = static_cast<int64_t *>(outputs[0].data.data());
if (node->IsFunction()) { for (size_t i = 0; i < batch1_size; ++i) {
++num_ops; EXPECT_EQ(pdata[i], lac_ref_data[i]);
}
} }
LOG(INFO) << "has num ops: " << num_ops;
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4);
EXPECT_EQ(num_ops, 11);
} }
} }
TEST(Analyzer_LAC, native) { // Check the fuse status
LOG(INFO) << "LAC with native"; TEST(Analyzer_LAC, fuse_statis) {
TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size, AnalysisConfig cfg;
FLAGS_repeat); SetConfig(&cfg);
int num_ops;
auto fuse_statis = GetFuseStatis(cfg, &num_ops);
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4);
EXPECT_EQ(num_ops, 11);
} }
TEST(Analyzer_LAC, analysis) { // Compare result of NativeConfig and AnalysisConfig
LOG(INFO) << "LAC with analysis"; TEST(Analyzer_LAC, compare) {
TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size, AnalysisConfig cfg;
FLAGS_repeat, true); SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(cfg, input_slots_all);
} }
} // namespace analysis } // namespace analysis

@ -95,97 +95,73 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
} }
} }
// the first inference result void SetConfig(AnalysisConfig *cfg) {
const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26, cfg->prog_file = FLAGS_infer_model + "/__model__";
48, 39, 38, 16, 25}; cfg->param_file = FLAGS_infer_model + "/param";
cfg->use_gpu = false;
void TestChineseNERPrediction(bool use_analysis) { cfg->device = 0;
AnalysisConfig cfg; cfg->specify_input_name = true;
cfg.prog_file = FLAGS_infer_model + "/__model__"; cfg->enable_ir_optim = true;
cfg.param_file = FLAGS_infer_model + "/param"; }
cfg.use_gpu = false;
cfg.device = 0;
cfg.specify_input_name = true;
cfg.enable_ir_optim = true;
std::vector<PaddleTensor> input_slots, outputs;
std::unique_ptr<PaddlePredictor> predictor;
Timer timer;
if (use_analysis) {
predictor =
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
} else {
predictor =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
}
if (FLAGS_test_all_data) { void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
LOG(INFO) << "test all data";
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
std::vector<std::vector<PaddleTensor>> input_slots_all;
for (size_t bid = 0; bid < data.num_samples / FLAGS_batch_size; ++bid) {
PrepareInputs(&input_slots, &data, FLAGS_batch_size);
input_slots_all.emplace_back(input_slots);
}
LOG(INFO) << "total number of samples: " << data.num_samples;
TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
return;
}
// Prepare inputs.
DataRecord data(FLAGS_infer_data, FLAGS_batch_size); DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
PrepareInputs(&input_slots, &data, FLAGS_batch_size); std::vector<PaddleTensor> input_slots;
int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
timer.tic(); LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
for (int i = 0; i < FLAGS_repeat; i++) { for (int bid = 0; bid < epoch; ++bid) {
predictor->Run(input_slots, &outputs); PrepareInputs(&input_slots, &data, FLAGS_batch_size);
(*inputs).emplace_back(input_slots);
} }
PrintTime(FLAGS_batch_size, FLAGS_repeat, 1, 0, timer.toc() / FLAGS_repeat); }
PADDLE_ENFORCE(outputs.size(), 1UL); // Easy for profiling independently.
auto &out = outputs[0]; TEST(Analyzer_Chinese_ner, profile) {
size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, AnalysisConfig cfg;
[](int a, int b) { return a * b; }); SetConfig(&cfg);
PADDLE_ENFORCE_GT(size, 0); std::vector<PaddleTensor> outputs;
int64_t *result = static_cast<int64_t *>(out.data.data());
for (size_t i = 0; i < std::min(11UL, size); i++) {
PADDLE_ENFORCE(result[i], chinese_ner_result_data[i]);
}
if (use_analysis) { std::vector<std::vector<PaddleTensor>> input_slots_all;
// run once for comparion as reference SetInput(&input_slots_all);
auto ref_predictor = TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
std::vector<PaddleTensor> ref_outputs_slots;
ref_predictor->Run(input_slots, &ref_outputs_slots);
CompareResult(ref_outputs_slots, outputs);
AnalysisPredictor *analysis_predictor = if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
dynamic_cast<AnalysisPredictor *>(predictor.get()); // the first inference result
auto &fuse_statis = analysis_predictor->analysis_argument() const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
.Get<std::unordered_map<std::string, int>>( 48, 39, 38, 16, 25};
framework::ir::kFuseStatisAttr); PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
for (auto &item : fuse_statis) { size_t size = GetSize(outputs[0]);
LOG(INFO) << "fused " << item.first << " " << item.second; PADDLE_ENFORCE_GT(size, 0);
} int64_t *result = static_cast<int64_t *>(outputs[0].data.data());
int num_ops = 0; for (size_t i = 0; i < std::min(11UL, size); i++) {
for (auto &node : EXPECT_EQ(result[i], chinese_ner_result_data[i]);
analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
if (node->IsFunction()) {
++num_ops;
}
} }
LOG(INFO) << "has num ops: " << num_ops;
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2);
EXPECT_EQ(num_ops, 14);
} }
} }
TEST(Analyzer_Chinese_ner, native) { TestChineseNERPrediction(false); } // Check the fuse status
TEST(Analyzer_Chinese_ner, fuse_statis) {
AnalysisConfig cfg;
SetConfig(&cfg);
TEST(Analyzer_Chinese_ner, analysis) { TestChineseNERPrediction(true); } int num_ops;
auto fuse_statis = GetFuseStatis(cfg, &num_ops);
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2);
EXPECT_EQ(num_ops, 14);
}
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_Chinese_ner, compare) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(cfg, input_slots_all);
}
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle

@@ -25,6 +25,7 @@ struct DataRecord {
   std::vector<size_t> lod1, lod2, lod3;
   std::vector<std::vector<float>> rnn_link_data, rnn_week_datas,
       rnn_minute_datas;
+  size_t num_samples;  // total number of samples
   size_t batch_iter{0};
   size_t batch_size{1};
   DataRecord() = default;
@@ -97,6 +98,7 @@ struct DataRecord {
       week_data_all.push_back(std::move(week_data));
       minute_data_all.push_back(std::move(minute_data));
     }
+    num_samples = num_lines;
   }
 };
 void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
@ -147,89 +149,72 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
} }
} }
// Test with a really complicate model. void SetConfig(AnalysisConfig *cfg) {
void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) { cfg->prog_file = FLAGS_infer_model + "/__model__";
AnalysisConfig config; cfg->param_file = FLAGS_infer_model + "/param";
config.prog_file = FLAGS_infer_model + "/__model__"; cfg->use_gpu = false;
config.param_file = FLAGS_infer_model + "/param"; cfg->device = 0;
config.use_gpu = false; cfg->specify_input_name = true;
config.device = 0; cfg->enable_ir_optim = true;
config.specify_input_name = true; cfg->ir_passes.clear(); // Do not exclude any pass.
config.enable_ir_optim = activate_ir; }
PADDLE_ENFORCE(config.ir_mode ==
AnalysisConfig::IrPassMode::kExclude); // default
config.ir_passes.clear(); // Do not exclude any pass.
int batch_size = FLAGS_batch_size;
auto base_predictor = void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config); DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
auto predictor =
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
std::vector<PaddleTensor> input_slots; std::vector<PaddleTensor> input_slots;
DataRecord data(FLAGS_infer_data, batch_size); int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
// Prepare inputs. LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
PrepareInputs(&input_slots, &data, batch_size); for (int bid = 0; bid < epoch; ++bid) {
std::vector<PaddleTensor> outputs, base_outputs; PrepareInputs(&input_slots, &data, FLAGS_batch_size);
(*inputs).emplace_back(input_slots);
}
}
base_predictor->Run(input_slots, &base_outputs); // Easy for profiling independently.
TEST(Analyzer_rnn1, profile) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all; std::vector<std::vector<PaddleTensor>> input_slots_all;
input_slots_all.emplace_back(input_slots); SetInput(&input_slots_all);
if (num_threads == 1) { TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
TestOneThreadPrediction(config, input_slots_all, &outputs); }
CompareResult(outputs, base_outputs);
} else {
// only return the output of first thread
TestMultiThreadPrediction(config, input_slots_all, &outputs, num_threads);
}
if (use_analysis && activate_ir) { // Check the fuse status
AnalysisPredictor *analysis_predictor = TEST(Analyzer_rnn1, fuse_statis) {
dynamic_cast<AnalysisPredictor *>(predictor.get()); AnalysisConfig cfg;
auto &fuse_statis = analysis_predictor->analysis_argument() SetConfig(&cfg);
.Get<std::unordered_map<std::string, int>>(
framework::ir::kFuseStatisAttr);
for (auto &item : fuse_statis) {
LOG(INFO) << "fused " << item.first << " " << item.second;
}
int num_ops = 0; int num_ops;
for (auto &node : auto fuse_statis = GetFuseStatis(cfg, &num_ops);
analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) { ASSERT_TRUE(fuse_statis.count("fc_fuse"));
if (node->IsFunction()) { EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
++num_ops; EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM
} EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
} EXPECT_EQ(num_ops,
LOG(INFO) << "has num ops: " << num_ops; 13); // After graph optimization, only 13 operators exists.
}
ASSERT_TRUE(fuse_statis.count("fc_fuse")); // Compare result of NativeConfig and AnalysisConfig
EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); TEST(Analyzer_rnn1, compare) {
EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM AnalysisConfig cfg;
EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1); SetConfig(&cfg);
EXPECT_EQ(num_ops,
13); // After graph optimization, only 13 operators exists. std::vector<std::vector<PaddleTensor>> input_slots_all;
} SetInput(&input_slots_all);
CompareNativeAndAnalysis(cfg, input_slots_all);
} }
// Inference with analysis and IR, easy for profiling independently. // Test Multi-Thread.
TEST(Analyzer, rnn1) { TestRNN1Prediction(true, true, FLAGS_num_threads); } TEST(Analyzer_rnn1, multi_thread) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
// Other unit-tests of RNN1, test different options of use_analysis, std::vector<std::vector<PaddleTensor>> input_slots_all;
// activate_ir and multi-threads. SetInput(&input_slots_all);
TEST(Analyzer, RNN_tests) { TestPrediction(cfg, input_slots_all, &outputs, 4 /* num_threads */);
int num_threads[2] = {1, 4};
for (auto i : num_threads) {
// Directly infer with the original model.
TestRNN1Prediction(false, false, i);
// Inference with the original model with the analysis turned on, the
// analysis module will transform the program to a data flow graph.
TestRNN1Prediction(true, false, i);
// Inference with analysis and IR. The IR module will fuse some large
// kernels.
TestRNN1Prediction(true, true, i);
}
} }
} // namespace inference } // namespace inference

@@ -12,24 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include <google/protobuf/text_format.h>
-#include <gtest/gtest.h>
-#include <thread>  // NOLINT
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-DEFINE_string(infer_model, "", "model path");
-DEFINE_string(infer_data, "", "data path");
-DEFINE_int32(batch_size, 1, "batch size.");
-DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
-DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 namespace paddle {
 namespace inference {
@@ -41,6 +24,7 @@ struct DataRecord {
   std::vector<size_t> lod;
   std::vector<std::vector<float>> rnn_link_data;
   std::vector<float> result_data;
+  size_t num_samples;  // total number of samples
   size_t batch_iter{0};
   size_t batch_size{1};
   DataRecord() = default;
@@ -100,6 +84,7 @@ struct DataRecord {
        result_data.insert(result_data.end(), tmp.begin(), tmp.end());
       }
     }
+    num_samples = num_lines / 2;
   }
 };
 void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
@ -118,64 +103,58 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
input_slots->assign({feed_tensor}); input_slots->assign({feed_tensor});
} }
void CompareResult(const std::vector<PaddleTensor> &outputs, void SetConfig(AnalysisConfig *cfg) {
const std::vector<float> &base_result) { cfg->prog_file = FLAGS_infer_model + "/__model__";
PADDLE_ENFORCE_GT(outputs.size(), 0); cfg->param_file = FLAGS_infer_model + "/param";
for (size_t i = 0; i < outputs.size(); i++) { cfg->use_gpu = false;
auto &out = outputs[i]; cfg->device = 0;
size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, cfg->specify_input_name = true;
[](int a, int b) { return a * b; }); cfg->enable_ir_optim = true;
PADDLE_ENFORCE_GT(size, 0); }
float *data = static_cast<float *>(out.data.data());
for (size_t i = 0; i < size; i++) { void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
EXPECT_NEAR(data[i], base_result[i], 1e-3); DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
} std::vector<PaddleTensor> input_slots;
int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
for (int bid = 0; bid < epoch; ++bid) {
PrepareInputs(&input_slots, &data, FLAGS_batch_size);
(*inputs).emplace_back(input_slots);
} }
} }
// Test with a really complicate model.
void TestRNN2Prediction() {
AnalysisConfig config;
config.prog_file = FLAGS_infer_model + "/__model__";
config.param_file = FLAGS_infer_model + "/param";
config.use_gpu = false;
config.device = 0;
config.specify_input_name = true;
config.enable_ir_optim = true;
PADDLE_ENFORCE(config.ir_mode ==
AnalysisConfig::IrPassMode::kExclude); // default
int batch_size = FLAGS_batch_size; // Easy for profiling independently.
int num_times = FLAGS_repeat; TEST(Analyzer_rnn2, profile) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
auto base_predictor = std::vector<std::vector<PaddleTensor>> input_slots_all;
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config); SetInput(&input_slots_all);
auto predictor = TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
std::vector<PaddleTensor> input_slots;
DataRecord data(FLAGS_infer_data, batch_size);
PrepareInputs(&input_slots, &data, batch_size);
std::vector<PaddleTensor> outputs, base_outputs;
Timer timer1; if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
timer1.tic(); // the first inference result
for (int i = 0; i < num_times; i++) { DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
base_predictor->Run(input_slots, &base_outputs); PADDLE_ENFORCE_GT(outputs.size(), 0);
size_t size = GetSize(outputs[0]);
PADDLE_ENFORCE_GT(size, 0);
float *result = static_cast<float *>(outputs[0].data.data());
for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], data.result_data[i], 1e-3);
}
} }
PrintTime(batch_size, num_times, 1, 0, timer1.toc() / num_times); }
Timer timer2; // Compare result of NativeConfig and AnalysisConfig
timer2.tic(); TEST(Analyzer_rnn2, compare) {
for (int i = 0; i < num_times; i++) { AnalysisConfig cfg;
predictor->Run(input_slots, &outputs); SetConfig(&cfg);
}
PrintTime(batch_size, num_times, 1, 0, timer2.toc() / num_times);
CompareResult(base_outputs, data.result_data); std::vector<std::vector<PaddleTensor>> input_slots_all;
CompareResult(outputs, data.result_data); SetInput(&input_slots_all);
CompareNativeAndAnalysis(cfg, input_slots_all);
} }
TEST(Analyzer, rnn2) { TestRNN2Prediction(); }
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle

@ -46,54 +46,63 @@ struct DataReader {
std::unique_ptr<std::ifstream> file; std::unique_ptr<std::ifstream> file;
}; };
void Main(int batch_size) { void SetConfig(AnalysisConfig *cfg) {
// shape -- cfg->model_dir = FLAGS_infer_model;
// Create Predictor -- cfg->use_gpu = false;
AnalysisConfig config; cfg->device = 0;
config.model_dir = FLAGS_infer_model; cfg->specify_input_name = true;
config.use_gpu = false; cfg->enable_ir_optim = true;
config.enable_ir_optim = true; }
std::vector<PaddleTensor> input_slots, output_slots; void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
std::vector<PaddleTensor> input_slots;
DataReader reader(FLAGS_infer_data); DataReader reader(FLAGS_infer_data);
std::vector<std::vector<PaddleTensor>> input_slots_all; int num_batches = 0;
while (reader.NextBatch(&input_slots, FLAGS_batch_size)) {
if (FLAGS_test_all_data) { (*inputs).emplace_back(input_slots);
LOG(INFO) << "test all data"; ++num_batches;
int num_batches = 0; if (!FLAGS_test_all_data) return;
while (reader.NextBatch(&input_slots, FLAGS_batch_size)) {
input_slots_all.emplace_back(input_slots);
++num_batches;
}
LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size;
TestPrediction(config, input_slots_all, &output_slots, FLAGS_num_threads);
return;
} }
LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size;
}
// one batch starts // Easy for profiling independently.
// data -- TEST(Analyzer_Text_Classification, profile) {
reader.NextBatch(&input_slots, FLAGS_batch_size); AnalysisConfig cfg;
input_slots_all.emplace_back(input_slots); SetConfig(&cfg);
TestPrediction(config, input_slots_all, &output_slots, FLAGS_num_threads); std::vector<PaddleTensor> outputs;
// Get output std::vector<std::vector<PaddleTensor>> input_slots_all;
LOG(INFO) << "get outputs " << output_slots.size(); SetInput(&input_slots_all);
TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
for (auto &output : output_slots) { if (FLAGS_num_threads == 1) {
LOG(INFO) << "output.shape: " << to_string(output.shape); // Get output
// no lod ? LOG(INFO) << "get outputs " << outputs.size();
CHECK_EQ(output.lod.size(), 0UL); for (auto &output : outputs) {
LOG(INFO) << "output.dtype: " << output.dtype; LOG(INFO) << "output.shape: " << to_string(output.shape);
std::stringstream ss; // no lod ?
for (int i = 0; i < 5; i++) { CHECK_EQ(output.lod.size(), 0UL);
ss << static_cast<float *>(output.data.data())[i] << " "; LOG(INFO) << "output.dtype: " << output.dtype;
std::stringstream ss;
for (int i = 0; i < 5; i++) {
ss << static_cast<float *>(output.data.data())[i] << " ";
}
LOG(INFO) << "output.data summary: " << ss.str();
// one batch ends
} }
LOG(INFO) << "output.data summary: " << ss.str();
// one batch ends
} }
} }
TEST(text_classification, basic) { Main(FLAGS_batch_size); } // Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_Text_Classification, compare) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(cfg, input_slots_all);
}
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle

@ -49,84 +49,83 @@ Record ProcessALine(const std::string &line) {
return record; return record;
} }
/* void SetConfig(AnalysisConfig *cfg) {
* Use the native and analysis fluid engine to inference the demo. cfg->param_file = FLAGS_infer_model + "/__params__";
* ocr, mobilenet and se_resnext50 cfg->prog_file = FLAGS_infer_model + "/__model__";
*/ cfg->use_gpu = false;
void TestVisualPrediction(bool use_mkldnn) { cfg->device = 0;
std::unique_ptr<PaddlePredictor> predictor; cfg->enable_ir_optim = true;
AnalysisConfig cfg; cfg->specify_input_name = true;
cfg.param_file = FLAGS_infer_model + "/__params__";
cfg.prog_file = FLAGS_infer_model + "/__model__";
cfg.use_gpu = false;
cfg._use_mkldnn = use_mkldnn;
cfg.device = 0;
cfg.enable_ir_optim = true;
// TODO(TJ): fix fusion gru // TODO(TJ): fix fusion gru
cfg.ir_passes.push_back("fc_gru_fuse_pass"); cfg->ir_passes.push_back("fc_gru_fuse_pass");
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
cfg->_use_mkldnn = true;
// disable mkldnn fuse since it should have some bugs // disable mkldnn fuse since it should have some bugs
cfg.ir_passes.push_back("conv_relu_mkldnn_fuse_pass"); cfg->ir_passes.push_back("conv_relu_mkldnn_fuse_pass");
#endif #endif
predictor = }
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
// Only have single batch of data. void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
std::string line; std::string line;
std::ifstream file(FLAGS_infer_data); std::ifstream file(FLAGS_infer_data);
std::getline(file, line); std::getline(file, line);
auto record = ProcessALine(line); auto record = ProcessALine(line);
file.close();
// Inference.
PaddleTensor input; PaddleTensor input;
input.shape = record.shape; input.shape = record.shape;
input.data =
PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
input.dtype = PaddleDType::FLOAT32; input.dtype = PaddleDType::FLOAT32;
size_t input_size = record.data.size() * sizeof(float);
input.data.Resize(input_size);
memcpy(input.data.data(), record.data.data(), input_size);
std::vector<PaddleTensor> input_slots;
input_slots.assign({input});
(*inputs).emplace_back(input_slots);
}
std::vector<PaddleTensor> outputs_slots; // Easy for profiling independently.
Timer timer; // ocr, mobilenet and se_resnext50
timer.tic(); TEST(Analyzer_vis, profile) {
for (int i = 0; i < FLAGS_repeat; i++) { AnalysisConfig cfg;
predictor->Run({input}, &outputs_slots); SetConfig(&cfg);
} std::vector<PaddleTensor> outputs;
PrintTime(/*batch size*/ 1, FLAGS_repeat, /*num threads*/ 1, /*thread id*/ 0,
timer.toc() / FLAGS_repeat); std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
VLOG(3) << "output.size " << outputs_slots.size(); TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
// run native as reference if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
auto ref_predictor = const float ocr_result_data[] = {
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg); 5.273636460856323538e-08, 3.296741795111302054e-07,
std::vector<PaddleTensor> ref_outputs_slots; 1.873261190610264748e-08, 3.403730275408634043e-08,
ref_predictor->Run({input}, &ref_outputs_slots); 3.383312474625199684e-08};
CompareResult(outputs_slots, ref_outputs_slots); PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
// print what are fused size_t size = GetSize(outputs[0]);
AnalysisPredictor *analysis_predictor = PADDLE_ENFORCE_GT(size, 0);
dynamic_cast<AnalysisPredictor *>(predictor.get()); float *result = static_cast<float *>(outputs[0].data.data());
auto &fuse_statis = analysis_predictor->analysis_argument() for (size_t i = 0; i < std::min(5UL, size); i++) {
.Get<std::unordered_map<std::string, int>>( EXPECT_NEAR(result[i], ocr_result_data[i], 1e-3);
framework::ir::kFuseStatisAttr);
for (auto &item : fuse_statis) {
LOG(INFO) << "fused " << item.first << " " << item.second;
}
int num_ops = 0;
for (auto &node :
analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
if (node->IsFunction()) {
++num_ops;
} }
} }
LOG(INFO) << "has num ops: " << num_ops;
} }
TEST(Analyzer_vis, analysis) { TestVisualPrediction(/*use_mkldnn*/ false); } // Check the fuse status
#ifdef PADDLE_WITH_MKLDNN TEST(Analyzer_vis, fuse_statis) {
TEST(Analyzer_vis, analysis_mkldnn) { AnalysisConfig cfg;
TestVisualPrediction(/*use_mkldnn*/ true); SetConfig(&cfg);
int num_ops;
GetFuseStatis(cfg, &num_ops);
}
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_vis, compare) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(cfg, input_slots_all);
} }
#endif
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference

@@ -15,6 +15,7 @@
 #pragma once
 #include <gtest/gtest.h>
+#include <string>
 #include <thread>  // NOLINT
 #include <vector>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
@@ -28,17 +29,18 @@
 DEFINE_string(infer_model, "", "model path");
 DEFINE_string(infer_data, "", "data file");
 DEFINE_int32(batch_size, 1, "batch size.");
-DEFINE_int32(burning, 0, "Burning before repeat.");
 DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
 DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
 DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
+DEFINE_bool(use_analysis, true,
+            "Running the inference program in analysis mode.");
 namespace paddle {
 namespace inference {
 void CompareResult(const std::vector<PaddleTensor> &outputs,
                    const std::vector<PaddleTensor> &ref_outputs) {
-  EXPECT_GT(outputs.size(), 0);
+  EXPECT_GT(outputs.size(), 0UL);
   EXPECT_EQ(outputs.size(), ref_outputs.size());
   for (size_t i = 0; i < outputs.size(); i++) {
     auto &out = outputs[i];
@@ -72,14 +74,50 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
   }
 }
+std::unique_ptr<PaddlePredictor> GetPrediction(AnalysisConfig config,
+                                               bool use_analysis = true) {
+  if (use_analysis) {
+    return CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+        config);
+  } else {
+    return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
+        config);
+  }
+}
+size_t GetSize(const PaddleTensor &out) {
+  return std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                         [](int a, int b) { return a * b; });
+}
+std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
+                                                   int *num_ops) {
+  auto predictor = GetPrediction(config);
+  AnalysisPredictor *analysis_predictor =
+      dynamic_cast<AnalysisPredictor *>(predictor.get());
+  auto &fuse_statis = analysis_predictor->analysis_argument()
+                          .Get<std::unordered_map<std::string, int>>(
+                              framework::ir::kFuseStatisAttr);
+  for (auto &item : fuse_statis) {
+    LOG(INFO) << "fused " << item.first << " " << item.second;
+  }
+  int num = 0;
+  for (auto &node :
+       analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
+    if (node->IsFunction()) {
+      ++num;
+    }
+  }
+  *num_ops = num;
+  return fuse_statis;
+}
 void TestOneThreadPrediction(
     AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
-    std::vector<PaddleTensor> *outputs) {
+    std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
-  auto predictor =
-      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-          config);
+  auto predictor = GetPrediction(config, use_analysis);
   Timer timer;
   timer.tic();
   for (int i = 0; i < num_times; i++) {
@@ -93,7 +131,8 @@ void TestOneThreadPrediction(
 void TestMultiThreadPrediction(
     AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
-    std::vector<PaddleTensor> *outputs, int num_threads) {
+    std::vector<PaddleTensor> *outputs, int num_threads,
+    bool use_analysis = true) {
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
   std::vector<std::thread> threads;
@@ -101,9 +140,7 @@ void TestMultiThreadPrediction(
   // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
   // because AttentionLSTM's hard code nodeid will be damanged.
   for (int tid = 0; tid < num_threads; ++tid) {
-    predictors.emplace_back(
-        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-            config));
+    predictors.emplace_back(GetPrediction(config, use_analysis));
   }
   for (int tid = 0; tid < num_threads; ++tid) {
     threads.emplace_back([&, tid]() {
@@ -129,13 +166,25 @@ void TestMultiThreadPrediction(
 void TestPrediction(AnalysisConfig config,
                     const std::vector<std::vector<PaddleTensor>> inputs,
-                    std::vector<PaddleTensor> *outputs, int num_threads) {
+                    std::vector<PaddleTensor> *outputs, int num_threads,
+                    bool use_analysis = FLAGS_use_analysis) {
+  LOG(INFO) << "use_analysis: " << use_analysis;
   if (num_threads == 1) {
-    TestOneThreadPrediction(config, inputs, outputs);
+    TestOneThreadPrediction(config, inputs, outputs, use_analysis);
   } else {
-    TestMultiThreadPrediction(config, inputs, outputs, num_threads);
+    TestMultiThreadPrediction(config, inputs, outputs, num_threads,
+                              use_analysis);
   }
 }
+void CompareNativeAndAnalysis(
+    AnalysisConfig config,
+    const std::vector<std::vector<PaddleTensor>> inputs) {
+  std::vector<PaddleTensor> native_outputs, analysis_outputs;
+  TestOneThreadPrediction(config, inputs, &native_outputs, false);
+  TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
+  CompareResult(analysis_outputs, native_outputs);
+}
 } // namespace inference
 } // namespace paddle
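(Aside, not part of the diff: a small NumPy sketch of the comparison idea behind CompareNativeAndAnalysis; the same inputs are run through both engines and the flattened outputs must agree element-wise within the 1e-3 tolerance the tests use. The helper name `compare_outputs` and the runner stubs are hypothetical.)

    import numpy as np

    def compare_outputs(analysis_out, native_out, atol=1e-3):
        """Element-wise check, mirroring EXPECT_NEAR(out[i], ref[i], 1e-3)."""
        analysis_out = np.asarray(analysis_out, dtype=np.float32)
        native_out = np.asarray(native_out, dtype=np.float32)
        assert analysis_out.shape == native_out.shape, "shape mismatch"
        max_diff = float(np.max(np.abs(analysis_out - native_out)))
        assert max_diff <= atol, "outputs differ by %g" % max_diff

    # Hypothetical usage with two runs of the same model:
    # compare_outputs(run_analysis(inputs), run_native(inputs))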

@@ -174,12 +174,13 @@ struct SparseAdamFunctor {
   const int64_t* rows_;
   int64_t row_numel_;
+  int64_t row_count_;
   SparseAdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow,
                     const T* beta2_pow, const T* mom1, T* mom1_out,
                     const T* mom2, T* mom2_out, const T* lr, const T* grad,
                     const T* param, T* param_out, const int64_t* rows,
-                    int64_t row_numel)
+                    int64_t row_numel, int64_t row_count)
       : beta1_(beta1),
         beta2_(beta2),
         epsilon_(epsilon),
@@ -194,28 +195,47 @@ struct SparseAdamFunctor {
         param_(param),
         param_out_(param_out),
         rows_(rows),
-        row_numel_(row_numel) {}
+        row_numel_(row_numel),
+        row_count_(row_count) {}
+  inline HOSTDEVICE int64_t BinarySearchInRows(int64_t row) const {
+    int64_t beg = 0, end = row_count_ - 1;
+    while (beg <= end) {
+      auto mid = ((beg + end) >> 1);
+      if (rows_[mid] == row)
+        return mid;
+      else if (rows_[mid] < row)
+        beg = mid + 1;
+      else
+        end = mid - 1;
+    }
+    return -1;
+  }
   inline HOSTDEVICE void operator()(size_t i) const {
+    int64_t row = i / row_numel_;
+    auto row_idx = BinarySearchInRows(row);
+    T g = row_idx >= 0 ? grad_[row_idx * row_numel_ + i % row_numel_] : 0;
+    // The following code is the same as dense
+    T mom1 = moment1_[i];
+    T mom2 = moment2_[i];
+    T lr = *lr_;
     T beta1_pow = *beta1_pow_;
     T beta2_pow = *beta2_pow_;
-    for (int64_t j = 0; j < row_numel_; ++j) {
-      T g = grad_[i * row_numel_ + j];
-      T mom1 = moment1_[rows_[i] * row_numel_ + j];
-      T mom2 = moment2_[rows_[i] * row_numel_ + j];
-      T lr = *lr_;
-      T p = param_[rows_[i] * row_numel_ + j];
-      lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow);
-      mom1 = beta1_ * mom1 + (1 - beta1_) * g;
-      mom2 = beta2_ * mom2 + (1 - beta2_) * g * g;
-      p -= lr * (mom1 / (sqrt(mom2) + epsilon_));
-      moment1_out_[rows_[i] * row_numel_ + j] = mom1;
-      moment2_out_[rows_[i] * row_numel_ + j] = mom2;
-      param_out_[rows_[i] * row_numel_ + j] = p;
-    }  // for col id
+    T p = param_[i];
+    // Calculation
+    lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow);
+    mom1 = beta1_ * mom1 + (1 - beta1_) * g;
+    mom2 = beta2_ * mom2 + (1 - beta2_) * g * g;
+    p -= lr * (mom1 / (sqrt(mom2) + epsilon_));
+    // Write back to global memory
+    moment1_out_[i] = mom1;
+    moment2_out_[i] = mom2;
+    param_out_[i] = p;
   }
 };
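(Illustrative note, not part of the diff: a NumPy sketch of the per-element lookup the rewritten functor performs. The sparse gradient stores only the rows listed in a sorted `rows` array, so a binary search maps a dense row index to a gradient row, and rows that are absent contribute a zero gradient. The function and variable names here are hypothetical.)

    import bisect
    import numpy as np

    def sparse_grad_at(dense_index, rows, grad, row_numel):
        """Gradient for flattened parameter element `dense_index`.

        `rows` is the sorted list of row ids present in the merged sparse
        gradient; `grad` has shape [len(rows), row_numel].
        """
        row, col = divmod(dense_index, row_numel)
        idx = bisect.bisect_left(rows, row)  # same idea as BinarySearchInRows
        if idx < len(rows) and rows[idx] == row:
            return grad[idx, col]
        return 0.0  # row not present in the sparse gradient

    # Example: only rows 1 and 3 of a 4-row parameter have gradients.
    rows = [1, 3]
    grad = np.arange(2 * 5, dtype=np.float32).reshape(2, 5)
    print(sparse_grad_at(1 * 5 + 2, rows, grad, 5))  # row 1 present -> grad[0, 2]
    print(sparse_grad_at(2 * 5 + 0, rows, grad, 5))  # row 2 absent  -> 0.0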
@@ -287,9 +307,14 @@ class AdamOpKernel : public framework::OpKernel<T> {
         return;
       }
       // merge duplicated rows if any.
+      // The rows of grad_merge have been sorted inside MergeAdd functor
       scatter::MergeAdd<DeviceContext, T> merge_func;
-      auto grad_merge =
-          merge_func(ctx.template device_context<DeviceContext>(), grad);
+      auto& grad_merge = *(ctx.scope()
+                               .NewScope()
+                               .Var("sparse_adam_grad_merge")
+                               ->GetMutable<framework::SelectedRows>());
+      merge_func(ctx.template device_context<DeviceContext>(), grad,
+                 &grad_merge);
       auto& grad_tensor = grad_merge.value();
       const T* grad_data = grad_tensor.template data<T>();
       int64_t* rows = nullptr;
@@ -314,10 +339,11 @@ class AdamOpKernel : public framework::OpKernel<T> {
           mom2.template data<T>(),
           mom2_out.template mutable_data<T>(ctx.GetPlace()),
           lr.template data<T>(), grad_data, param.template data<T>(),
-          param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel);
+          param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
+          grad_merge.rows().size());
       platform::ForRange<DeviceContext> for_range(
           static_cast<const DeviceContext&>(ctx.device_context()),
-          grad_merge.rows().size());
+          param.numel());
       for_range(functor);
     } else {
       PADDLE_THROW("Variable type not supported by adam_op");

@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math/selected_rows_functor.h"
 #include "paddle/fluid/platform/transform.h"
 namespace paddle {
@@ -61,14 +62,32 @@ class ClipKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& context) const override {
     auto max = context.Attr<T>("max");
     auto min = context.Attr<T>("min");
-    auto* x = context.Input<Tensor>("X");
-    auto* out = context.Output<Tensor>("Out");
-    T* out_data = out->mutable_data<T>(context.GetPlace());
-    const T* x_data = x->data<T>();
-    int64_t numel = x->numel();
-    Transform<DeviceContext> trans;
-    trans(context.template device_context<DeviceContext>(), x_data,
-          x_data + numel, out_data, ClipFunctor<T>(min, max));
+    auto* x_var = context.InputVar("X");
+    if (x_var->IsType<framework::LoDTensor>()) {
+      auto* x = context.Input<framework::LoDTensor>("X");
+      auto* out = context.Output<framework::LoDTensor>("Out");
+      T* out_data = out->mutable_data<T>(context.GetPlace());
+      const T* x_data = x->data<T>();
+      int64_t numel = x->numel();
+      Transform<DeviceContext> trans;
+      trans(context.template device_context<DeviceContext>(), x_data,
+            x_data + numel, out_data, ClipFunctor<T>(min, max));
+    } else if (x_var->IsType<framework::SelectedRows>()) {
+      auto* x = context.Input<framework::SelectedRows>("X");
+      auto* out = context.Output<framework::SelectedRows>("Out");
+      PADDLE_ENFORCE_NE(x, out,
+                        "Inplace clip is not allowed when x is SelectedRows");
+      math::scatter::MergeAdd<DeviceContext, T> merge_func;
+      merge_func(context.template device_context<DeviceContext>(), *x, out);
+      auto* out_tensor = out->mutable_value();
+      auto* out_data = out_tensor->data<T>();
+      int64_t numel = out_tensor->numel();
+      Transform<DeviceContext> trans;
+      trans(context.template device_context<DeviceContext>(), out_data,
+            out_data + numel, out_data, ClipFunctor<T>(min, max));
+    } else {
+      PADDLE_THROW("ClipOp only supports LoDTensor and SelectedRows");
+    }
   }
 };
@@ -78,10 +97,12 @@ class ClipGradKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& context) const override {
     auto max = context.Attr<T>("max");
     auto min = context.Attr<T>("min");
-    auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
-    auto* d_x = context.Output<Tensor>(framework::GradVarName("X"));
+    auto* d_out =
+        context.Input<framework::LoDTensor>(framework::GradVarName("Out"));
+    auto* d_x =
+        context.Output<framework::LoDTensor>(framework::GradVarName("X"));
     if (d_x != nullptr) {
-      auto* x = context.Input<Tensor>("X");
+      auto* x = context.Input<framework::LoDTensor>("X");
       int64_t numel = d_out->numel();
       auto* d_x_data = d_x->mutable_data<T>(context.GetPlace());
       const T* d_out_data = d_out->data<T>();
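(Illustrative note, not part of the diff: a rough NumPy sketch of the SelectedRows path added above. Duplicate rows are first merged by summation, which is what MergeAdd does, and the merged values are then clipped. Function and variable names are hypothetical.)

    import numpy as np

    def clip_selected_rows(rows, values, min_val, max_val):
        """rows may contain duplicates; values has shape [len(rows), width]."""
        merged_rows = sorted(set(rows))
        index = {r: i for i, r in enumerate(merged_rows)}
        merged = np.zeros((len(merged_rows), values.shape[1]), dtype=values.dtype)
        for r, v in zip(rows, values):  # MergeAdd: sum values of duplicate rows
            merged[index[r]] += v
        np.clip(merged, min_val, max_val, out=merged)  # then clip merged values
        return merged_rows, merged

    rows = [3, 1, 3]
    values = np.array([[2.0, -5.0], [0.5, 9.0], [4.0, 1.0]])
    print(clip_selected_rows(rows, values, -1.0, 1.0))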

@@ -199,6 +199,14 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
   framework::SelectedRows operator()(const platform::CPUDeviceContext& context,
                                      const framework::SelectedRows& input) {
     framework::SelectedRows out;
+    (*this)(context, input, &out);
+    return out;
+  }
+
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::SelectedRows& input,
+                  framework::SelectedRows* output) {
+    framework::SelectedRows& out = *output;
     auto input_rows = input.rows();
     std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
     std::vector<int64_t> merge_rows(row_set.begin(), row_set.end());
@@ -223,7 +231,6 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
         out_data[out_i * input_width + j] += input_data[i * input_width + j];
       }
     }
-    return out;
   }
 };

@@ -234,7 +234,7 @@ template <typename T, int block_size>
 __global__ void MergeAddKernel(const T* input, const int64_t* input_rows,
                                T* out, const int64_t* out_rows,
                                size_t out_rows_size, int64_t row_numel) {
-  const int ty = blockIdx.y;
+  const int ty = blockIdx.x;
   int tid = threadIdx.x;
   __shared__ size_t out_idx;
@@ -260,6 +260,14 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
   framework::SelectedRows operator()(const platform::CUDADeviceContext& context,
                                      const framework::SelectedRows& input) {
     framework::SelectedRows out;
+    (*this)(context, input, &out);
+    return out;
+  }
+
+  void operator()(const platform::CUDADeviceContext& context,
+                  const framework::SelectedRows& input,
+                  framework::SelectedRows* output) {
+    framework::SelectedRows& out = *output;
     framework::Vector<int64_t> input_rows(input.rows());
     std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
     std::vector<int64_t> merge_rows(row_set.begin(), row_set.end());
@@ -281,16 +289,12 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
-    dim3 grid1(1, input_rows.size());
+    dim3 grid1(input_rows.size(), 1);
-    MergeAddKernel<
-        T, 256><<<grid1, threads, 0,
-                  reinterpret_cast<const platform::CUDADeviceContext&>(context)
-                      .stream()>>>(
+    MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
         input_data, input_rows.CUDAData(context.GetPlace()), out_data,
         out.mutable_rows()->CUDAMutableData(context.GetPlace()),
         out.rows().size(), input_width);
-    return out;
   }
 };

@@ -65,6 +65,9 @@ struct MergeAdd {
   // the input SelectedRows object.
   framework::SelectedRows operator()(const DeviceContext& context,
                                      const framework::SelectedRows& input);
+  void operator()(const DeviceContext& context,
+                  const framework::SelectedRows& input,
+                  framework::SelectedRows* output);
 };
 template <typename DeviceContext, typename T>

[File diff suppressed because it is too large]

@@ -190,14 +190,11 @@ class L1DecayRegularizer(WeightDecayRegularizer):
     Examples:
         .. code-block:: python
-            program = fluid.framework.Program()
-            block = program.global_block()
-            mul_x = block.create_parameter(
-                dtype="float32",
-                shape=[5, 10],
-                lod_level=0,
-                name="mul.x",
-                regularizer=fluid.regularizer.L1DecayRegularizer(0.5))
+            optimizer = fluid.optimizer.Adagrad(
+                learning_rate=1e-4,
+                regularization=fluid.regularizer.L1DecayRegularizer(
+                    regularization_coeff=0.1))
+            optimizer.minimize(avg_cost)
     """
     def __init__(self, regularization_coeff=0.0):

@@ -99,7 +99,7 @@ def train(nn_type,
     test_program = fluid.default_main_program().clone(for_test=True)
-    optimizer = fluid.optimizer.Adam(learning_rate=0.001, LARS_weight_decay=0.3)
+    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
     optimizer.minimize(avg_loss)
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

@@ -34,12 +34,13 @@ if(APPLE)
         list(REMOVE_ITEM TEST_OPS test_desc_clone)
         list(REMOVE_ITEM TEST_OPS test_program_code)
     endif(NOT WITH_DISTRIBUTE)
-    message(WARNING "These tests has been disabled in OSX before being fixed: \n test_detection_map_op \n test_dist_se_resnext")
+    message(WARNING "These tests has been disabled in OSX before being fixed: \n test_fuse_elewise_add_act_pass \n test_detection_map_op \n test_dist_se_resnext")
     # this op is not support on mac
     list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op)
     # TODO: add the unitest back when it fixed
     list(REMOVE_ITEM TEST_OPS test_detection_map_op)
    list(REMOVE_ITEM TEST_OPS test_dist_se_resnext)
+    list(REMOVE_ITEM TEST_OPS test_fuse_elewise_add_act_pass)
 endif()
 function(py_test_modules TARGET_NAME)
