Paddle/paddle/fluid/inference/api/api_impl_tester.cc

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <gtest/gtest.h>

#include <thread>  // NOLINT

#include "gflags/gflags.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"

// Absolute tolerance used when comparing predictor outputs against reference
// outputs in this file. Clang builds use a looser bound (4e-3) than other
// compilers (1e-3).
#if defined(__clang__)
#define ACC_DIFF 4e-3
#else
#define ACC_DIFF 1e-3
#endif

// Command-line flag --dirname (empty by default). FLAGS_dirname is the base
// directory to which the tests below append the model sub-directory names
// ("/word2vec.inference.model", "/image_classification_resnet.inference.model").
DEFINE_string(dirname, "", "Directory of the inference model.");

namespace paddle {

// Builds a PaddleTensor describing the LoDTensor `t`: the tensor's buffer
// pointer and byte length are handed to PaddleBuf::Reset, the dtype is mapped
// to the matching PaddleDType, and the dims are copied into `shape`.
// Only int64_t and float element types are handled; anything else is reported
// through LOG(FATAL).
PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
  PaddleTensor result;
  const auto& element_type = t->type();

  if (element_type == typeid(int64_t)) {
    result.data.Reset(t->data<void>(), t->numel() * sizeof(int64_t));
    result.dtype = PaddleDType::INT64;
  } else if (element_type == typeid(float)) {
    result.data.Reset(t->data<void>(), t->numel() * sizeof(float));
    result.dtype = PaddleDType::FLOAT32;
  } else {
    LOG(FATAL) << "unsupported type.";
  }

  result.shape = framework::vectorize2int(t->dims());
  return result;
}

// Returns the NativeConfig the tests in this file start from: the word2vec
// model under --dirname, device 0, fraction_of_gpu_memory = 0.15, and use_gpu
// defaulting to whether the build defines PADDLE_WITH_CUDA. The chosen model
// directory is logged at INFO level.
NativeConfig GetConfig() {
#ifdef PADDLE_WITH_CUDA
  const bool built_with_cuda = true;
#else
  const bool built_with_cuda = false;
#endif

  NativeConfig cfg;
  cfg.model_dir = FLAGS_dirname + "/word2vec.inference.model";
  LOG(INFO) << "dirname  " << cfg.model_dir;
  cfg.fraction_of_gpu_memory = 0.15;
  cfg.use_gpu = built_with_cuda;
  cfg.device = 0;
  return cfg;
}

void MainWord2Vec(bool use_gpu) {
  NativeConfig config = GetConfig();
  auto predictor = CreatePaddlePredictor<NativeConfig>(config);
  config.use_gpu = use_gpu;

  framework::LoDTensor first_word, second_word, third_word, fourth_word;
  framework::LoD lod{{0, 1}};
  int64_t dict_size = 2073;  // The size of dictionary

  SetupLoDTensor(&first_word, lod, static_cast<int64_t>(0), dict_size - 1);
  SetupLoDTensor(&second_word, lod, static_cast<int64_t>(0), dict_size - 1);
  SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);
  SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);

  std::vector<PaddleTensor> paddle_tensor_feeds;
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&first_word));
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&second_word));
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&third_word));
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));

  std::vector<PaddleTensor> outputs;
  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
  ASSERT_EQ(outputs.size(), 1UL);
  size_t len = outputs[0].data.length();
  float* data = static_cast<float*>(outputs[0].data.data());
  for (size_t j = 0; j < len / sizeof(float); ++j) {
    ASSERT_LT(data[j], 1.0);
    ASSERT_GT(data[j], -1.0);
  }

  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
  cpu_feeds.push_back(&first_word);
  cpu_feeds.push_back(&second_word);
  cpu_feeds.push_back(&third_word);
  cpu_feeds.push_back(&fourth_word);

  framework::LoDTensor output1;
  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
  cpu_fetchs1.push_back(&output1);

  TestInference<platform::CPUPlace>(config.model_dir, cpu_feeds, cpu_fetchs1);

  float* lod_data = output1.data<float>();
  for (int i = 0; i < output1.numel(); ++i) {
    EXPECT_LT(lod_data[i] - data[i], ACC_DIFF);
    EXPECT_GT(lod_data[i] - data[i], -ACC_DIFF);
  }
}

// Runs the image classification (resnet) model once through a NativeConfig
// predictor with a batch of 2 random inputs and compares the result with the
// reference produced by TestInference on CPUPlace.
//
// use_gpu: value stored in NativeConfig::use_gpu for the predictor under test.
void MainImageClassification(bool use_gpu) {
  int batch_size = 2;
  bool repeat = false;
  NativeConfig config = GetConfig();
  config.use_gpu = use_gpu;
  config.model_dir =
      FLAGS_dirname + "/image_classification_resnet.inference.model";

  const bool is_combined = false;
  std::vector<std::vector<int64_t>> feed_target_shapes =
      GetFeedTargetShapes(config.model_dir, is_combined);

  framework::LoDTensor input;
  // Use normalized image pixels as input data,
  // which should be in the range [0.0, 1.0].
  feed_target_shapes[0][0] = batch_size;
  framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
  SetupTensor<float>(&input, input_dims, static_cast<float>(0),
                     static_cast<float>(1));
  std::vector<framework::LoDTensor*> cpu_feeds;
  cpu_feeds.push_back(&input);

  // Reference result.
  framework::LoDTensor output1;
  std::vector<framework::LoDTensor*> cpu_fetchs1;
  cpu_fetchs1.push_back(&output1);

  TestInference<platform::CPUPlace, false, true>(
      config.model_dir, cpu_feeds, cpu_fetchs1, repeat, is_combined);

  // Result from the predictor under test, fed with the same input tensor.
  auto predictor = CreatePaddlePredictor(config);
  std::vector<PaddleTensor> paddle_tensor_feeds;
  paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&input));

  std::vector<PaddleTensor> outputs;
  ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
  ASSERT_EQ(outputs.size(), 1UL);
  const size_t len = outputs[0].data.length();
  const size_t num_outputs = len / sizeof(float);
  float* data = static_cast<float*>(outputs[0].data.data());

  // Stop here if the two buffers differ in element count; otherwise the
  // element-wise comparison below would index past the shorter one.
  ASSERT_EQ(static_cast<size_t>(output1.numel()), num_outputs);
  float* lod_data = output1.data<float>();
  for (size_t j = 0; j < num_outputs; ++j) {
    EXPECT_NEAR(lod_data[j], data[j], ACC_DIFF);
  }
}

void MainThreadsWord2Vec(bool use_gpu) {
  NativeConfig config = GetConfig();
  config.use_gpu = use_gpu;
  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);

  // prepare inputs data and reference results
  constexpr int num_jobs = 3;
  std::vector<std::vector<framework::LoDTensor>> jobs(num_jobs);
  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
  std::vector<framework::LoDTensor> refs(num_jobs);
  for (size_t i = 0; i < jobs.size(); ++i) {
    // each job has 4 words
    jobs[i].resize(4);
    for (size_t j = 0; j < 4; ++j) {
      framework::LoD lod{{0, 1}};
      int64_t dict_size = 2073;  // The size of dictionary
      SetupLoDTensor(&jobs[i][j], lod, static_cast<int64_t>(0), dict_size - 1);
      paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i][j]));
    }

    // get reference result of each job
    std::vector<paddle::framework::LoDTensor*> ref_feeds;
    std::vector<paddle::framework::LoDTensor*> ref_fetches(1, &refs[i]);
    for (auto& word : jobs[i]) {
      ref_feeds.push_back(&word);
    }
    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
  }

  // create threads and each thread run 1 job
  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_jobs; ++tid) {
    threads.emplace_back([&, tid]() {
      auto predictor = main_predictor->Clone();
      auto& local_inputs = paddle_tensor_feeds[tid];
      std::vector<PaddleTensor> local_outputs;
      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));

      // check outputs range
      ASSERT_EQ(local_outputs.size(), 1UL);
      const size_t len = local_outputs[0].data.length();
      float* data = static_cast<float*>(local_outputs[0].data.data());
      for (size_t j = 0; j < len / sizeof(float); ++j) {
        ASSERT_LT(data[j], 1.0);
        ASSERT_GT(data[j], -1.0);
      }

      // check outputs correctness
      float* ref_data = refs[tid].data<float>();
      EXPECT_EQ(refs[tid].numel(), static_cast<int64_t>(len / sizeof(float)));
      for (int i = 0; i < refs[tid].numel(); ++i) {
        EXPECT_NEAR(ref_data[i], data[i], 2e-3);
      }
    });
  }
  for (int i = 0; i < num_jobs; ++i) {
    threads[i].join();
  }
}

void MainThreadsImageClassification(bool use_gpu) {
  constexpr int num_jobs = 4;  // each job run 1 batch
  constexpr int batch_size = 1;
  NativeConfig config = GetConfig();
  config.use_gpu = use_gpu;
  config.model_dir =
      FLAGS_dirname + "/image_classification_resnet.inference.model";

  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);
  std::vector<framework::LoDTensor> jobs(num_jobs);
  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
  std::vector<framework::LoDTensor> refs(num_jobs);
  for (size_t i = 0; i < jobs.size(); ++i) {
    // prepare inputs
    std::vector<std::vector<int64_t>> feed_target_shapes =
        GetFeedTargetShapes(config.model_dir, /*is_combined*/ false);
    feed_target_shapes[0][0] = batch_size;
    framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
    SetupTensor<float>(&jobs[i], input_dims, 0.f, 1.f);
    paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i]));

    // get reference result of each job
    std::vector<framework::LoDTensor*> ref_feeds(1, &jobs[i]);
    std::vector<framework::LoDTensor*> ref_fetches(1, &refs[i]);
    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
  }

  // create threads and each thread run 1 job
  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_jobs; ++tid) {
    threads.emplace_back([&, tid]() {
      auto predictor = main_predictor->Clone();
      auto& local_inputs = paddle_tensor_feeds[tid];
      std::vector<PaddleTensor> local_outputs;
      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));

      // check outputs correctness
      ASSERT_EQ(local_outputs.size(), 1UL);
      const size_t len = local_outputs[0].data.length();
      float* data = static_cast<float*>(local_outputs[0].data.data());
      float* ref_data = refs[tid].data<float>();
      EXPECT_EQ((size_t)refs[tid].numel(), len / sizeof(float));
      for (int i = 0; i < refs[tid].numel(); ++i) {
        EXPECT_NEAR(ref_data[i], data[i], ACC_DIFF);
      }
    });
  }
  for (int i = 0; i < num_jobs; ++i) {
    threads[i].join();
  }
}

// CPU test cases: each model is exercised once single-threaded and once with
// one cloned predictor per thread.
TEST(inference_api_native, word2vec_cpu) { MainWord2Vec(false /*use_gpu*/); }
TEST(inference_api_native, word2vec_cpu_threads) {
  MainThreadsWord2Vec(false /*use_gpu*/);
}
// The single-threaded case runs MainImageClassification; the multi-threaded
// variant is covered by the *_threads test below.
TEST(inference_api_native, image_classification_cpu) {
  MainImageClassification(false /*use_gpu*/);
}
TEST(inference_api_native, image_classification_cpu_threads) {
  MainThreadsImageClassification(false /*use_gpu*/);
}

// GPU test cases, compiled only when the build defines PADDLE_WITH_CUDA. They
// mirror the CPU cases with use_gpu = true.
#ifdef PADDLE_WITH_CUDA
TEST(inference_api_native, word2vec_gpu) { MainWord2Vec(true /*use_gpu*/); }
TEST(inference_api_native, word2vec_gpu_threads) {
  MainThreadsWord2Vec(true /*use_gpu*/);
}
// The single-threaded case runs MainImageClassification; the multi-threaded
// variant is covered by the *_threads test below.
TEST(inference_api_native, image_classification_gpu) {
  MainImageClassification(true /*use_gpu*/);
}
TEST(inference_api_native, image_classification_gpu_threads) {
  MainThreadsImageClassification(true /*use_gpu*/);
}

#endif

}  // namespace paddle
add inference interface impl 7 years ago			`/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.`

			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License. */`

			`#include <glog/logging.h>`
			`#include <gtest/gtest.h>`

fix compiler error after move 7 years ago			`#include <thread> // NOLINT`
enable word2vec multi-threads ut 7 years ago
add inference interface impl 7 years ago			`#include "gflags/gflags.h"`
fix compiler error after move 7 years ago			`#include "paddle/fluid/inference/api/api_impl.h"`
add inference interface impl 7 years ago			`#include "paddle/fluid/inference/tests/test_helper.h"`

test=develop 6 years ago			`#ifdef __clang__`
			`#define ACC_DIFF 4e-3`
			`#else`
			`#define ACC_DIFF 1e-3`
			`#endif`

add inference interface impl 7 years ago			`DEFINE_string(dirname, "", "Directory of the inference model.");`

			`namespace paddle {`

			`PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {`
			`PaddleTensor pt;`

			`if (t->type() == typeid(int64_t)) {`
inference/unify output buffer management (#11569) 7 years ago			`pt.data.Reset(t->data<void>(), t->numel() * sizeof(int64_t));`
add inference interface impl 7 years ago			`pt.dtype = PaddleDType::INT64;`
			`} else if (t->type() == typeid(float)) {`
inference/unify output buffer management (#11569) 7 years ago			`pt.data.Reset(t->data<void>(), t->numel() * sizeof(float));`
add inference interface impl 7 years ago			`pt.dtype = PaddleDType::FLOAT32;`
			`} else {`
			`LOG(FATAL) << "unsupported type.";`
			`}`
			`pt.shape = framework::vectorize2int(t->dims());`
			`return pt;`
			`}`

inference API little fix (#11069) 7 years ago			`NativeConfig GetConfig() {`
			`NativeConfig config;`
fea/infer executor and concurrency performance issue bug fix (#13451) - add naive executor - fix concurrency performance issue 6 years ago			`config.model_dir = FLAGS_dirname + "/word2vec.inference.model";`
add inference interface impl 7 years ago			`LOG(INFO) << "dirname " << config.model_dir;`
Fix test to use less gpu memory 7 years ago			`config.fraction_of_gpu_memory = 0.15;`
enable word2vec multi-threads ut 7 years ago			`#ifdef PADDLE_WITH_CUDA`
inference API little fix (#11069) 7 years ago			`config.use_gpu = true;`
enable word2vec multi-threads ut 7 years ago			`#else`
			`config.use_gpu = false;`
			`#endif`
add inference interface impl 7 years ago			`config.device = 0;`
add tests and polish infer impl (#11009) 7 years ago			`return config;`
			`}`
add inference interface impl 7 years ago
add gpu tests 7 years ago			`void MainWord2Vec(bool use_gpu) {`
inference API little fix (#11069) 7 years ago			`NativeConfig config = GetConfig();`
			`auto predictor = CreatePaddlePredictor<NativeConfig>(config);`
add gpu tests 7 years ago			`config.use_gpu = use_gpu;`
add inference interface impl 7 years ago
			`framework::LoDTensor first_word, second_word, third_word, fourth_word;`
			`framework::LoD lod{{0, 1}};`
			`int64_t dict_size = 2073; // The size of dictionary`

			`SetupLoDTensor(&first_word, lod, static_cast<int64_t>(0), dict_size - 1);`
			`SetupLoDTensor(&second_word, lod, static_cast<int64_t>(0), dict_size - 1);`
			`SetupLoDTensor(&third_word, lod, static_cast<int64_t>(0), dict_size - 1);`
			`SetupLoDTensor(&fourth_word, lod, static_cast<int64_t>(0), dict_size - 1);`

add tests and polish infer impl (#11009) 7 years ago			`std::vector<PaddleTensor> paddle_tensor_feeds;`
			`paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&first_word));`
			`paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&second_word));`
			`paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&third_word));`
			`paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&fourth_word));`

			`std::vector<PaddleTensor> outputs;`
			`ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));`
			`ASSERT_EQ(outputs.size(), 1UL);`
inference/unify output buffer management (#11569) 7 years ago			`size_t len = outputs[0].data.length();`
			`float* data = static_cast<float*>(outputs[0].data.data());`
feature/simple inference demo (#11105) 7 years ago			`for (size_t j = 0; j < len / sizeof(float); ++j) {`
add tests and polish infer impl (#11009) 7 years ago			`ASSERT_LT(data[j], 1.0);`
			`ASSERT_GT(data[j], -1.0);`
			`}`

			`std::vector<paddle::framework::LoDTensor*> cpu_feeds;`
			`cpu_feeds.push_back(&first_word);`
			`cpu_feeds.push_back(&second_word);`
			`cpu_feeds.push_back(&third_word);`
			`cpu_feeds.push_back(&fourth_word);`

			`framework::LoDTensor output1;`
			`std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;`
			`cpu_fetchs1.push_back(&output1);`

			`TestInference<platform::CPUPlace>(config.model_dir, cpu_feeds, cpu_fetchs1);`

			`float* lod_data = output1.data<float>();`
feature/simple inference demo (#11105) 7 years ago			`for (int i = 0; i < output1.numel(); ++i) {`
test=develop 6 years ago			`EXPECT_LT(lod_data[i] - data[i], ACC_DIFF);`
			`EXPECT_GT(lod_data[i] - data[i], -ACC_DIFF);`
add tests and polish infer impl (#11009) 7 years ago			`}`
			`}`

add gpu tests 7 years ago			`void MainImageClassification(bool use_gpu) {`
add tests and polish infer impl (#11009) 7 years ago			`int batch_size = 2;`
			`bool repeat = false;`
inference API little fix (#11069) 7 years ago			`NativeConfig config = GetConfig();`
add gpu tests 7 years ago			`config.use_gpu = use_gpu;`
add tests and polish infer impl (#11009) 7 years ago			`config.model_dir =`
fea/infer executor and concurrency performance issue bug fix (#13451) - add naive executor - fix concurrency performance issue 6 years ago			`FLAGS_dirname + "/image_classification_resnet.inference.model";`
add tests and polish infer impl (#11009) 7 years ago
			`const bool is_combined = false;`
			`std::vector<std::vector<int64_t>> feed_target_shapes =`
			`GetFeedTargetShapes(config.model_dir, is_combined);`

			`framework::LoDTensor input;`
			`// Use normilized image pixels as input data,`
			`// which should be in the range [0.0, 1.0].`
			`feed_target_shapes[0][0] = batch_size;`
			`framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);`
move contrib/inference to paddle/fluid/inference/api 7 years ago			`SetupTensor<float>(&input, input_dims, static_cast<float>(0),`
			`static_cast<float>(1));`
add tests and polish infer impl (#11009) 7 years ago			`std::vector<framework::LoDTensor*> cpu_feeds;`
			`cpu_feeds.push_back(&input);`

			`framework::LoDTensor output1;`
			`std::vector<framework::LoDTensor*> cpu_fetchs1;`
			`cpu_fetchs1.push_back(&output1);`

fix compiler error in high-level api 7 years ago			`TestInference<platform::CPUPlace, false, true>(`
			`config.model_dir, cpu_feeds, cpu_fetchs1, repeat, is_combined);`
add tests and polish infer impl (#11009) 7 years ago
inference API little fix (#11069) 7 years ago			`auto predictor = CreatePaddlePredictor(config);`
add tests and polish infer impl (#11009) 7 years ago			`std::vector<PaddleTensor> paddle_tensor_feeds;`
			`paddle_tensor_feeds.push_back(LodTensorToPaddleTensor(&input));`
add inference interface impl 7 years ago
			`std::vector<PaddleTensor> outputs;`
add tests and polish infer impl (#11009) 7 years ago			`ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));`
fix manylinux compile error caused by inference lib (#10994) * update * remove warning * Update test_paddle_inference_api_impl.cc 7 years ago			`ASSERT_EQ(outputs.size(), 1UL);`
inference/unify output buffer management (#11569) 7 years ago			`size_t len = outputs[0].data.length();`
			`float* data = static_cast<float*>(outputs[0].data.data());`
add tests and polish infer impl (#11009) 7 years ago			`float* lod_data = output1.data<float>();`
			`for (size_t j = 0; j < len / sizeof(float); ++j) {`
test=develop 6 years ago			`EXPECT_NEAR(lod_data[j], data[j], ACC_DIFF);`
add inference interface impl 7 years ago			`}`
			`}`

add gpu tests 7 years ago			`void MainThreadsWord2Vec(bool use_gpu) {`
enable word2vec multi-threads ut 7 years ago			`NativeConfig config = GetConfig();`
add gpu tests 7 years ago			`config.use_gpu = use_gpu;`
enable word2vec multi-threads ut 7 years ago			`auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);`

enable image_classification multi-threads ut 7 years ago			`// prepare inputs data and reference results`
enable word2vec multi-threads ut 7 years ago			`constexpr int num_jobs = 3;`
			`std::vector<std::vector<framework::LoDTensor>> jobs(num_jobs);`
			`std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);`
			`std::vector<framework::LoDTensor> refs(num_jobs);`
			`for (size_t i = 0; i < jobs.size(); ++i) {`
			`// each job has 4 words`
			`jobs[i].resize(4);`
			`for (size_t j = 0; j < 4; ++j) {`
			`framework::LoD lod{{0, 1}};`
			`int64_t dict_size = 2073; // The size of dictionary`
			`SetupLoDTensor(&jobs[i][j], lod, static_cast<int64_t>(0), dict_size - 1);`
			`paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i][j]));`
			`}`

			`// get reference result of each job`
			`std::vector<paddle::framework::LoDTensor*> ref_feeds;`
			`std::vector<paddle::framework::LoDTensor*> ref_fetches(1, &refs[i]);`
			`for (auto& word : jobs[i]) {`
			`ref_feeds.push_back(&word);`
			`}`
			`TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);`
			`}`

			`// create threads and each thread run 1 job`
			`std::vector<std::thread> threads;`
			`for (int tid = 0; tid < num_jobs; ++tid) {`
			`threads.emplace_back([&, tid]() {`
			`auto predictor = main_predictor->Clone();`
			`auto& local_inputs = paddle_tensor_feeds[tid];`
			`std::vector<PaddleTensor> local_outputs;`
			`ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));`

			`// check outputs range`
			`ASSERT_EQ(local_outputs.size(), 1UL);`
inference/unify output buffer management (#11569) 7 years ago			`const size_t len = local_outputs[0].data.length();`
			`float* data = static_cast<float*>(local_outputs[0].data.data());`
enable word2vec multi-threads ut 7 years ago			`for (size_t j = 0; j < len / sizeof(float); ++j) {`
			`ASSERT_LT(data[j], 1.0);`
			`ASSERT_GT(data[j], -1.0);`
			`}`

			`// check outputs correctness`
			`float* ref_data = refs[tid].data<float>();`
enable image_classification multi-threads ut 7 years ago			`EXPECT_EQ(refs[tid].numel(), static_cast<int64_t>(len / sizeof(float)));`
enable word2vec multi-threads ut 7 years ago			`for (int i = 0; i < refs[tid].numel(); ++i) {`
update test=develop 6 years ago			`EXPECT_NEAR(ref_data[i], data[i], 2e-3);`
enable word2vec multi-threads ut 7 years ago			`}`
enable image_classification multi-threads ut 7 years ago			`});`
			`}`
			`for (int i = 0; i < num_jobs; ++i) {`
			`threads[i].join();`
			`}`
			`}`

add gpu tests 7 years ago			`void MainThreadsImageClassification(bool use_gpu) {`
enable image_classification multi-threads ut 7 years ago			`constexpr int num_jobs = 4; // each job run 1 batch`
			`constexpr int batch_size = 1;`
			`NativeConfig config = GetConfig();`
add gpu tests 7 years ago			`config.use_gpu = use_gpu;`
enable image_classification multi-threads ut 7 years ago			`config.model_dir =`
fea/infer executor and concurrency performance issue bug fix (#13451) - add naive executor - fix concurrency performance issue 6 years ago			`FLAGS_dirname + "/image_classification_resnet.inference.model";`
enable image_classification multi-threads ut 7 years ago
			`auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);`
			`std::vector<framework::LoDTensor> jobs(num_jobs);`
			`std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);`
			`std::vector<framework::LoDTensor> refs(num_jobs);`
			`for (size_t i = 0; i < jobs.size(); ++i) {`
			`// prepare inputs`
			`std::vector<std::vector<int64_t>> feed_target_shapes =`
			`GetFeedTargetShapes(config.model_dir, /*is_combined*/ false);`
			`feed_target_shapes[0][0] = batch_size;`
			`framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);`
			`SetupTensor<float>(&jobs[i], input_dims, 0.f, 1.f);`
			`paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i]));`

			`// get reference result of each job`
			`std::vector<framework::LoDTensor*> ref_feeds(1, &jobs[i]);`
			`std::vector<framework::LoDTensor*> ref_fetches(1, &refs[i]);`
			`TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);`
			`}`
enable word2vec multi-threads ut 7 years ago
enable image_classification multi-threads ut 7 years ago			`// create threads and each thread run 1 job`
			`std::vector<std::thread> threads;`
			`for (int tid = 0; tid < num_jobs; ++tid) {`
			`threads.emplace_back([&, tid]() {`
			`auto predictor = main_predictor->Clone();`
			`auto& local_inputs = paddle_tensor_feeds[tid];`
			`std::vector<PaddleTensor> local_outputs;`
			`ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));`

			`// check outputs correctness`
			`ASSERT_EQ(local_outputs.size(), 1UL);`
inference/unify output buffer management (#11569) 7 years ago			`const size_t len = local_outputs[0].data.length();`
			`float* data = static_cast<float*>(local_outputs[0].data.data());`
enable image_classification multi-threads ut 7 years ago			`float* ref_data = refs[tid].data<float>();`
fix compile warning in inference related codes 7 years ago			`EXPECT_EQ((size_t)refs[tid].numel(), len / sizeof(float));`
enable image_classification multi-threads ut 7 years ago			`for (int i = 0; i < refs[tid].numel(); ++i) {`
test=develop 6 years ago			`EXPECT_NEAR(ref_data[i], data[i], ACC_DIFF);`
enable image_classification multi-threads ut 7 years ago			`}`
enable word2vec multi-threads ut 7 years ago			`});`
			`}`
			`for (int i = 0; i < num_jobs; ++i) {`
			`threads[i].join();`
			`}`
			`}`

add gpu tests 7 years ago			`TEST(inference_api_native, word2vec_cpu) { MainWord2Vec(false /*use_gpu*/); }`
			`TEST(inference_api_native, word2vec_cpu_threads) {`
			`MainThreadsWord2Vec(false /*use_gpu*/);`
			`}`
			`TEST(inference_api_native, image_classification_cpu) {`
			`MainThreadsImageClassification(false /*use_gpu*/);`
			`}`
			`TEST(inference_api_native, image_classification_cpu_threads) {`
			`MainThreadsImageClassification(false /*use_gpu*/);`
			`}`

			`#ifdef PADDLE_WITH_CUDA`
			`TEST(inference_api_native, word2vec_gpu) { MainWord2Vec(true /*use_gpu*/); }`
			`TEST(inference_api_native, word2vec_gpu_threads) {`
			`MainThreadsWord2Vec(true /*use_gpu*/);`
			`}`
			`TEST(inference_api_native, image_classification_gpu) {`
			`MainThreadsImageClassification(true /*use_gpu*/);`
			`}`
			`TEST(inference_api_native, image_classification_gpu_threads) {`
			`MainThreadsImageClassification(true /*use_gpu*/);`
			`}`

			`#endif`

add inference interface impl 7 years ago			`} // namespace paddle`