commit
						84c3523c5a
					
				| @ -0,0 +1,238 @@ | ||||
| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| 
 | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
| 
 | ||||
|     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| 
 | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. */ | ||||
| 
 | ||||
| #include "nnpack.h" | ||||
| #include "paddle/function/ConvOp.h" | ||||
| 
 | ||||
| DEFINE_bool(nnpack_allocate_outside, | ||||
|             false, | ||||
|             "Allocate and free workspace memory outside the NNPACK interface."); | ||||
| DEFINE_int32(nnpack_num_threads, | ||||
|              0, | ||||
|              "The number of nnpack threads" | ||||
|              "default: 0; 0 to disable threadpool."); | ||||
| 
 | ||||
| namespace paddle { | ||||
| 
 | ||||
| nnp_convolution_algorithm get_nnp_convolution_algorithm( | ||||
|     const std::string& algorithm) { | ||||
|   if (algorithm == "auto") { | ||||
|     return nnp_convolution_algorithm_auto; | ||||
|   } else if (algorithm == "ft8x8") { | ||||
|     return nnp_convolution_algorithm_ft8x8; | ||||
|   } else if (algorithm == "ft16x16") { | ||||
|     return nnp_convolution_algorithm_ft16x16; | ||||
|   } else if (algorithm == "wt8x8") { | ||||
|     return nnp_convolution_algorithm_wt8x8; | ||||
|   } else if (algorithm == "implicit-gemm") { | ||||
|     return nnp_convolution_algorithm_implicit_gemm; | ||||
|   } else if (algorithm == "direct") { | ||||
|     return nnp_convolution_algorithm_direct; | ||||
|   } else { | ||||
|     return nnp_convolution_algorithm_auto; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| template <DeviceType Device> | ||||
| class NNPACKConvFunction : public ConvFunctionBase { | ||||
| public: | ||||
|   void init(const FuncConfig& config) override { | ||||
|     ConvFunctionBase::init(config); | ||||
|     CHECK_EQ(groups_, (size_t)1); | ||||
|     algorithm_ = get_nnp_convolution_algorithm(config.get<std::string>("algo")); | ||||
|     // algorithm_ = nnp_convolution_algorithm_auto;
 | ||||
|     transform_strategy_ = nnp_convolution_transform_strategy_compute; | ||||
|     nnp_status status = nnp_initialize(); | ||||
|     CHECK_EQ(status, nnp_status_success); | ||||
|     workspaceBuffer_ = nullptr; | ||||
|     workspaceSize_ = 0; | ||||
| 
 | ||||
|     threadpool_ = nullptr; | ||||
|     if (FLAGS_nnpack_num_threads) { | ||||
|       threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads); | ||||
|       VLOG(3) << "Number of threads " | ||||
|               << pthreadpool_get_threads_count(threadpool_); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   ~NNPACKConvFunction() { | ||||
|     if (threadpool_) { | ||||
|       pthreadpool_destroy(threadpool_); | ||||
|     } | ||||
|     if (workspaceBuffer_) { | ||||
|       free(workspaceBuffer_); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   virtual void check(const BufferArgs& inputs, | ||||
|                      const BufferArgs& outputs) override { | ||||
|     const TensorShape& input = inputs[0].shape(); | ||||
|     const TensorShape& filter = inputs[1].shape(); | ||||
|     const TensorShape& output = outputs[0].shape(); | ||||
|     checkShape(input, filter, output); | ||||
|   } | ||||
| 
 | ||||
|   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { | ||||
|     CHECK_EQ(numInputs_, inputs.size()); | ||||
|     CHECK_EQ(numOutputs_, outputs.size()); | ||||
|     CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); | ||||
|     check(inputs, outputs); | ||||
|     const TensorShape& input = inputs[0].shape(); | ||||
|     const TensorShape& filter = inputs[1].shape(); | ||||
|     const TensorShape& output = outputs[0].shape(); | ||||
| 
 | ||||
|     size_t batchSize = input[0]; | ||||
|     size_t inputChannels = input[1]; | ||||
|     size_t inputHeight = input[2]; | ||||
|     size_t inputWidth = input[3]; | ||||
|     size_t filterHeight = getFilterHeight(filter); | ||||
|     size_t filterWidth = getFilterWidth(filter); | ||||
|     size_t outputChannels = output[1]; | ||||
|     // size_t outputHeight = output[2];
 | ||||
|     // size_t outputWidth = output[3];
 | ||||
| 
 | ||||
|     nnp_size inputSize = {.width = inputWidth, .height = inputHeight}; | ||||
|     nnp_padding padding = {.top = (size_t)paddingH(), | ||||
|                            .right = (size_t)paddingW(), | ||||
|                            .bottom = (size_t)paddingH(), | ||||
|                            .left = (size_t)paddingW()}; | ||||
|     nnp_size kernelSize = {.width = filterWidth, .height = filterHeight}; | ||||
|     nnp_size outputSubsampling = {.width = (size_t)strideW(), | ||||
|                                   .height = (size_t)strideH()}; | ||||
| 
 | ||||
|     float* inputData = inputs[0].data<float>(); | ||||
|     float* filterData = inputs[1].data<float>(); | ||||
|     float* outputData = outputs[0].data<float>(); | ||||
| 
 | ||||
|     void* bufferPtr = nullptr; | ||||
|     size_t* sizePtr = nullptr; | ||||
|     size_t needSize; | ||||
|     if (FLAGS_nnpack_allocate_outside) { | ||||
|       if (batchSize == 1) { | ||||
|         nnp_status status = nnp_convolution_inference(algorithm_, | ||||
|                                                       transform_strategy_, | ||||
|                                                       inputChannels, | ||||
|                                                       outputChannels, | ||||
|                                                       inputSize, | ||||
|                                                       padding, | ||||
|                                                       kernelSize, | ||||
|                                                       outputSubsampling, | ||||
|                                                       nullptr, | ||||
|                                                       nullptr, | ||||
|                                                       nullptr, | ||||
|                                                       nullptr, | ||||
|                                                       nullptr, | ||||
|                                                       &needSize, | ||||
|                                                       nnp_activation_identity, | ||||
|                                                       nullptr, | ||||
|                                                       nullptr, | ||||
|                                                       nullptr); | ||||
|         CHECK_EQ(status, nnp_status_success); | ||||
|       } else { | ||||
|         // only supports stride = 1
 | ||||
|         CHECK_EQ(strideH(), 1); | ||||
|         CHECK_EQ(strideW(), 1); | ||||
|         nnp_status status = nnp_convolution_output(algorithm_, | ||||
|                                                    batchSize, | ||||
|                                                    inputChannels, | ||||
|                                                    outputChannels, | ||||
|                                                    inputSize, | ||||
|                                                    padding, | ||||
|                                                    kernelSize, | ||||
|                                                    nullptr, | ||||
|                                                    nullptr, | ||||
|                                                    nullptr, | ||||
|                                                    nullptr, | ||||
|                                                    nullptr, | ||||
|                                                    &needSize, | ||||
|                                                    nnp_activation_identity, | ||||
|                                                    nullptr, | ||||
|                                                    nullptr, | ||||
|                                                    nullptr); | ||||
|         CHECK_EQ(status, nnp_status_success); | ||||
|       } | ||||
| 
 | ||||
|       VLOG(3) << "workspace size is " << needSize; | ||||
|       if (needSize > workspaceSize_) { | ||||
|         workspaceSize_ = needSize; | ||||
|         if (workspaceBuffer_) { | ||||
|           free(workspaceBuffer_); | ||||
|         } else { | ||||
|           posix_memalign(&workspaceBuffer_, 64, needSize); | ||||
|         } | ||||
|       } | ||||
| 
 | ||||
|       if (needSize) { | ||||
|         bufferPtr = workspaceBuffer_; | ||||
|         sizePtr = &needSize; | ||||
|       } | ||||
|     } | ||||
| 
 | ||||
|     if (batchSize == 1) { | ||||
|       nnp_status status = | ||||
|           nnp_convolution_inference(algorithm_, | ||||
|                                     transform_strategy_, | ||||
|                                     inputChannels, | ||||
|                                     outputChannels, | ||||
|                                     inputSize, | ||||
|                                     padding, | ||||
|                                     kernelSize, | ||||
|                                     outputSubsampling, | ||||
|                                     inputData, | ||||
|                                     filterData, | ||||
|                                     nullptr, /* bias */ | ||||
|                                     outputData, | ||||
|                                     bufferPtr, | ||||
|                                     sizePtr, | ||||
|                                     nnp_activation_identity, | ||||
|                                     nullptr, | ||||
|                                     threadpool_, /* threadpool */ | ||||
|                                     nullptr); | ||||
|       CHECK_EQ(status, nnp_status_success); | ||||
|     } else { | ||||
|       // only supports stride = 1
 | ||||
|       CHECK_EQ(strideH(), 1); | ||||
|       CHECK_EQ(strideW(), 1); | ||||
|       nnp_status status = nnp_convolution_output(algorithm_, | ||||
|                                                  batchSize, | ||||
|                                                  inputChannels, | ||||
|                                                  outputChannels, | ||||
|                                                  inputSize, | ||||
|                                                  padding, | ||||
|                                                  kernelSize, | ||||
|                                                  inputData, | ||||
|                                                  filterData, | ||||
|                                                  nullptr, /* bias */ | ||||
|                                                  outputData, | ||||
|                                                  bufferPtr, | ||||
|                                                  sizePtr, | ||||
|                                                  nnp_activation_identity, | ||||
|                                                  nullptr, | ||||
|                                                  threadpool_, /* threadpool */ | ||||
|                                                  nullptr); | ||||
|       CHECK_EQ(status, nnp_status_success); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
| private: | ||||
|   nnp_convolution_algorithm algorithm_; | ||||
|   nnp_convolution_transform_strategy transform_strategy_; | ||||
|   void* workspaceBuffer_; | ||||
|   size_t workspaceSize_; | ||||
|   pthreadpool_t threadpool_; | ||||
| }; | ||||
| 
 | ||||
| REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction); | ||||
| 
 | ||||
| }  // namespace paddle
 | ||||
| @ -0,0 +1,99 @@ | ||||
| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| 
 | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
| 
 | ||||
|     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| 
 | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. */ | ||||
| 
 | ||||
| #include <gtest/gtest.h> | ||||
| #include "paddle/function/Function.h" | ||||
| #include "paddle/function/FunctionTest.h" | ||||
| 
 | ||||
| DEFINE_string(algo, | ||||
|               "auto", | ||||
|               "The algorithm (auto, ft8x8, ft16x16, wt8x8, " | ||||
|               "implicit-gemm, or direct) for computing convolution of NNPACK."); | ||||
| 
 | ||||
| namespace paddle { | ||||
| 
 | ||||
| #define IS_NNPACK_SUPPORT(algo, filterSize, stride)        \ | ||||
|   if (algo == "direct" && filterSize != 1) continue;       \ | ||||
|   if (algo == "direct" && batchSize != 1) continue;        \ | ||||
|   if (algo == "wt8x8" && filterSize != 3) continue;        \ | ||||
|   if (algo == "implicit-gemm" && batchSize != 1) continue; \ | ||||
|   if (algo != "auto" && algo != "implicit-gemm" && stride > 1) continue; | ||||
| 
 | ||||
| class ConvolutionTest { | ||||
| public: | ||||
|   ConvolutionTest(const std::string& conv1, | ||||
|                   const std::string& conv2, | ||||
|                   std::string algo = "auto") { | ||||
|     for (size_t batchSize : {1, 32}) { | ||||
|       for (size_t inputSize : {7, 14, 54}) { | ||||
|         for (size_t filterSize : {1, 3, 5}) { | ||||
|           for (size_t inputChannels : {3, 64}) { | ||||
|             for (size_t outputChannels : {3, 64, 128}) { | ||||
|               if (inputChannels < outputChannels) break; | ||||
|               for (size_t stride : {1, 2}) { | ||||
|                 // if batchSize > 1 NNPACKConv only supports stride = 1
 | ||||
|                 if (batchSize > 1 && stride > 1) break; | ||||
|                 for (size_t padding : {0, 1}) { | ||||
|                   if (padding >= filterSize) break; | ||||
|                   size_t outputSize = | ||||
|                       (inputSize - filterSize + 2 * padding + stride) / stride; | ||||
|                   IS_NNPACK_SUPPORT(algo, filterSize, stride); | ||||
|                   LOG(INFO) << " batchSize=" << batchSize | ||||
|                             << " inputChannels=" << inputChannels | ||||
|                             << " inputHeight=" << inputSize | ||||
|                             << " inputWidth=" << inputSize | ||||
|                             << " outputChannels=" << outputChannels | ||||
|                             << " filterHeight=" << filterSize | ||||
|                             << " filterWidth=" << filterSize | ||||
|                             << " outputHeight=" << outputSize | ||||
|                             << " outputWidth=" << outputSize | ||||
|                             << " stride=" << stride << " padding=" << padding; | ||||
| 
 | ||||
|                   std::vector<size_t> paddings = {padding, padding}; | ||||
|                   std::vector<size_t> strides = {stride, stride}; | ||||
|                   Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test( | ||||
|                       conv1, | ||||
|                       conv2, | ||||
|                       FuncConfig() | ||||
|                           .set("paddings", paddings) | ||||
|                           .set("strides", strides) | ||||
|                           .set("groups", (size_t)1) | ||||
|                           .set("algo", algo)); | ||||
| 
 | ||||
|                   TensorShape shape0{ | ||||
|                       batchSize, inputChannels, inputSize, inputSize}; | ||||
|                   TensorShape shape1{ | ||||
|                       outputChannels, inputChannels, filterSize, filterSize}; | ||||
|                   TensorShape shape2{ | ||||
|                       batchSize, outputChannels, outputSize, outputSize}; | ||||
|                   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape0)); | ||||
|                   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape1)); | ||||
|                   test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape2)); | ||||
|                   test.run(); | ||||
|                 } | ||||
|               } | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| }; | ||||
| 
 | ||||
| TEST(Convolution, NNPACK) { | ||||
|   // NNPACK only supports stride = 1
 | ||||
|   ConvolutionTest test("GemmConv-CPU", "NNPACKConv-CPU", FLAGS_algo); | ||||
| } | ||||
| 
 | ||||
| }  // namespace paddle
 | ||||
| @ -0,0 +1,16 @@ | ||||
| # Find the NNPACK library | ||||
| #  NNPACK_ROOT - where to find NNPACK include and library. | ||||
| # | ||||
| 
 | ||||
# NNPACK_FOUND stays OFF until the header and both libraries are located.
set(NNPACK_FOUND OFF)

# The search root defaults to the NNPACK_ROOT environment variable and is
# stored as a cache entry.
set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")

# Header and the two libraries (nnpack, pthreadpool) looked up under the root.
find_path(NNPACK_INC_DIR
          nnpack.h
          PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB
             NAMES nnpack
             PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB
             NAMES pthreadpool
             PATHS ${NNPACK_ROOT}/lib)

# Any missing piece is a hard configuration error.
if(NOT (NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB))
  message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
endif()

set(NNPACK_FOUND ON)
include_directories(${NNPACK_INC_DIR})
					Loading…
					
					
				
		Reference in new issue