Add Quantize OP

test=develop
6 years ago · 019dbf7f5f
parent e9eee0de6a
commit 019dbf7f5f
4 changed files with 269 additions and 0 deletions
--- a/paddle/fluid/operators/quantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/quantize_mkldnn_op.cc
@ -0,0 +1,100 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "mkldnn.hpp"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/quantize_op.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 namespace paddle {
 namespace operators {
 using mkldnn::memory;
 using mkldnn::primitive;
 using mkldnn::reorder;
 using platform::to_void_cast;
 using Tensor = framework::Tensor;
 using framework::DataLayout;
 using mkldnn::stream;
 using platform::GetMKLDNNFormat;
 // MKL-DNN kernel of the `quantize` operator.
 //
 // Reads the "Input" tensor as f32 data, multiplies it by the "Scale"
 // attribute and stores the result in "Output" as s8 (when the
 // "is_negative_input" attribute is true) or u8 (otherwise). The conversion is
 // carried out by a single MKL-DNN reorder primitive whose destination is
 // always described with the nhwc format.
 template <typename T>
 class QuantOpKernel : public framework::OpKernel<T> {
 public:
  // Runs the quantizing reorder for one invocation of the operator.
  //
  // ctx: execution context providing the "Input"/"Output" tensors, the
  //      "Scale" and "is_negative_input" attributes and the MKL-DNN device
  //      context.
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* input = ctx.Input<Tensor>("Input");
    const float scale_data = ctx.Attr<float>("Scale");
    auto* output = ctx.Output<Tensor>("Output");

    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();

    const std::vector<int> src_tz =
        paddle::framework::vectorize2int(input->dims());
    const std::vector<int> dst_tz =
        paddle::framework::vectorize2int(output->dims());
    const T* input_data = input->data<T>();

    // Mask 0 applies the one scale value to the whole tensor.
    mkldnn::primitive_attr attri;
    const int mask = 0;
    attri.set_output_scales(mask, {scale_data});

    // Source memory wraps the input buffer in place; nothing is copied.
    const auto src_md = platform::MKLDNNMemDesc(src_tz, memory::data_type::f32,
                                                input->format());
    const auto src_pd = memory::primitive_desc(src_md, engine);
    const memory src_memory(src_pd, to_void_cast<T>(input_data));

    // Only the element type of the output buffer depends on the sign flag;
    // allocate it with the matching type and keep everything else shared.
    const bool is_negative = ctx.Attr<bool>("is_negative_input");
    void* dst_handle = nullptr;
    memory::data_type dst_dtype = memory::data_type::u8;
    if (is_negative) {
      dst_handle =
          to_void_cast<int8_t>(output->mutable_data<int8_t>(ctx.GetPlace()));
      dst_dtype = memory::data_type::s8;
    } else {
      dst_handle =
          to_void_cast<uint8_t>(output->mutable_data<uint8_t>(ctx.GetPlace()));
    }
    const auto dst_md =
        platform::MKLDNNMemDesc(dst_tz, dst_dtype, memory::format::nhwc);
    const auto dst_pd = memory::primitive_desc(dst_md, engine);
    const memory dst_memory(dst_pd, dst_handle);

    // The reorder performs the scaling and the f32 -> int8 conversion.
    const reorder::primitive_desc reorder_pd(src_pd, dst_pd, attri);
    const reorder reorder_p(reorder_pd, src_memory, dst_memory);

    std::vector<primitive> pipeline;
    pipeline.push_back(reorder_p);
    stream(stream::kind::eager).submit(pipeline).wait();

    output->set_layout(DataLayout::kMKLDNN);
    output->set_format(GetMKLDNNFormat(dst_memory));
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 // Registers QuantOpKernel<float> as the MKLDNN kernel of the `quantize`
 // operator for CPUPlace.
 // TODO(Xiaoli) Support FP32->S8 quantization.
 REGISTER_OP_KERNEL(quantize, MKLDNN, ::paddle::platform::CPUPlace,
                   ops::QuantOpKernel<float>);
--- a/paddle/fluid/operators/quantize_op.cc
+++ b/paddle/fluid/operators/quantize_op.cc
@ -0,0 +1,47 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *     Unless required by applicable law or agreed to in writing, software
 *     distributed under the License is distributed on an "AS IS" BASIS,
 *     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *     See the License for the specific language governing permissions and
 *     limitations under the License. */
 #include "paddle/fluid/operators/quantize_op.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
 namespace paddle {
 namespace operators {
 // Selects the kernel for this operator: the data type is taken from the
 // "Input" tensor, the place from the execution context, and both the layout
 // and the library are unconditionally MKLDNN.
 framework::OpKernelType QuantOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  const auto input_type = ctx.Input<Tensor>("Input")->type();
  return framework::OpKernelType(input_type, ctx.GetPlace(),
                                 framework::DataLayout::kMKLDNN,
                                 framework::LibraryType::kMKLDNN);
 }
 // Declares the operator's proto: one input, one output, the signedness flag
 // and the scale attribute, followed by the operator comment.
 void QuantOpMaker::Make() {
  // Tensors.
  AddInput("Input", "input data");
  AddOutput("Output", "output data");

  // Attributes. Both carry a default value.
  AddAttr<bool>("is_negative_input",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false);
  AddAttr<float>("Scale", "scale data").SetDefault(1.0f);

  AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC");
 }
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 // Registers the `quantize` operator with its QuantOp implementation, its
 // QuantOpMaker proto maker and the default grad-op desc maker.
 REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
--- a/paddle/fluid/operators/quantize_op.h
+++ b/paddle/fluid/operators/quantize_op.h
@ -0,0 +1,46 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 namespace paddle {
 namespace operators {
 using framework::OpKernelType;
 using framework::Tensor;
 // Operator definition of `quantize`. Shape inference is defined inline;
 // kernel selection is declared here and defined out of line.
 class QuantOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // "Output" gets the same dimensions and the same LoD as "Input".
  void InferShape(framework::InferShapeContext* ctx) const override {
    const auto input_dims = ctx->GetInputDim("Input");
    ctx->SetOutputDim("Output", input_dims);
    ctx->ShareLoD("Input", /*->*/ "Output");
  }

 protected:
  // Returns the kernel type to dispatch to for the given execution context.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
 };
 // Proto maker of the `quantize` operator; Make() is defined out of line and
 // declares the operator's inputs, outputs and attributes.
 class QuantOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override;
 };
 }  // namespace operators
 }  // namespace paddle
--- a/python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py
@ -0,0 +1,76 @@
 #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest
 class TestQuantizeOp(OpTest):
    # Base case for the `quantize` op test. Subclasses customise it by
    # overriding the set_scale() / set_is_negative() hooks, which setUp()
    # calls before generating any data.

    def setUp(self):
        self.op_type = 'quantize'
        # Defaults; the hooks below may replace them.
        self.scale = 2.0
        self.input_size = [1, 1, 5, 5]
        self.is_negative = False
        self.set_scale()
        self.set_is_negative()

        # Random samples in [0, 100); shifted to [-50, 50) for the signed case.
        samples = 100 * np.random.random_sample(self.input_size)
        if self.is_negative:
            samples = samples - 50
            out_dtype = 'int8'
        else:
            out_dtype = 'uint8'
        data = samples.astype('float32')

        # Reference result: scale, round, then cast to the 8-bit type.
        expected = np.round(data * self.scale).astype(out_dtype)

        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(data)}
        self.outputs = {'Output': expected}
        self.attrs = {
            'Scale': self.scale,
            'is_negative_input': self.is_negative
        }

    def test_check_output(self):
        self.check_output()

    def set_scale(self):
        # Hook: override to change self.scale.
        pass

    def set_is_negative(self):
        # Hook: override to change self.is_negative.
        pass
 class TestQuantizeOp1(TestQuantizeOp):
    # Signed (int8) quantization with scale 1.5.

    def set_scale(self):
        self.scale = 1.5

    def set_is_negative(self):
        # Must assign `is_negative` (the attribute setUp() reads); a misspelt
        # name would silently leave the unsigned default in place.
        self.is_negative = True
 class TestQuantizeOp2(TestQuantizeOp):
    # Unsigned (uint8) quantization with scale 0.1.

    def set_scale(self):
        self.scale = 0.1

    def set_is_negative(self):
        # Assign the attribute setUp() actually reads.
        self.is_negative = False
 # Run the test cases of this module when the file is executed directly.
 if __name__ == '__main__':
    unittest.main()