Remove all the code, API and doc of MKL-DNN INT8v1 (#18347)

6 years ago · 19da59ed3f
parent 8ed33bf91f
commit 19da59ed3f
9 changed files with 0 additions and 1243 deletions
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -403,9 +403,6 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_
 paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
 paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
-paddle.fluid.contrib.Calibrator.__init__ (ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.contrib.Calibrator.sample_data (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3b8c85ca1e2cf753cc8c90a6c6992958'))
-paddle.fluid.contrib.Calibrator.save_int8_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.distributed_batch_reader (ArgSpec(args=['batch_reader'], varargs=None, keywords=None, defaults=None), ('document', 'b60796eb0a481484dd34e345f0eaa4d5'))
 paddle.fluid.contrib.reader.ctr_reader.ctr_reader (ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2ebf3de2a6ef1af2c3b88d2db7591ab'))
 paddle.fluid.contrib.Compressor.__init__ (ArgSpec(args=['self', 'place', 'scope', 'train_program', 'train_reader', 'train_feed_list', 'train_fetch_list', 'eval_program', 'eval_reader', 'eval_feed_list', 'eval_fetch_list', 'teacher_programs', 'checkpoint_path', 'train_optimizer', 'distiller_optimizer', 'search_space'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, [], None, None, None, None)), ('document', 'c195b3bba26169cff9439e8c467557c0'))
--- a/python/paddle/fluid/contrib/init.py
+++ b/python/paddle/fluid/contrib/init.py
@ -22,8 +22,6 @@ from . import op_frequence
 from .op_frequence import *
 from . import quantize
 from .quantize import *
-from . import int8_inference
-from .int8_inference import *
 from . import reader
 from .reader import *
 from . import slim
@ -44,7 +42,6 @@ __all__ += decoder.__all__
 __all__ += memory_usage_calc.__all__
 __all__ += op_frequence.__all__
 __all__ += quantize.__all__
-__all__ += int8_inference.__all__
 __all__ += reader.__all__
 __all__ += slim.__all__
 __all__ += utils.__all__
--- a/python/paddle/fluid/contrib/int8_inference/README.md
+++ b/python/paddle/fluid/contrib/int8_inference/README.md
@ -1,86 +0,0 @@
-# Offline INT8 Calibration Tool
-
-PaddlePaddle supports offline INT8 calibration to accelerate inference. This document explains how to enable INT8 calibration and reports the accuracy and throughput results for ResNet-50 and MobileNet-V1.
-
-## 0. Prerequisite
-You need to install the PaddlePaddle Python package, version 1.3 or later, e.g. `pip install paddlepaddle==1.3`.
-
-## 1. How to generate INT8 model
-You can refer to the unit test in [test_calibration_resnet50.py](../tests/test_calibration_resnet50.py). Basically, there are three steps:
-* Construct calibration object.
-
-```python
-calibrator = int8_utility.Calibrator( # Step 1
-    program=infer_program, # required, FP32 program
-    pretrained_model=model_path, # required, FP32 pretrained model
-    algo=algo, # required, calibration algorithm; default is max, the alternative is KL (Kullback–Leibler divergence)
-    exe=exe, # required, executor
-    output=int8_model, # required, INT8 model
-    feed_var_names=feed_dict, # required, feed dict
-    fetch_list=fetch_targets) # required, fetch targets
-```
-
-* Call `calibrator.sample_data()` after the executor runs.
-```python
-_, acc1, _ = exe.run(
-    program,
-    feed={feed_dict[0]: image,
-          feed_dict[1]: label},
-    fetch_list=fetch_targets)
-
-calibrator.sample_data() # Step 2
-```
-
-* Call `calibrator.save_int8_model()` after sampling over the specified number of iterations (e.g., iterations = 50)
-```python
-calibrator.save_int8_model() # Step 3
-```
-
-## 2. How to run INT8 model
-You can load the INT8 model with the `load_inference_model` [API](https://github.com/PaddlePaddle/Paddle/blob/8b50ad80ff6934512d3959947ac1e71ea3fb9ea3/python/paddle/fluid/io.py#L991) and run INT8 inference in the same way as [FP32](https://github.com/PaddlePaddle/models/blob/develop/fluid/PaddleCV/object_detection/eval.py "FP32").
-
-```python
-[infer_program, feed_dict,
-    fetch_targets] = fluid.io.load_inference_model(model_path, exe)
-```
-
-## 3. Result
-We provide the results of accuracy and performance measured on Intel(R) Xeon(R) Gold 6271 (single core).
-
-**I. Top-1 Accuracy on Intel(R) Xeon(R) Gold 6271**
-
-| Model  | Dataset  | FP32 Accuracy  | INT8 Accuracy  | Accuracy Diff  |
-| :------------: | :------------: | :------------: | :------------: | :------------: |
-| ResNet-50  | Full ImageNet Val  |  76.63%  | 76.23%  | 0.40% |
-| MobileNet-V1 | Full ImageNet Val  | 70.78%  | 70.47%  | 0.31%  |
-
-**II. Throughput on Intel(R) Xeon(R) Gold 6271 (batch size 1 on single core)**
-
-| Model  | Dataset  | FP32 Throughput  | INT8 Throughput  |  Ratio(INT8/FP32)  |
-| :------------: | :------------: | :------------: | :------------: | :------------: |
-| ResNet-50  | Full ImageNet Val  |  11.54 images/s | 32.2 images/s | 2.79 |
-| MobileNet-V1 | Full ImageNet Val  | 49.21 images/s | 108.37 images/s | 2.2  |
-
-Please note that the [full ImageNet validation dataset](http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar "full ImageNet validation dataset") can be downloaded by the script `test_calibration.py` with `DATASET=full`.
-
-Notes:
-* The accuracy measurement requires the model with `label`.
-* The theoretical INT8 speedup is 4X on Intel® Xeon® Cascade Lake servers (please refer to `The theoretical peak compute gains are 4x int8 OPS over fp32 OPS.` in [Reference](https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training "Reference")). Therefore, the theoretical op-level gain is 4X and the topology-level gain is smaller.
-
-## 4. How to reproduce the results
-* Small dataset for ResNet-50 (Single core)
-```bash
-FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
-```
->Note: Change `test_calibration_resnet50.py` to `test_calibration_mobilenetv1.py` for MobileNet-V1. Same for the following commands.
-
-* Full dataset for ResNet-50 (Single core)
-```bash
-FLAGS_use_mkldnn=true DATASET=full python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
-```
-
-* Full dataset for ResNet-50 (Multi-core)
-```bash
-FLAGS_use_mkldnn=true OMP_NUM_THREADS=20 DATASET=full python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
-```
-> Note: This is an example command that uses 20 cores by setting the `OMP_NUM_THREADS` value.
--- a/python/paddle/fluid/contrib/int8_inference/init.py
+++ b/python/paddle/fluid/contrib/int8_inference/init.py
@ -1,20 +0,0 @@
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-from . import utility
-from .utility import *
-
-__all__ = utility.__all__
--- a/python/paddle/fluid/contrib/int8_inference/utility.py
+++ b/python/paddle/fluid/contrib/int8_inference/utility.py
--- a/python/paddle/fluid/contrib/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/tests/CMakeLists.txt
@ -1,15 +1,6 @@
 file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
 string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

-if(APPLE OR WIN32 OR NOT WITH_MKL)
-    list(REMOVE_ITEM TEST_OPS test_calibration_resnet50)
-    list(REMOVE_ITEM TEST_OPS test_calibration_mobilenetv1)
-endif()
-
 foreach(src ${TEST_OPS})
-    if(src MATCHES "test_calibration_*")
-        py_test(${src} SRCS ${src}.py ENVS FLAGS_use_mkldnn=true FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI})
-    else()
        py_test(${src} SRCS ${src}.py)
-    endif()
 endforeach()
--- a/python/paddle/fluid/contrib/tests/test_calibration_mobilenetv1.py
+++ b/python/paddle/fluid/contrib/tests/test_calibration_mobilenetv1.py
@ -1,59 +0,0 @@
-#   copyright (c) 2018 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-import unittest
-import sys
-from test_calibration_resnet50 import TestCalibration
-
-
-class TestCalibrationForMobilenetv1(TestCalibration):
-    def download_model(self):
-        # mobilenetv1 fp32 data
-        data_urls = [
-            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
-        ]
-        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
-        self.model_cache_folder = self.download_data(data_urls, data_md5s,
-                                                     "mobilenetv1_fp32")
-        self.model = "MobileNet-V1"
-        self.algo = "KL"
-
-    def test_calibration(self):
-        self.download_model()
-        print("Start FP32 inference for {0} on {1} images ...".format(
-            self.model, self.infer_iterations * self.batch_size))
-        (fp32_throughput, fp32_latency,
-         fp32_acc1) = self.run_program(self.model_cache_folder + "/model")
-        print("Start INT8 calibration for {0} on {1} images ...".format(
-            self.model, self.sample_iterations * self.batch_size))
-        self.run_program(
-            self.model_cache_folder + "/model", True, algo=self.algo)
-        print("Start INT8 inference for {0} on {1} images ...".format(
-            self.model, self.infer_iterations * self.batch_size))
-        (int8_throughput, int8_latency,
-         int8_acc1) = self.run_program(self.int8_model)
-        delta_value = fp32_acc1 - int8_acc1
-        self.assertLess(delta_value, 0.01)
-        print(
-            "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
-            format(self.model, self.batch_size, fp32_throughput, fp32_latency,
-                   fp32_acc1))
-        print(
-            "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
-            format(self.model, self.batch_size, int8_throughput, int8_latency,
-                   int8_acc1))
-        sys.stdout.flush()
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
+++ b/python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
--- a/python/setup.py.in
+++ b/python/setup.py.in
@ -110,7 +110,6 @@ packages=['paddle',
          'paddle.fluid.contrib',
          'paddle.fluid.contrib.decoder',
          'paddle.fluid.contrib.quantize',
-          'paddle.fluid.contrib.int8_inference',
          'paddle.fluid.contrib.reader',
          'paddle.fluid.contrib.slim',
          'paddle.fluid.contrib.slim.core',