Remove all the code, API and doc of MKL-DNN INT8v1 (#18347)

nan-debug-tool
翟飞跃 6 years ago committed by Tao Luo
parent 8ed33bf91f
commit 19da59ed3f

@ -403,9 +403,6 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
paddle.fluid.contrib.Calibrator.__init__ (ArgSpec(args=['self'], varargs='args', keywords='kwargs', defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.Calibrator.sample_data (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '3b8c85ca1e2cf753cc8c90a6c6992958'))
paddle.fluid.contrib.Calibrator.save_int8_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.distributed_batch_reader (ArgSpec(args=['batch_reader'], varargs=None, keywords=None, defaults=None), ('document', 'b60796eb0a481484dd34e345f0eaa4d5'))
paddle.fluid.contrib.reader.ctr_reader.ctr_reader (ArgSpec(args=['feed_dict', 'file_type', 'file_format', 'dense_slot_index', 'sparse_slot_index', 'capacity', 'thread_num', 'batch_size', 'file_list', 'slots', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2ebf3de2a6ef1af2c3b88d2db7591ab'))
paddle.fluid.contrib.Compressor.__init__ (ArgSpec(args=['self', 'place', 'scope', 'train_program', 'train_reader', 'train_feed_list', 'train_fetch_list', 'eval_program', 'eval_reader', 'eval_feed_list', 'eval_fetch_list', 'teacher_programs', 'checkpoint_path', 'train_optimizer', 'distiller_optimizer', 'search_space'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, [], None, None, None, None)), ('document', 'c195b3bba26169cff9439e8c467557c0'))

@ -22,8 +22,6 @@ from . import op_frequence
from .op_frequence import *
from . import quantize
from .quantize import *
from . import int8_inference
from .int8_inference import *
from . import reader
from .reader import *
from . import slim
@ -44,7 +42,6 @@ __all__ += decoder.__all__
__all__ += memory_usage_calc.__all__
__all__ += op_frequence.__all__
__all__ += quantize.__all__
__all__ += int8_inference.__all__
__all__ += reader.__all__
__all__ += slim.__all__
__all__ += utils.__all__

@ -1,86 +0,0 @@
# Offline INT8 Calibration Tool
PaddlePaddle supports offline INT8 calibration to accelerate the inference speed. In this document, we provide the instructions on how to enable INT8 calibration and show the ResNet-50 and MobileNet-V1 results in accuracy.
## 0. Prerequisite
You need to install at least PaddlePaddle-1.3 python package `pip install paddlepaddle==1.3`.
## 1. How to generate INT8 model
You can refer to the unit test in [test_calibration_resnet50.py](../tests/test_calibration_resnet50.py). Basically, there are three steps:
* Construct calibration object.
```python
calibrator = int8_utility.Calibrator( # Step 1
program=infer_program, # required, FP32 program
pretrained_model=model_path, # required, FP32 pretrained model
algo=algo, # required, calibration algorithm; default is max, the alternative is KL (KullbackLeibler divergence)
exe=exe, # required, executor
output=int8_model, # required, INT8 model
feed_var_names=feed_dict, # required, feed dict
fetch_list=fetch_targets) # required, fetch targets
```
* Call the calibrator.sample_data() after executor run.
```python
_, acc1, _ = exe.run(
program,
feed={feed_dict[0]: image,
feed_dict[1]: label},
fetch_list=fetch_targets)
calibrator.sample_data() # Step 2
```
* Call the calibrator.save_int8_model() after sampling over specified iterations (e.g., iterations = 50)
```python
calibrator.save_int8_model() # Step 3
```
## 2. How to run INT8 model
You can load INT8 model by load_inference_model [API](https://github.com/PaddlePaddle/Paddle/blob/8b50ad80ff6934512d3959947ac1e71ea3fb9ea3/python/paddle/fluid/io.py#L991) and run INT8 inference similar as [FP32](https://github.com/PaddlePaddle/models/blob/develop/fluid/PaddleCV/object_detection/eval.py "FP32").
```python
[infer_program, feed_dict,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
```
## 3. Result
We provide the results of accuracy and performance measured on Intel(R) Xeon(R) Gold 6271 (single core).
**I. Top-1 Accuracy on Intel(R) Xeon(R) Gold 6271**
| Model | Dataset | FP32 Accuracy | INT8 Accuracy | Accuracy Diff |
| :------------: | :------------: | :------------: | :------------: | :------------: |
| ResNet-50 | Full ImageNet Val | 76.63% | 76.23% | 0.40% |
| MobileNet-V1 | Full ImageNet Val | 70.78% | 70.47% | 0.31% |
**II. Throughput on Intel(R) Xeon(R) Gold 6271 (batch size 1 on single core)**
| Model | Dataset | FP32 Throughput | INT8 Throughput | Ratio(INT8/FP32) |
| :------------: | :------------: | :------------: | :------------: | :------------: |
| ResNet-50 | Full ImageNet Val | 11.54 images/s | 32.2 images/s | 2.79 |
| MobileNet-V1 | Full ImageNet Val | 49.21 images/s | 108.37 images/s | 2.2 |
Please note that [full ImageNet validation dataset](http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar "full ImageNet validation dataset") can be downloaded by script `test_calibration.py` with `DATASET=full`.
Notes:
* The accuracy measurement requires the model with `label`.
* The INT8 theoretical speedup is 4X on Intel® Xeon® Cascadelake Server (please refer to `The theoretical peak compute gains are 4x int8 OPS over fp32 OPS.` in [Reference](https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training "Reference")). Therefore, op-level gain is 4X and topology-level is smaller.
## 4. How to reproduce the results
* Small dataset for ResNet-50 (Single core)
```bash
FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
```
>Note: Change `test_calibration_resnet50.py` to `test_calibration_mobilenetv1.py` for MobileNet-V1. Same for the following commands.
* Full dataset for ResNet-50 (Single core)
```bash
FLAGS_use_mkldnn=true DATASET=full python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
```
* Full dataset for ResNet-50 (Multi-core)
```bash
FLAGS_use_mkldnn=true OMP_NUM_THREADS=20 DATASET=full python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py
```
> Notes: This is an example command with 20 cores by using set `OMP_NUM_THREADS` value.

@ -1,20 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from . import utility
from .utility import *
__all__ = utility.__all__

File diff suppressed because it is too large Load Diff

@ -1,15 +1,6 @@
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
if(APPLE OR WIN32 OR NOT WITH_MKL)
list(REMOVE_ITEM TEST_OPS test_calibration_resnet50)
list(REMOVE_ITEM TEST_OPS test_calibration_mobilenetv1)
endif()
foreach(src ${TEST_OPS})
if(src MATCHES "test_calibration_*")
py_test(${src} SRCS ${src}.py ENVS FLAGS_use_mkldnn=true FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI})
else()
py_test(${src} SRCS ${src}.py)
endif()
endforeach()

@ -1,59 +0,0 @@
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import sys
from test_calibration_resnet50 import TestCalibration
class TestCalibrationForMobilenetv1(TestCalibration):
def download_model(self):
# mobilenetv1 fp32 data
data_urls = [
'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
]
data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
self.model_cache_folder = self.download_data(data_urls, data_md5s,
"mobilenetv1_fp32")
self.model = "MobileNet-V1"
self.algo = "KL"
def test_calibration(self):
self.download_model()
print("Start FP32 inference for {0} on {1} images ...".format(
self.model, self.infer_iterations * self.batch_size))
(fp32_throughput, fp32_latency,
fp32_acc1) = self.run_program(self.model_cache_folder + "/model")
print("Start INT8 calibration for {0} on {1} images ...".format(
self.model, self.sample_iterations * self.batch_size))
self.run_program(
self.model_cache_folder + "/model", True, algo=self.algo)
print("Start INT8 inference for {0} on {1} images ...".format(
self.model, self.infer_iterations * self.batch_size))
(int8_throughput, int8_latency,
int8_acc1) = self.run_program(self.int8_model)
delta_value = fp32_acc1 - int8_acc1
self.assertLess(delta_value, 0.01)
print(
"FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
format(self.model, self.batch_size, fp32_throughput, fp32_latency,
fp32_acc1))
print(
"INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}".
format(self.model, self.batch_size, int8_throughput, int8_latency,
int8_acc1))
sys.stdout.flush()
if __name__ == '__main__':
unittest.main()

@ -110,7 +110,6 @@ packages=['paddle',
'paddle.fluid.contrib',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.int8_inference',
'paddle.fluid.contrib.reader',
'paddle.fluid.contrib.slim',
'paddle.fluid.contrib.slim.core',

Loading…
Cancel
Save