Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into dyg_db

4 years ago · 0fbcb520b1
parent dbd27878cb 9df55aa509
commit 0fbcb520b1
7 changed files with 49 additions and 69 deletions
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -119,10 +119,10 @@ class DetResizeForTest(object):
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
-        if 'limit_side_len' in kwargs:
+        elif 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
-        if 'resize_long' in kwargs:
+        elif 'resize_long' in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get('resize_long', 960)
        else:
--- a/ppocr/losses/det_sast_loss.py
+++ b/ppocr/losses/det_sast_loss.py
@@ -19,7 +19,6 @@ from __future__ import print_function
 import paddle
 from paddle import nn
 from .det_basic_loss import DiceLoss
-import paddle.fluid as fluid
 import numpy as np


@@ -27,9 +26,7 @@ class SASTLoss(nn.Layer):
    """
    """

-    def __init__(self,
-                 eps=1e-6,
-                 **kwargs):
+    def __init__(self, eps=1e-6, **kwargs):
        super(SASTLoss, self).__init__()
        self.dice_loss = DiceLoss(eps=eps)

@@ -39,7 +36,7 @@ class SASTLoss(nn.Layer):
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor
        """
-                
+
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
@@ -53,15 +50,17 @@ class SASTLoss(nn.Layer):
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        #border loss
-        l_border_split, l_border_norm = paddle.split(l_border, num_or_sections=[4, 1], axis=1)
+        l_border_split, l_border_norm = paddle.split(
+            l_border, num_or_sections=[4, 1], axis=1)
        f_border_split = f_border
        border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
-        l_border_norm_split = paddle.expand(x=l_border_norm, shape=border_ex_shape)
-        l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)   
-        l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)  
+        l_border_norm_split = paddle.expand(
+            x=l_border_norm, shape=border_ex_shape)
+        l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
+        l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)

        border_diff = l_border_split - f_border_split
-        abs_border_diff = paddle.abs(border_diff) 
+        abs_border_diff = paddle.abs(border_diff)
        border_sign = abs_border_diff < 1.0
        border_sign = paddle.cast(border_sign, dtype='float32')
        border_sign.stop_gradient = True
@@ -72,15 +71,16 @@ class SASTLoss(nn.Layer):
                    (paddle.sum(l_border_score * l_border_mask) + 1e-5)

        #tvo_loss
-        l_tvo_split, l_tvo_norm = paddle.split(l_tvo, num_or_sections=[8, 1], axis=1)
+        l_tvo_split, l_tvo_norm = paddle.split(
+            l_tvo, num_or_sections=[8, 1], axis=1)
        f_tvo_split = f_tvo
        tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
        l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
-        l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)   
-        l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)   
+        l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)
+        l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)
        #
        tvo_geo_diff = l_tvo_split - f_tvo_split
-        abs_tvo_geo_diff = paddle.abs(tvo_geo_diff) 
+        abs_tvo_geo_diff = paddle.abs(tvo_geo_diff)
        tvo_sign = abs_tvo_geo_diff < 1.0
        tvo_sign = paddle.cast(tvo_sign, dtype='float32')
        tvo_sign.stop_gradient = True
@@ -91,15 +91,16 @@ class SASTLoss(nn.Layer):
                    (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)

        #tco_loss
-        l_tco_split, l_tco_norm = paddle.split(l_tco, num_or_sections=[2, 1], axis=1)
+        l_tco_split, l_tco_norm = paddle.split(
+            l_tco, num_or_sections=[2, 1], axis=1)
        f_tco_split = f_tco
        tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
        l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
-        l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)   
-        l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape) 
-        
+        l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)
+        l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape)
+
        tco_geo_diff = l_tco_split - f_tco_split
-        abs_tco_geo_diff = paddle.abs(tco_geo_diff) 
+        abs_tco_geo_diff = paddle.abs(tco_geo_diff)
        tco_sign = abs_tco_geo_diff < 1.0
        tco_sign = paddle.cast(tco_sign, dtype='float32')
        tco_sign.stop_gradient = True
@@ -109,13 +110,12 @@ class SASTLoss(nn.Layer):
        tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
                    (paddle.sum(l_tco_score * l_tco_mask) + 1e-5)

-
        # total loss
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
                    tvo_loss * tvo_lw + tco_loss * tco_lw
-                    
+
        losses = {'loss':total_loss, "score_loss":score_loss,\
            "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
-        return losses
+        return losses
--- a/tools/infer/predict_cls.py
+++ b/tools/infer/predict_cls.py
@@ -24,7 +24,6 @@ import numpy as np
 import math
 import time
 import traceback
-import paddle.fluid as fluid

 import tools.infer.utility as utility
 from ppocr.postprocess import build_post_process
@@ -39,7 +38,6 @@ class TextClassifier(object):
        self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
        self.cls_batch_num = args.cls_batch_num
        self.cls_thresh = args.cls_thresh
-        self.use_zero_copy_run = args.use_zero_copy_run
        postprocess_params = {
            'name': 'ClsPostProcess',
            "label_list": args.label_list,
@@ -99,12 +97,8 @@ class TextClassifier(object):
            norm_img_batch = norm_img_batch.copy()
            starttime = time.time()

-            if self.use_zero_copy_run:
-                self.input_tensor.copy_from_cpu(norm_img_batch)
-                self.predictor.zero_copy_run()
-            else:
-                norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
-                self.predictor.run([norm_img_batch])
+            self.input_tensor.copy_from_cpu(norm_img_batch)
+            self.predictor.run()
            prob_out = self.output_tensors[0].copy_to_cpu()
            cls_result = self.postprocess_op(prob_out)
            elapse += time.time() - starttime
@@ -143,10 +137,11 @@ def main(args):
            "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
        exit()
    for ino in range(len(img_list)):
-        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], cls_res[
-            ino]))
+        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
+                                               cls_res[ino]))
    logger.info("Total predict time for {} images, cost: {:.3f}".format(
        len(img_list), predict_time))

+
 if __name__ == "__main__":
    main(utility.parse_args())
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -22,7 +22,6 @@ import cv2
 import numpy as np
 import time
 import sys
-import paddle

 import tools.infer.utility as utility
 from ppocr.utils.logging import get_logger
@@ -37,7 +36,6 @@ class TextDetector(object):
    def __init__(self, args):
        self.args = args
        self.det_algorithm = args.det_algorithm
-        self.use_zero_copy_run = args.use_zero_copy_run
        pre_process_list = [{
            'DetResizeForTest': {
                'limit_side_len': args.det_limit_side_len,
@@ -72,7 +70,9 @@ class TextDetector(object):
            postprocess_params["nms_thresh"] = args.det_east_nms_thresh
        elif self.det_algorithm == "SAST":
            pre_process_list[0] = {
-                'DetResizeForTest': {'resize_long': args.det_limit_side_len}
+                'DetResizeForTest': {
+                    'resize_long': args.det_limit_side_len
+                }
            }
            postprocess_params['name'] = 'SASTPostProcess'
            postprocess_params["score_thresh"] = args.det_sast_score_thresh
@@ -161,12 +161,8 @@ class TextDetector(object):
        img = img.copy()
        starttime = time.time()

-        if self.use_zero_copy_run:
-            self.input_tensor.copy_from_cpu(img)
-            self.predictor.zero_copy_run()
-        else:
-            im = paddle.fluid.core.PaddleTensor(img)
-            self.predictor.run([im])
+        self.input_tensor.copy_from_cpu(img)
+        self.predictor.run()
        outputs = []
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -23,7 +23,6 @@ import numpy as np
 import math
 import time
 import traceback
-import paddle.fluid as fluid

 import tools.infer.utility as utility
 from ppocr.postprocess import build_post_process
@@ -39,7 +38,6 @@ class TextRecognizer(object):
        self.character_type = args.rec_char_type
        self.rec_batch_num = args.rec_batch_num
        self.rec_algorithm = args.rec_algorithm
-        self.use_zero_copy_run = args.use_zero_copy_run
        postprocess_params = {
            'name': 'CTCLabelDecode',
            "character_type": args.rec_char_type,
@@ -101,12 +99,8 @@ class TextRecognizer(object):
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
            starttime = time.time()
-            if self.use_zero_copy_run:
-                self.input_tensor.copy_from_cpu(norm_img_batch)
-                self.predictor.zero_copy_run()
-            else:
-                norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
-                self.predictor.run([norm_img_batch])
+            self.input_tensor.copy_from_cpu(norm_img_batch)
+            self.predictor.run()
            outputs = []
            for output_tensor in self.output_tensors:
                output = output_tensor.copy_to_cpu()
@@ -145,8 +139,8 @@ def main(args):
            "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
        exit()
    for ino in range(len(img_list)):
-        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], rec_res[
-            ino]))
+        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
+                                               rec_res[ino]))
    logger.info("Total predict time for {} images, cost: {:.3f}".format(
        len(img_list), predict_time))

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -20,8 +20,7 @@ import numpy as np
 import json
 from PIL import Image, ImageDraw, ImageFont
 import math
-from paddle.fluid.core import AnalysisConfig
-from paddle.fluid.core import create_paddle_predictor
+from paddle import inference


 def parse_args():
@@ -83,8 +82,6 @@ def parse_args():
    parser.add_argument("--cls_thresh", type=float, default=0.9)

    parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
-    parser.add_argument("--use_zero_copy_run", type=str2bool, default=False)
-
    parser.add_argument("--use_pdserving", type=str2bool, default=False)

    return parser.parse_args()
@@ -110,14 +107,14 @@ def create_predictor(args, mode, logger):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)

-    config = AnalysisConfig(model_file_path, params_file_path)
+    config = inference.Config(model_file_path, params_file_path)

    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
-                precision_mode=AnalysisConfig.Precision.Half
-                if args.use_fp16 else AnalysisConfig.Precision.Float32,
+                precision_mode=inference.PrecisionType.Half
+                if args.use_fp16 else inference.PrecisionType.Float32,
                max_batch_size=args.max_batch_size)
    else:
        config.disable_gpu()
@@ -130,20 +127,18 @@ def create_predictor(args, mode, logger):
    # config.enable_memory_optim()
    config.disable_glog_info()

-    if args.use_zero_copy_run:
-        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
-        config.switch_use_feed_fetch_ops(False)
-    else:
-        config.switch_use_feed_fetch_ops(True)
+    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+    config.switch_use_feed_fetch_ops(False)

-    predictor = create_paddle_predictor(config)
+    # create predictor
+    predictor = inference.create_predictor(config)
    input_names = predictor.get_input_names()
    for name in input_names:
-        input_tensor = predictor.get_input_tensor(name)
+        input_tensor = predictor.get_input_handle(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
-        output_tensor = predictor.get_output_tensor(output_name)
+        output_tensor = predictor.get_output_handle(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors

--- a/tools/program.py
+++ b/tools/program.py
@@ -131,7 +131,7 @@ def check_gpu(use_gpu):
          "model on CPU"

    try:
-        if use_gpu and not paddle.fluid.is_compiled_with_cuda():
+        if use_gpu and not paddle.is_compiled_with_cuda():
            print(err)
            sys.exit(1)
    except Exception as e: