Fix en doc of warpctc, affine_grid, generate_proposals (#20262)

6 years ago · 54df1ad4c7
parent 89c4b3ddcf
commit 54df1ad4c7
3 changed files with 163 additions and 180 deletions
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -177,7 +177,7 @@ paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized',
 paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', 'c1df110ea65998984f564c5c10abc54a'))
 paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', '3720b4a386585094435993deb028b592'))
 paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e50940f3ce5a08cc477b72f517491bf3'))
-paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'input_length', 'label_length'], varargs=None, keywords=None, defaults=(0, False, None, None)), ('document', 'a5be881ada816e47ea7a6ee4396da357'))
+paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'input_length', 'label_length'], varargs=None, keywords=None, defaults=(0, False, None, None)), ('document', '79aaea078ddea57a82ed7906d71dedc7'))
 paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'eeb1591cfc854c6ffdac77b376313c44'))
 paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '8e72db173d4c082e27cb11f31d8c9bfa'))
 paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', 'fe352915a543cec434f74e9b32ac49da'))
@ -277,7 +277,7 @@ paddle.fluid.layers.mul (ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_di
 paddle.fluid.layers.sigmoid_cross_entropy_with_logits (ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)), ('document', '7637c974f2d749d359acae9062c4d96f'))
 paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '169882eb87fb693198e0153629134c22'))
 paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '26decdea9376b6b9a0d3432d82ca207b'))
-paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'f85b263b7b6698d000977529a28f202b'))
+paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '315b50c1cbd9569375b098c56f1e91c9'))
 paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5b32ed21ab89140a8e758002923a0da3'))
 paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', '9f303c67538e468a36c5904a0a3aa110'))
 paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '18ec2e3afeb90e70c8b73d2b71c40fdb'))
@ -416,13 +416,13 @@ paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type',
 paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'e9685f32d21bec8c013626c0254502c5'))
 paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '5485bcaceb0cde2695565a2ffd5bbd40'))
 paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '14d1eeae0f41b6792be43c1c0be0589b'))
-paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '651d98d51879dfa1bc1cd40391786a41'))
+paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', 'd46629656b4ce9b07809e32c0482cbef'))
 paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595'))
 paddle.fluid.layers.sigmoid_focal_loss (ArgSpec(args=['x', 'label', 'fg_num', 'gamma', 'alpha'], varargs=None, keywords=None, defaults=(2, 0.25)), ('document', 'aeac6aae100173b3fc7f102cf3023a3d'))
 paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', 'd25e5e90f9a342764f32b5cd48657148'))
 paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', 'a82016342789ba9d85737e405f824ff1'))
-paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', '69def376b42ef0681d0cc7f53a2dac4b'))
-paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', 'b7d707822b6af2a586bce608040235b1'))
+paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'f2342042127b536a0a16390f149f1bba'))
+paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '5cba014b41610431f8949e2d7336f1cc'))
 paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef'))
 paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '72fca4a39ccf82d5c746ae62d1868a99'))
 paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '1d5144c3856673d05c29c752c7c8f821'))
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@ -254,70 +254,66 @@ def rpn_target_assign(bbox_pred,
        bbox_pred(Variable): A 3-D Tensor with shape [N, M, 4] represents the
            predicted locations of M bounding bboxes. N is the batch size,
            and each bounding box has four coordinate values and the layout
-            is [xmin, ymin, xmax, ymax].
+            is [xmin, ymin, xmax, ymax]. The data type can be float32 or float64.
        cls_logits(Variable): A 3-D Tensor with shape [N, M, 1] represents the
            predicted confidence predictions. N is the batch size, 1 is the
            frontground and background sigmoid, M is number of bounding boxes.
+            The data type can be float32 or float64.
        anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
            each box is represented as [xmin, ymin, xmax, ymax],
            [xmin, ymin] is the left top coordinate of the anchor box,
            if the input is image feature map, they are close to the origin
            of the coordinate system. [xmax, ymax] is the right bottom
-            coordinate of the anchor box.
+            coordinate of the anchor box. The data type can be float32 or float64.
        anchor_var(Variable): A 2-D Tensor with shape [M,4] holds expanded 
-            variances of anchors.
+            variances of anchors. The data type can be float32 or float64.
        gt_boxes (Variable): The ground-truth bounding boxes (bboxes) are a 2D
            LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth
-            bboxes of mini-batch input.
+            bboxes of mini-batch input. The data type can be float32 or float64.
        is_crowd (Variable): A 1-D LoDTensor which indicates groud-truth is crowd.
+                             The data type must be int32.
        im_info (Variable): A 2-D LoDTensor with shape [N, 3]. N is the batch size,
        3 is the height, width and scale.
        rpn_batch_size_per_im(int): Total number of RPN examples per image.
+                                    The data type must be int32.
        rpn_straddle_thresh(float): Remove RPN anchors that go outside the image
-            by straddle_thresh pixels.
+            by straddle_thresh pixels. The data type must be float32.
        rpn_fg_fraction(float): Target fraction of RoI minibatch that is labeled
-            foreground (i.e. class > 0), 0-th class is background.
+            foreground (i.e. class > 0), 0-th class is background. The data type must be float32.
        rpn_positive_overlap(float): Minimum overlap required between an anchor
            and ground-truth box for the (anchor, gt box) pair to be a positive
-            example.
+            example. The data type must be float32.
        rpn_negative_overlap(float): Maximum overlap allowed between an anchor
            and ground-truth box for the (anchor, gt box) pair to be a negative
-            examples.
+            examples. The data type must be float32.

    Returns:
        tuple:
-               A tuple(predicted_scores, predicted_location, target_label,
-               target_bbox, bbox_inside_weight) is returned. The predicted_scores 
-               and predicted_location is the predicted result of the RPN.
-               The target_label and target_bbox is the ground truth,
-               respectively. The predicted_location is a 2D Tensor with shape
-               [F, 4], and the shape of target_bbox is same as the shape of
-               the predicted_location, F is the number of the foreground
-               anchors. The predicted_scores is a 2D Tensor with shape
-               [F + B, 1], and the shape of target_label is same as the shape
-               of the predicted_scores, B is the number of the background
-               anchors, the F and B is depends on the input of this operator.
-               Bbox_inside_weight represents whether the predicted loc is fake_fg
-               or not and the shape is [F, 4].
+        A tuple(predicted_scores, predicted_location, target_label,
+        target_bbox, bbox_inside_weight) is returned. The predicted_scores 
+        and predicted_location is the predicted result of the RPN.
+        The target_label and target_bbox is the ground truth,
+        respectively. The predicted_location is a 2D Tensor with shape
+        [F, 4], and the shape of target_bbox is same as the shape of
+        the predicted_location, F is the number of the foreground
+        anchors. The predicted_scores is a 2D Tensor with shape
+        [F + B, 1], and the shape of target_label is same as the shape
+        of the predicted_scores, B is the number of the background
+        anchors, the F and B is depends on the input of this operator.
+        Bbox_inside_weight represents whether the predicted loc is fake_fg
+        or not and the shape is [F, 4].

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
-            bbox_pred = fluid.layers.data(name='bbox_pred', shape=[100, 4],
-                            append_batch_size=False, dtype='float32')
-            cls_logits = fluid.layers.data(name='cls_logits', shape=[100, 1],
-                            append_batch_size=False, dtype='float32')
-            anchor_box = fluid.layers.data(name='anchor_box', shape=[20, 4],
-                            append_batch_size=False, dtype='float32')
-            anchor_var = fluid.layers.data(name='anchor_var', shape=[20, 4],
-                            append_batch_size=False, dtype='float32')
-            gt_boxes = fluid.layers.data(name='gt_boxes', shape=[10, 4],
-                            append_batch_size=False, dtype='float32')
-            is_crowd = fluid.layers.data(name='is_crowd', shape=[1],
-                            append_batch_size=False, dtype='float32')
-            im_info = fluid.layers.data(name='im_infoss', shape=[1, 3],
-                            append_batch_size=False, dtype='float32')
+            bbox_pred = fluid.data(name='bbox_pred', shape=[None, 4], dtype='float32')
+            cls_logits = fluid.data(name='cls_logits', shape=[None, 1], dtype='float32')
+            anchor_box = fluid.data(name='anchor_box', shape=[None, 4], dtype='float32')
+            anchor_var = fluid.data(name='anchor_var', shape=[None, 4], dtype='float32')
+            gt_boxes = fluid.data(name='gt_boxes', shape=[None, 4], dtype='float32')
+            is_crowd = fluid.data(name='is_crowd', shape=[None], dtype='float32')
+            im_info = fluid.data(name='im_infoss', shape=[None, 3], dtype='float32')
            loc, score, loc_target, score_target, inside_weight = fluid.layers.rpn_target_assign(
                bbox_pred, cls_logits, anchor_box, anchor_var, gt_boxes, is_crowd, im_info)

@ -2217,8 +2213,7 @@ def generate_proposal_labels(rpn_rois,
                             is_cls_agnostic=False,
                             is_cascade_rcnn=False):
    """
-
-    ** Generate Proposal Labels of Faster-RCNN **
+    **Generate Proposal Labels of Faster-RCNN**

    This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth,
    to sample foreground boxes and background boxes, and compute loss target.
@ -2236,37 +2231,43 @@ def generate_proposal_labels(rpn_rois,
    Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss.

    Args:
-        rpn_rois(Variable): A 2-D LoDTensor with shape [N, 4]. N is the number of the GenerateProposalOp's output, each element is a bounding box with [xmin, ymin, xmax, ymax] format.
-        gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a class label of groundtruth.
-        is_crowd(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a flag indicates whether a groundtruth is crowd.
+        rpn_rois(Variable): A 2-D LoDTensor with shape [N, 4]. N is the number of the GenerateProposalOp's output, each element is a bounding box with [xmin, ymin, xmax, ymax] format. The data type can be float32 or float64.
+        gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a class label of groundtruth. The data type must be int32.
+        is_crowd(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a flag indicates whether a groundtruth is crowd. The data type must be int32.
        gt_boxes(Variable): A 2-D LoDTensor with shape [M, 4]. M is the number of groundtruth, each element is a bounding box with [xmin, ymin, xmax, ymax] format.
        im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the number of input images, each element consists of im_height, im_width, im_scale.

-        batch_size_per_im(int): Batch size of rois per images.
-        fg_fraction(float): Foreground fraction in total batch_size_per_im.
-        fg_thresh(float): Overlap threshold which is used to chose foreground sample.
-        bg_thresh_hi(float): Overlap threshold upper bound which is used to chose background sample.
-        bg_thresh_lo(float): Overlap threshold lower bound which is used to chose background sample.
-        bbox_reg_weights(list|tuple): Box regression weights.
-        class_nums(int): Class number.
+        batch_size_per_im(int): Batch size of rois per images. The data type must be int32.
+        fg_fraction(float): Foreground fraction in total batch_size_per_im. The data type must be float32.
+        fg_thresh(float): Overlap threshold which is used to chose foreground sample. The data type must be float32.
+        bg_thresh_hi(float): Overlap threshold upper bound which is used to chose background sample. The data type must be float32.
+        bg_thresh_lo(float): Overlap threshold lower bound which is used to chose background sample. The data type must be float32.
+        bbox_reg_weights(list|tuple): Box regression weights. The data type must be float32.
+        class_nums(int): Class number. The data type must be int32.
        use_random(bool): Use random sampling to choose foreground and background boxes.
        is_cls_agnostic(bool): bbox regression use class agnostic simply which only represent fg and bg boxes.
        is_cascade_rcnn(bool): it will filter some bbox crossing the image's boundary when setting True.

+    Returns:
+        tuple:
+        A tuple with format``(rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights)``.
+
+        - **rois**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4]``. The data type is the same as ``rpn_rois``.
+        - **labels_int32**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 1]``. The data type must be int32.
+        - **bbox_targets**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The regression targets of all RoIs. The data type is the same as ``rpn_rois``.
+        - **bbox_inside_weights**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The weights of foreground boxes' regression loss. The data type is the same as ``rpn_rois``.
+        - **bbox_outside_weights**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The weights of regression loss. The data type is the same as ``rpn_rois``.
+
+
    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
-            rpn_rois = fluid.layers.data(name='rpn_rois', shape=[2, 4],
-                           append_batch_size=False, dtype='float32')
-            gt_classes = fluid.layers.data(name='gt_classes', shape=[8, 1],
-                           append_batch_size=False, dtype='float32')
-            is_crowd = fluid.layers.data(name='is_crowd', shape=[8, 1],
-                           append_batch_size=False, dtype='float32')
-            gt_boxes = fluid.layers.data(name='gt_boxes', shape=[8, 4],
-                           append_batch_size=False, dtype='float32')
-            im_info = fluid.layers.data(name='im_info', shape=[10, 3],
-                           append_batch_size=False, dtype='float32')
+            rpn_rois = fluid.data(name='rpn_rois', shape=[None, 4], dtype='float32')
+            gt_classes = fluid.data(name='gt_classes', shape=[None, 1], dtype='float32')
+            is_crowd = fluid.data(name='is_crowd', shape=[None, 1], dtype='float32')
+            gt_boxes = fluid.data(name='gt_boxes', shape=[None, 4], dtype='float32')
+            im_info = fluid.data(name='im_info', shape=[None, 3], dtype='float32')
            rois, labels, bbox, inside_weights, outside_weights = fluid.layers.generate_proposal_labels(
                           rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
                           class_nums=10)
@ -2496,44 +2497,47 @@ def generate_proposals(scores,
        scores(Variable): A 4-D Tensor with shape [N, A, H, W] represents
            the probability for each box to be an object.
            N is batch size, A is number of anchors, H and W are height and
-            width of the feature map.
+            width of the feature map. The data type must be float32.
        bbox_deltas(Variable): A 4-D Tensor with shape [N, 4*A, H, W]
            represents the differece between predicted box locatoin and
-            anchor location.
+            anchor location. The data type must be float32.
        im_info(Variable): A 2-D Tensor with shape [N, 3] represents origin
            image information for N batch. Info contains height, width and scale
            between origin image size and the size of feature map.
+            The data type must be int32.
        anchors(Variable):   A 4-D Tensor represents the anchors with a layout
            of [H, W, A, 4]. H and W are height and width of the feature map,
            num_anchors is the box count of each position. Each anchor is
-            in (xmin, ymin, xmax, ymax) format an unnormalized.
-        variances(Variable): The expanded variances of anchors with a layout of
+            in (xmin, ymin, xmax, ymax) format an unnormalized. The data type must be float32.
+        variances(Variable): A 4-D Tensor. The expanded variances of anchors with a layout of
            [H, W, num_priors, 4]. Each variance is in
-            (xcenter, ycenter, w, h) format.
+            (xcenter, ycenter, w, h) format. The data type must be float32.
        pre_nms_top_n(float): Number of total bboxes to be kept per
-            image before NMS. 6000 by default.
+            image before NMS. The data type must be float32. `6000` by default.
        post_nms_top_n(float): Number of total bboxes to be kept per
-            image after NMS. 1000 by default.
-        nms_thresh(float): Threshold in NMS, 0.5 by default.
+            image after NMS. The data type must be float32. `1000` by default.
+        nms_thresh(float): Threshold in NMS. The data type must be float32. `0.5` by default.
        min_size(float): Remove predicted boxes with either height or
-            width < min_size. 0.1 by default.
-        eta(float): Apply in adaptive NMS, if adaptive threshold > 0.5,
-            adaptive_threshold = adaptive_threshold * eta in each iteration.
+            width < min_size. The data type must be float32. `0.1` by default.
+        eta(float): Apply in adaptive NMS, if adaptive `threshold > 0.5`,
+            `adaptive_threshold = adaptive_threshold * eta` in each iteration.
+
+    Returns:
+        tuple:
+        A tuple with format ``(rpn_rois, rpn_roi_probs)``.
+
+        - **rpn_rois**: The generated RoIs. 2-D Tensor with shape ``[N, 4]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.
+        - **rpn_roi_probs**: The scores of generated RoIs. 2-D Tensor with shape ``[N, 1]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.

    Examples:
        .. code-block:: python
        
            import paddle.fluid as fluid
-            scores = fluid.layers.data(name='scores', shape=[2, 4, 5, 5],
-                         append_batch_size=False, dtype='float32')
-            bbox_deltas = fluid.layers.data(name='bbox_deltas', shape=[2, 16, 5, 5],
-                         append_batch_size=False, dtype='float32')
-            im_info = fluid.layers.data(name='im_info', shape=[2, 3],
-                         append_batch_size=False, dtype='float32')
-            anchors = fluid.layers.data(name='anchors', shape=[5, 5, 4, 4],
-                         append_batch_size=False, dtype='float32')
-            variances = fluid.layers.data(name='variances', shape=[5, 5, 10, 4],
-                         append_batch_size=False, dtype='float32')
+            scores = fluid.data(name='scores', shape=[None, 4, 5, 5], dtype='float32')
+            bbox_deltas = fluid.data(name='bbox_deltas', shape=[None, 16, 5, 5], dtype='float32')
+            im_info = fluid.data(name='im_info', shape=[None, 3], dtype='float32')
+            anchors = fluid.data(name='anchors', shape=[None, 5, 4, 4], dtype='float32')
+            variances = fluid.data(name='variances', shape=[None, 5, 10, 4], dtype='float32')
            rois, roi_probs = fluid.layers.generate_proposals(scores, bbox_deltas,
                         im_info, anchors, variances)

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@ -6761,7 +6761,7 @@ def warpctc(input,
    (https://github.com/baidu-research/warp-ctc)
    to compute Connectionist Temporal Classification (CTC) loss.
    It can be aliased as softmax with CTC, since a native softmax activation is
-    interated to the Warp-CTC library, to to normlize values for each row of the
+    interated to the Warp-CTC library to normlize values for each row of the
    input tensor.

    Args:
@ -6773,14 +6773,15 @@ def warpctc(input,
         (not including the blank label). When it is a 3-D Tensor, it's shape 
         is [max_logit_length, batch_size, num_classes + 1],
         where max_logit_length is the length of the longest
-         input logit sequence.
+         input logit sequence. The data type must be float32.
       label (Variable): The ground truth of variable-length sequence,
         which is a 2-D Tensor with LoD information or a 2-D Tensor without
         LoD information. When it is a 2-D LoDTensor or 2-D Tensor, 
         it is of the shape [Lg, 1], where Lg is th sum of all labels' length.
+         The data type must be int32.
       blank (int, default 0): The blank label index of Connectionist
         Temporal Classification (CTC) loss, which is in the
-         half-opened interval [0, num_classes + 1).
+         half-opened interval [0, num_classes + 1). The data type must be int32. 
       norm_by_times(bool, default false): Whether to normalize the gradients
         by the number of time-step, which is also the sequence's length.
         There is no need to normalize the gradients if warpctc layer was
@ -6792,40 +6793,72 @@ def warpctc(input,

    Returns:
        Variable: The Connectionist Temporal Classification (CTC) loss,
-        which is a 2-D Tensor of the shape [batch_size, 1].
+        which is a 2-D Tensor with the shape [batch_size, 1].
+        The date type is the same as input.

    Examples:
+
        .. code-block:: python

            # using LoDTensor
            import paddle.fluid as fluid
            import numpy as np
            
-            label = fluid.layers.data(name='label', shape=[12, 1],
-                                      dtype='float32', lod_level=1)
-            predict = fluid.layers.data(name='predict', 
-                                        shape=[11, 8],
+            predict = fluid.data(name='predict', 
+                                        shape=[None, 5],
                                        dtype='float32',lod_level=1)
+            label = fluid.data(name='label', shape=[None, 1],
+                                      dtype='int32', lod_level=1)
            cost = fluid.layers.warpctc(input=predict, label=label)
+            place = fluid.CPUPlace()
+            x=fluid.LoDTensor()
+            data = np.random.rand(8, 5).astype("float32")
+            x.set(data, place)
+            x.set_lod([[0,4,8]])
+            y=fluid.LoDTensor()
+            data = np.random.randint(0, 5, [4, 1]).astype("int32")
+            y.set(data, place)
+            y.set_lod([[0,2,4]])
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+            output= exe.run(feed={"predict": x,"label": y},
+                                         fetch_list=[cost.name])
+            print output
+
+        .. code-block:: python

            # using Tensor
-            input_length = fluid.layers.data(name='logits_length', shape=[11],
-                                         dtype='int64')
-            label_length = fluid.layers.data(name='labels_length', shape=[12],
-                                         dtype='int64')
-            target = fluid.layers.data(name='target', shape=[12, 1],
-                                       dtype='int32')
+            import paddle.fluid as fluid
+            import numpy as np
+            
            # length of the longest logit sequence
-            max_seq_length = 4
+            max_seq_length = 5
            # number of logit sequences
-            batch_size = 4
-            output = fluid.layers.data(name='output', 
-                                       shape=[max_seq_length, batch_size, 8],
+            batch_size = None
+            logits = fluid.data(name='logits', 
+                                       shape=[max_seq_length, batch_size, 5],
                                       dtype='float32')
-            loss = fluid.layers.warpctc(input=output,label=target,
-                                        input_length=input_length,
+            logits_length = fluid.data(name='logits_length', shape=[None],
+                                         dtype='int64')
+            label = fluid.layers.data(name='label', shape=[None, 1],
+                                       dtype='int32')
+            label_length = fluid.layers.data(name='labels_length', shape=[None],
+                                         dtype='int64')
+            cost = fluid.layers.warpctc(input=logits, label=label,
+                                        input_length=logits_length,
                                        label_length=label_length)
-
+            place = fluid.CPUPlace()
+            batch_size = 2
+            x = np.random.rand(max_seq_length, batch_size, 5).astype("float32")
+            y = np.random.randint(0, 5, [max_seq_length * batch_size, 1]).astype("int32")
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+            output= exe.run(feed={"logits": x,
+                                  "label": y,
+                                  "logits_length": np.array([5, 4]).astype("int64"),
+                                  "labels_length": np.array([3, 2]).astype("int64")},
+                                  fetch_list=[cost.name])
+            print(output)
    """
    helper = LayerHelper('warpctc', **locals())
    this_inputs = {'Logits': [input], 'Label': [label]}
@ -10643,77 +10676,16 @@ def affine_grid(theta, out_shape, name=None):
    the input feature map should be sampled to produce the transformed
    output feature map.

-    .. code-block:: text
-
-        * Case 1:
-
-          Given:
-
-              theta = [[[x_11, x_12, x_13]
-                        [x_14, x_15, x_16]]
-                       [[x_21, x_22, x_23]
-                        [x_24, x_25, x_26]]]
-
-              out_shape = [2, 3, 5, 5]
-
-          Step 1:
-
-              Generate normalized coordinates according to out_shape.
-              The values of the normalized coordinates are in the interval between -1 and 1.
-              The shape of the normalized coordinates is [2, H, W] as below:
-
-              C = [[[-1.  -1.  -1.  -1.  -1. ]
-                    [-0.5 -0.5 -0.5 -0.5 -0.5]
-                    [ 0.   0.   0.   0.   0. ]
-                    [ 0.5  0.5  0.5  0.5  0.5]
-                    [ 1.   1.   1.   1.   1. ]]
-                   [[-1.  -0.5  0.   0.5  1. ]
-                    [-1.  -0.5  0.   0.5  1. ]
-                    [-1.  -0.5  0.   0.5  1. ]
-                    [-1.  -0.5  0.   0.5  1. ]
-                    [-1.  -0.5  0.   0.5  1. ]]]
-              C[0] is the coordinates in height axis and  C[1] is the coordinates in width axis.
-
-          Step2:
-
-              Tanspose and reshape C to shape [H * W, 2] and append ones to last dimension. The we get:
-              C_ = [[-1.  -1.   1. ]
-                    [-0.5 -1.   1. ]
-                    [ 0.  -1.   1. ]
-                    [ 0.5 -1.   1. ]
-                    [ 1.  -1.   1. ]
-                    [-1.  -0.5  1. ]
-                    [-0.5 -0.5  1. ]
-                    [ 0.  -0.5  1. ]
-                    [ 0.5 -0.5  1. ]
-                    [ 1.  -0.5  1. ]
-                    [-1.   0.   1. ]
-                    [-0.5  0.   1. ]
-                    [ 0.   0.   1. ]
-                    [ 0.5  0.   1. ]
-                    [ 1.   0.   1. ]
-                    [-1.   0.5  1. ]
-                    [-0.5  0.5  1. ]
-                    [ 0.   0.5  1. ]
-                    [ 0.5  0.5  1. ]
-                    [ 1.   0.5  1. ]
-                    [-1.   1.   1. ]
-                    [-0.5  1.   1. ]
-                    [ 0.   1.   1. ]
-                    [ 0.5  1.   1. ]
-                    [ 1.   1.   1. ]]
-          Step3:
-              Compute output by equation $$Output[i] = C_ * Theta[i]^T$$
-
-    Args:
-        theta (Variable): A batch of affine transform parameters with shape [N, 2, 3].
-        out_shape (Variable | list | tuple): The shape of target output with format [N, C, H, W].
-                                             ``out_shape`` can be a Variable or a list or tuple.
-        name(str|None): A name for this layer(optional). If set None, the layer
-                        will be named automatically.
+    Args:
+        theta (Variable) - A Tensor with shape [N, 2, 3]. It contains a batch of affine transform parameters.
+                           The data type can be float32 or float64.
+        out_shape (Variable | list | tuple): The shape of target output with format [batch_size, channel, height, width].
+                                             ``out_shape`` can be a Tensor or a list or tuple. The data
+                                             type must be int32.
+        name(str|None): The default value is None.  Normally there is no need for user to set this property.  For more information, please refer to :ref:`api_guide_Name`.

    Returns:
-        Variable: The output with shape [N, H, W, 2].
+        Variable: A Tensor with shape [batch_size, H, W, 2] while 'H' and 'W' are the height and width of feature map in affine transformation. The data type is the same as `theta`. 

    Raises:
        ValueError: If the type of arguments is not supported.
@ -10723,13 +10695,20 @@ def affine_grid(theta, out_shape, name=None):
        .. code-block:: python

            import paddle.fluid as fluid
-            theta = fluid.layers.data(name="x", shape=[2, 3], dtype="float32")
-            out_shape = fluid.layers.data(name="y", shape=[-1], dtype="float32")
-            data = fluid.layers.affine_grid(theta, out_shape)
-
-            # or
-            data = fluid.layers.affine_grid(theta, [5, 3, 28, 28])
-
+            import numpy as np
+            place = fluid.CPUPlace()
+            theta = fluid.data(name="x", shape=[None, 2, 3], dtype="float32")
+            out_shape = fluid.data(name="y", shape=[4], dtype="int32")
+            grid_0 = fluid.layers.affine_grid(theta, out_shape)
+            grid_1 = fluid.layers.affine_grid(theta, [5, 3, 28, 28])
+            batch_size=2
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+            output= exe.run(feed={"x": np.random.rand(batch_size,2,3).astype("float32"),
+                                  "y": np.array([5, 3, 28, 28]).astype("int32")},
+                                  fetch_list=[grid_0.name, grid_1.name])
+            print(output[0])
+            print(output[1])
    """
    helper = LayerHelper('affine_grid')