From 00b30b9938259388f5b5bd74b1d039f1baad99ef Mon Sep 17 00:00:00 2001 From: chenweihang <sunny_cwh@163.com> Date: Mon, 27 Aug 2018 07:00:59 +0000 Subject: [PATCH 01/67] doc: unified infershape format --- paddle/fluid/operators/sequence_erase_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/sequence_erase_op.cc b/paddle/fluid/operators/sequence_erase_op.cc index 1c86486157..816ba123a6 100644 --- a/paddle/fluid/operators/sequence_erase_op.cc +++ b/paddle/fluid/operators/sequence_erase_op.cc @@ -24,9 +24,9 @@ class SequenceEraseOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of SequenceEraseOp should not be null."); + "Input(X) of SequenceErase operator should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of SequenceEraseOp should not be null."); + "Output(Out) of SequenceErase operator should not be null."); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE(x_dims.size() == 2 && x_dims[1] == 1, "Input(X) of SequenceEraseOp should be a 2-D LoDTensor " From 0ff5d8b02a6cffbb6e64b4072d301aec0f2be54c Mon Sep 17 00:00:00 2001 From: minqiyang <minqiyang@baidu.com> Date: Tue, 25 Sep 2018 19:42:28 +0800 Subject: [PATCH 02/67] Port logical_ops to nn --- python/paddle/fluid/layers/control_flow.py | 2 +- python/paddle/fluid/layers/nn.py | 207 +++++++++++++++------ python/paddle/fluid/layers/ops.py | 4 - 3 files changed, 155 insertions(+), 58 deletions(-) diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 0049773bbe..c6250ff6ce 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -21,7 +21,7 @@ from .. import core from ..framework import Program, Variable, Operator from ..layer_helper import LayerHelper, unique_name from ..initializer import force_init_on_cpu -from .ops import logical_and, logical_not, logical_or +from .nn import logical_and, logical_not, logical_or import numpy import warnings import six diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 2cb61a9cd2..11c3707f6d 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -133,6 +133,10 @@ __all__ = [ 'elementwise_max', 'elementwise_min', 'elementwise_pow', + 'logical_and', + 'logical_or', + 'logical_xor', + 'logical_not', ] @@ -1034,8 +1038,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=-100): soft_label (bool): a flag indicating whether to interpretate the given labels as soft labels. Default: `False`. - ignore_index (int): Specifies a target value that is ignored and does - not contribute to the input gradient. Only valid + ignore_index (int): Specifies a target value that is ignored and does + not contribute to the input gradient. Only valid if soft_label is set to False. Default: -100 Returns: @@ -2795,20 +2799,20 @@ def sequence_pad(x, pad_value, maxlen=None): Args: x(Variable): Input variable which should contain lod information. - pad_value(Variable): The Variable that holds values that will be fill - into padded steps. It can be a scalar or a tensor whose shape - equals to time steps in sequences. If it's a scalar, it will be + pad_value(Variable): The Variable that holds values that will be fill + into padded steps. It can be a scalar or a tensor whose shape + equals to time steps in sequences. 
If it's a scalar, it will be automatically broadcasted to the shape of time step. - maxlen(int, default None): The length of padded sequences. It can be - None or any positive int. When it is None, all sequences will be - padded up to the length of the longest one among them; when it a - certain positive value, it must be greater than the length of the + maxlen(int, default None): The length of padded sequences. It can be + None or any positive int. When it is None, all sequences will be + padded up to the length of the longest one among them; when it a + certain positive value, it must be greater than the length of the longest original sequence." - + Returns: - Variable: The padded sequence batch and the original lengths before + Variable: The padded sequence batch and the original lengths before padding. All sequences has the same length. - + Examples: .. code-block:: python @@ -4424,8 +4428,8 @@ def softmax_with_cross_entropy(logits, soft_label is set to true, Label is a Tensor<float/double> with soft_label (bool): A flag to indicate whether to interpretate the given labels as soft labels. By default, `soft_label` is set to False. - ignore_index (int): Specifies a target value that is ignored and does - not contribute to the input gradient. Only valid + ignore_index (int): Specifies a target value that is ignored and does + not contribute to the input gradient. Only valid if soft_label is set to False. Default: -100 Returns: @@ -4682,14 +4686,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): def squeeze(input, axes, name=None): """ - Remove single-dimensional entries from the shape of a tensor. Takes a - parameter axes with a list of axes to squeeze. If axes is not provided, all - the single dimensions will be removed from the shape. If an axis is + Remove single-dimensional entries from the shape of a tensor. Takes a + parameter axes with a list of axes to squeeze. If axes is not provided, all + the single dimensions will be removed from the shape. If an axis is selected with shape entry not equal to one, an error is raised. - + Examples: Case 1: - Given + Given X.shape = (1, 3, 1, 5) and axes = [0] @@ -4698,11 +4702,11 @@ def squeeze(input, axes, name=None): Case 2: Given X.shape = (1, 3, 1, 5) - and + and axes = [] we get: Out.shape = (3, 5) - + Args: input (Variable): The input variable to be squeezed. axes (list): List of integers, indicating the dimensions to be squeezed. @@ -4732,14 +4736,14 @@ def squeeze(input, axes, name=None): def unsqueeze(input, axes, name=None): """ - Insert single-dimensional entries to the shape of a tensor. Takes one - required argument axes, a list of dimensions that will be inserted. - Dimension indices in axes are as seen in the output tensor. + Insert single-dimensional entries to the shape of a tensor. Takes one + required argument axes, a list of dimensions that will be inserted. + Dimension indices in axes are as seen in the output tensor. - For example: - Given a tensor such that tensor with shape [3, 4, 5], + For example: + Given a tensor such that tensor with shape [3, 4, 5], then Unsqueezed tensor with axes=[0, 4] has shape [1, 3, 4, 5, 1]. - + Args: input (Variable): The input variable to be unsqueezed. axes (list): List of integers, indicating the dimensions to be inserted. 
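 
    Examples:
        .. code-block:: python

            # a minimal sketch of the semantics above (names are illustrative)
            x = layers.data(
                name='x', shape=[3, 4, 5], append_batch_size=False,
                dtype='float32')
            y = layers.unsqueeze(input=x, axes=[0, 4])  # shape: [1, 3, 4, 5, 1]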
@@ -5838,39 +5842,39 @@ def pad2d(input, Example: Given that X is a channel of image from input: - + X = [[1, 2, 3], [4, 5, 6]] - + Case 0: - + paddings = [0, 1, 2, 3], mode = 'constant' pad_value = 0 - + Out = [[0, 0, 1, 2, 3, 0, 0, 0] [0, 0, 4, 5, 6, 0, 0, 0] [0, 0, 0, 0, 0, 0, 0, 0]] - + Case 1: - + paddings = [0, 1, 2, 1], mode = 'reflect' - + Out = [[3, 2, 1, 2, 3, 2] [6, 5, 4, 5, 6, 5] [3, 2, 1, 2, 3, 2]] - + Case 2: - + paddings = [0, 1, 2, 1], mode = 'edge' - + Out = [[1, 1, 1, 2, 3, 3] [4, 4, 4, 5, 6, 6] [4, 4, 4, 5, 6, 6]] - - + + Args: input (Variable): The input image with [N, C, H, W] format or [N, H, W, C] format. paddings (tuple|list): The padding size. If padding is a tuple, it must @@ -6069,7 +6073,7 @@ def prelu(x, mode, param_attr=None, name=None): channel:elements in a channel share same weight element:each element has a weight name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + will be named automatically. Returns: Variable: The output tensor with the same shape as input. @@ -6247,10 +6251,10 @@ def flatten(x, axis=1, name=None): def sequence_enumerate(input, win_size, pad_value=0, name=None): """ Generate a new sequence for the input index sequence, which enumerates all the - sub-sequences with length `win_size` of the input. + sub-sequences with length `win_size` of the input. The enumerated sequence has the same 1st dimension with variable `input`, and the 2nd dimension is `win_size`, padded by `pad_value` if necessary in generation. - + Examples: Case 1: Input: @@ -6377,20 +6381,20 @@ def unstack(x, axis=0, num=None): **UnStack Layer** This layer unstacks input :code:`x` into several tensors along axis. - + If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x)`. If :code:`num` is None, it would be inferred from :code:`x.shape[axis]`, and if :code:`x.shape[axis]` <= 0 or is unknown, :code:`ValueError` is - raised. + raised. Args: - x (Variable): Input variable. + x (Variable): Input variable. axis (int): The axis along which the input is unstacked. num (int|None): The number of output variables. - + Returns: list(Variable): The unstacked variables. - + """ helper = LayerHelper('unstack', **locals()) @@ -6423,21 +6427,21 @@ def expand(x, expand_times, name=None): .. code-block:: text Input(X) is a 3-D tensor with shape [2, 3, 1]: - + [ [[1], [2], [3]], [[4], [5], [6]] ] - + Attr(expand_times): [1, 2, 2] - + Output(Out) is a 3-D tensor with shape [2, 6, 2]: - + [ [[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]], [[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]] ] - + Args: x (Variable): A tensor with rank in [1, 6]. expand_times (list|tuple): Expand times number for each dimension. @@ -6508,7 +6512,7 @@ def scale(x, bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment} out(Tensor): Output tensor. act(basestring|None): Activation applied to the output. - name(basestring|None): Name of the output. + name(basestring|None): Name of the output. Returns: out(${out_type}): ${out_comment} @@ -6616,3 +6620,100 @@ for func in [ "act (basestring|None): Activation applied to the output.", "name (basestring|None): Name of the output." 
]) + + +def _logical_op(op_name, x, y, name=None, out=None, binary_op=True): + helper = LayerHelper(op_name, **locals()) + + assert x.dtype == y.dtype + + if out is None: + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) + + if binary_op: + helper.append_op( + type=op_name, inputs={"X": x, + "Y": y}, outputs={"Out": out}) + else: + helper.append_op(type=op_name, inputs={"X": x}, outputs={"Out": out}) + + return out + + +@templatedoc() +def logical_and(x, y, name=None, out=None): + """ + ${comment} + + Args: + x(${x_type}): ${x_comment} + y(${y_type}): ${y_comment} + out(Tensor): Output tensor of logical operation. + name(basestring|None): Name of the output. + + Returns: + out(${out_type}): ${out_comment} + """ + + return _logical_op( + op_name="logical_and", x=x, y=y, name=name, out=out, binary_op=True) + + +@templatedoc() +def logical_or(x, y, name=None, out=None): + """ + ${comment} + + Args: + x(${x_type}): ${x_comment} + y(${y_type}): ${y_comment} + out(Tensor): Output tensor of logical operation. + name(basestring|None): Name of the output. + + Returns: + out(${out_type}): ${out_comment} + """ + + return _logical_op( + op_name="logical_or", x=x, y=y, name=name, out=out, binary_op=True) + + +@templatedoc() +def logical_xor(x, y, name=None, out=None): + """ + ${comment} + + Args: + x(${x_type}): ${x_comment} + y(${y_type}): ${y_comment} + out(Tensor): Output tensor of logical operation. + name(basestring|None): Name of the output. + + Returns: + out(${out_type}): ${out_comment} + """ + + return _logical_op( + op_name="logical_xor", x=x, y=y, name=name, out=out, binary_op=True) + + +@templatedoc() +def logical_not(x, name=None, out=None): + """ + ${comment} + + Args: + x(${x_type}): ${x_comment} + out(Tensor): Output tensor of logical operation. + name(basestring|None): Name of the output. 
+ + Returns: + out(${out_type}): ${out_comment} + """ + + return _logical_op( + op_name="logical_not", x=x, y=None, name=name, out=out, binary_op=False) diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 7867bfe00e..7060402eb7 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -41,10 +41,6 @@ __all__ = [ 'sigmoid_cross_entropy_with_logits', 'clip', 'clip_by_norm', - 'logical_and', - 'logical_or', - 'logical_xor', - 'logical_not', 'uniform_random_batch_size_like', 'gaussian_random', 'sampling_id', From b1448ded40ea8f762257fd59bd4d1a16f9ee2ed5 Mon Sep 17 00:00:00 2001 From: minqiyang <minqiyang@baidu.com> Date: Tue, 25 Sep 2018 20:07:58 +0800 Subject: [PATCH 03/67] Port clip and clip_by_norm op to nn and change API.sepc --- paddle/fluid/API.spec | 12 +- python/paddle/fluid/layers/nn.py | 205 ++++++++++++++---------------- python/paddle/fluid/layers/ops.py | 2 - 3 files changed, 98 insertions(+), 121 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index d3583cf894..41a83a8df9 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -170,6 +170,12 @@ paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_ paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) @@ -235,12 +241,6 @@ paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords= paddle.fluid.layers.mean ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.clip ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.clip_by_norm 
ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.gaussian_random ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.sampling_id ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 11c3707f6d..4d8f887cba 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -29,114 +29,29 @@ from .. import unique_name from functools import reduce __all__ = [ - 'fc', - 'embedding', - 'dynamic_lstm', - 'dynamic_lstmp', - 'dynamic_gru', - 'gru_unit', - 'linear_chain_crf', - 'crf_decoding', - 'cos_sim', - 'cross_entropy', - 'square_error_cost', - 'chunk_eval', - 'sequence_conv', - 'conv2d', - 'conv3d', - 'sequence_pool', - 'sequence_softmax', - 'softmax', - 'pool2d', - 'pool3d', - 'batch_norm', - 'beam_search_decode', - 'conv2d_transpose', - 'conv3d_transpose', - 'sequence_expand', - 'sequence_expand_as', - 'sequence_pad', - 'lstm_unit', - 'reduce_sum', - 'reduce_mean', - 'reduce_max', - 'reduce_min', - 'reduce_prod', - 'sequence_first_step', - 'sequence_last_step', - 'dropout', - 'split', - 'ctc_greedy_decoder', - 'edit_distance', - 'l2_normalize', - 'matmul', - 'topk', - 'warpctc', - 'sequence_reshape', - 'transpose', - 'im2sequence', - 'nce', - 'hsigmoid', - 'beam_search', - 'row_conv', - 'multiplex', - 'layer_norm', - 'softmax_with_cross_entropy', - 'smooth_l1', - 'one_hot', - 'autoincreased_step_counter', - 'reshape', - 'squeeze', - 'unsqueeze', - 'lod_reset', - 'lrn', - 'pad', - 'pad_constant_like', - 'label_smooth', - 'roi_pool', - 'dice_loss', - 'image_resize', - 'image_resize_short', - 'resize_bilinear', - 'gather', - 'scatter', - 'sequence_scatter', - 'random_crop', - 'mean_iou', - 'relu', - 'log', - 'crop', - 'rank_loss', - 'elu', - 'relu6', - 'pow', - 'stanh', - 'hard_sigmoid', - 'swish', - 'prelu', - 'brelu', - 'leaky_relu', - 'soft_relu', - 'flatten', - 'sequence_mask', - 'stack', - 'pad2d', - 'unstack', - 'sequence_enumerate', - 'expand', - 'sequence_concat', - 'scale', - 'elementwise_add', - 'elementwise_div', - 'elementwise_sub', - 'elementwise_mul', - 'elementwise_max', - 'elementwise_min', - 'elementwise_pow', - 'logical_and', - 'logical_or', - 'logical_xor', - 'logical_not', + 'fc', 'embedding', 'dynamic_lstm', 'dynamic_lstmp', 'dynamic_gru', + 'gru_unit', 'linear_chain_crf', 'crf_decoding', 'cos_sim', 'cross_entropy', + 'square_error_cost', 'chunk_eval', 'sequence_conv', 'conv2d', 'conv3d', + 'sequence_pool', 'sequence_softmax', 'softmax', 'pool2d', 'pool3d', + 'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'conv3d_transpose', + 'sequence_expand', 'sequence_expand_as', 'sequence_pad', 'lstm_unit', + 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min', 'reduce_prod', + 'sequence_first_step', 'sequence_last_step', 'dropout', 'split', + 'ctc_greedy_decoder', 'edit_distance', 'l2_normalize', 'matmul', 'topk', + 'warpctc', 'sequence_reshape', 'transpose', 'im2sequence', 
'nce', + 'hsigmoid', 'beam_search', 'row_conv', 'multiplex', 'layer_norm', + 'softmax_with_cross_entropy', 'smooth_l1', 'one_hot', + 'autoincreased_step_counter', 'reshape', 'squeeze', 'unsqueeze', + 'lod_reset', 'lrn', 'pad', 'pad_constant_like', 'label_smooth', 'roi_pool', + 'dice_loss', 'image_resize', 'image_resize_short', 'resize_bilinear', + 'gather', 'scatter', 'sequence_scatter', 'random_crop', 'mean_iou', 'relu', + 'log', 'crop', 'rank_loss', 'elu', 'relu6', 'pow', 'stanh', 'hard_sigmoid', + 'swish', 'prelu', 'brelu', 'leaky_relu', 'soft_relu', 'flatten', + 'sequence_mask', 'stack', 'pad2d', 'unstack', 'sequence_enumerate', + 'expand', 'sequence_concat', 'scale', 'elementwise_add', 'elementwise_div', + 'elementwise_sub', 'elementwise_mul', 'elementwise_max', 'elementwise_min', + 'elementwise_pow', 'logical_and', 'logical_or', 'logical_xor', + 'logical_not', 'clip', 'clip_by_norm' ] @@ -6622,7 +6537,7 @@ for func in [ ]) -def _logical_op(op_name, x, y, name=None, out=None, binary_op=True): +def _logical_op(op_name, x, y, out=None, name=None, binary_op=True): helper = LayerHelper(op_name, **locals()) assert x.dtype == y.dtype @@ -6645,7 +6560,7 @@ def _logical_op(op_name, x, y, name=None, out=None, binary_op=True): @templatedoc() -def logical_and(x, y, name=None, out=None): +def logical_and(x, y, out=None, name=None): """ ${comment} @@ -6664,7 +6579,7 @@ def logical_and(x, y, name=None, out=None): @templatedoc() -def logical_or(x, y, name=None, out=None): +def logical_or(x, y, out=None, name=None): """ ${comment} @@ -6683,7 +6598,7 @@ def logical_or(x, y, name=None, out=None): @templatedoc() -def logical_xor(x, y, name=None, out=None): +def logical_xor(x, y, out=None, name=None): """ ${comment} @@ -6702,7 +6617,7 @@ def logical_xor(x, y, name=None, out=None): @templatedoc() -def logical_not(x, name=None, out=None): +def logical_not(x, out=None, name=None): """ ${comment} @@ -6717,3 +6632,67 @@ def logical_not(x, name=None, out=None): return _logical_op( op_name="logical_not", x=x, y=None, name=name, out=out, binary_op=False) + + +@templatedoc() +def clip(x, min, max, name=None): + """ + ${comment} + + Args: + x(${x_type}): ${x_comment} + min(${min_type}): ${min_comment} + max(${max_type}): ${max_comment} + name(basestring|None): Name of the output. + + Returns: + out(${out_type}): ${out_comment} + """ + + helper = LayerHelper("clip", **locals()) + + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) + + helper.append_op( + type="clip", + inputs={"X": x}, + attrs={"min": min, + "max": max}, + outputs={"Out": out}) + + return out + + +@templatedoc() +def clip_by_norm(x, max_norm, name=None): + """ + ${comment} + + Args: + x(${x_type}): ${x_comment} + max_norm(${max_norm_type}): ${max_norm_comment} + name(basestring|None): Name of the output. 
+ + Returns: + out(${out_type}): ${out_comment} + """ + + helper = LayerHelper("clip_by_norm", **locals()) + + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) + + helper.append_op( + type="clip_by_norm", + inputs={"X": x}, + attrs={"max_norm": max_norm}, + outputs={"Out": out}) + + return out diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 7060402eb7..013ca3aeb0 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -39,8 +39,6 @@ __all__ = [ 'mean', 'mul', 'sigmoid_cross_entropy_with_logits', - 'clip', - 'clip_by_norm', 'uniform_random_batch_size_like', 'gaussian_random', 'sampling_id', From 7c767a44c245ef145644d691fa89319e91610de4 Mon Sep 17 00:00:00 2001 From: minqiyang <minqiyang@baidu.com> Date: Wed, 26 Sep 2018 13:36:27 +0800 Subject: [PATCH 04/67] Polish code test=develop --- python/paddle/fluid/layers/nn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4d8f887cba..f4f462ec3b 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -6689,6 +6689,7 @@ def clip_by_norm(x, max_norm, name=None): out = helper.create_variable( name=name, dtype=x.dtype, persistable=False) + # max_norm should always be set helper.append_op( type="clip_by_norm", inputs={"X": x}, From 23291abdb6313129a68f86e8f7e8d4cefd5fc11c Mon Sep 17 00:00:00 2001 From: minqiyang <minqiyang@baidu.com> Date: Wed, 26 Sep 2018 15:19:32 +0800 Subject: [PATCH 05/67] Polish code --- python/paddle/fluid/layers/nn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index f4f462ec3b..7757226898 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -6540,7 +6540,8 @@ for func in [ def _logical_op(op_name, x, y, out=None, name=None, binary_op=True): helper = LayerHelper(op_name, **locals()) - assert x.dtype == y.dtype + if binary_op: + assert x.dtype == y.dtype if out is None: if name is None: From e4bc247cd4c1665f502d336f8e3be82355beddbd Mon Sep 17 00:00:00 2001 From: minqiyang <minqiyang@baidu.com> Date: Wed, 26 Sep 2018 15:20:29 +0800 Subject: [PATCH 06/67] Polish code test=develop --- python/paddle/fluid/layers/nn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 7757226898..170bad1aa3 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -6690,7 +6690,6 @@ def clip_by_norm(x, max_norm, name=None): out = helper.create_variable( name=name, dtype=x.dtype, persistable=False) - # max_norm should always be set helper.append_op( type="clip_by_norm", inputs={"X": x}, From f1a08a3bab07df7ae80d569292524a65f0e1f77c Mon Sep 17 00:00:00 2001 From: sneaxiy <sneaxiy@126.com> Date: Thu, 27 Sep 2018 02:07:20 +0000 Subject: [PATCH 07/67] test=develop --- paddle/fluid/API.spec | 16 +++--- python/paddle/fluid/layers/nn.py | 91 +++++++------------------------- 2 files changed, 27 insertions(+), 80 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 130558b091..5e08c97746 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -145,14 +145,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 
'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None, None)) -paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) -paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) -paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) -paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) -paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) -paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) -paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)) +paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) diff --git 
a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 2cb61a9cd2..6e0f3de414 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -6471,14 +6471,12 @@ def _elementwise_op(helper): assert y is not None, 'y cannot be None in {}'.format(op_type) axis = helper.kwargs.get('axis', -1) use_mkldnn = helper.kwargs.get('use_mkldnn', False) - out = helper.kwargs.get('out', None) - if out is None: - name = helper.kwargs.get('name', None) - if name is None: - out = helper.create_tmp_variable(dtype=x.dtype) - else: - out = helper.create_variable( - name=name, dtype=x.dtype, persistable=False) + name = helper.kwargs.get('name', None) + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) helper.append_op( type=op_type, @@ -6491,13 +6489,7 @@ def _elementwise_op(helper): @templatedoc() -def scale(x, - scale=1.0, - bias=0.0, - bias_after_scale=True, - out=None, - act=None, - name=None): +def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): """ ${comment} @@ -6506,7 +6498,6 @@ def scale(x, scale(${scale_type}): ${scale_comment} bias(${bias_type}): ${bias_comment} bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment} - out(Tensor): Output tensor. act(basestring|None): Activation applied to the output. name(basestring|None): Name of the output. @@ -6515,12 +6506,11 @@ def scale(x, """ helper = LayerHelper('scale', **locals()) - if out is None: - if name is None: - out = helper.create_tmp_variable(dtype=x.dtype) - else: - out = helper.create_variable( - name=name, dtype=x.dtype, persistable=False) + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) helper.append_op( type='scale', @@ -6534,73 +6524,31 @@ def scale(x, return helper.append_activation(out) -def elementwise_add(x, - y, - out=None, - axis=-1, - use_mkldnn=False, - act=None, - name=None): +def elementwise_add(x, y, axis=-1, use_mkldnn=False, act=None, name=None): return _elementwise_op(LayerHelper('elementwise_add', **locals())) -def elementwise_div(x, - y, - out=None, - axis=-1, - use_mkldnn=False, - act=None, - name=None): +def elementwise_div(x, y, axis=-1, use_mkldnn=False, act=None, name=None): return _elementwise_op(LayerHelper('elementwise_div', **locals())) -def elementwise_sub(x, - y, - out=None, - axis=-1, - use_mkldnn=False, - act=None, - name=None): +def elementwise_sub(x, y, axis=-1, use_mkldnn=False, act=None, name=None): return _elementwise_op(LayerHelper('elementwise_sub', **locals())) -def elementwise_mul(x, - y, - out=None, - axis=-1, - use_mkldnn=False, - act=None, - name=None): +def elementwise_mul(x, y, axis=-1, use_mkldnn=False, act=None, name=None): return _elementwise_op(LayerHelper('elementwise_mul', **locals())) -def elementwise_max(x, - y, - out=None, - axis=-1, - use_mkldnn=False, - act=None, - name=None): +def elementwise_max(x, y, axis=-1, use_mkldnn=False, act=None, name=None): return _elementwise_op(LayerHelper('elementwise_max', **locals())) -def elementwise_min(x, - y, - out=None, - axis=-1, - use_mkldnn=False, - act=None, - name=None): +def elementwise_min(x, y, axis=-1, use_mkldnn=False, act=None, name=None): return _elementwise_op(LayerHelper('elementwise_min', **locals())) -def elementwise_pow(x, - y, - out=None, - axis=-1, - use_mkldnn=False, - act=None, - name=None): +def elementwise_pow(x, y, axis=-1, 
use_mkldnn=False, act=None, name=None): return _elementwise_op(LayerHelper('elementwise_pow', **locals())) @@ -6612,7 +6560,6 @@ for func in [ func.__doc__ = _generate_doc_string_( op_proto, additional_args_lines=[ - "out (Tensor): The output tensor of elementwise op.", "act (basestring|None): Activation applied to the output.", "name (basestring|None): Name of the output." ]) From 5d7395cd0f29405f43c2da0b97fb45ec83f59db4 Mon Sep 17 00:00:00 2001 From: wanghaoshuang <wanghaoshuang@baidu.com> Date: Thu, 27 Sep 2018 05:51:27 +0000 Subject: [PATCH 08/67] Fix warning of roi perspective transform op. --- .../detection/roi_perspective_transform_op.cc | 10 +++++----- .../detection/roi_perspective_transform_op.cu | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index 4cc980b41b..3db9ff947b 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -260,8 +260,8 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> { roi2image.Resize({rois_num}); int* roi2image_data = roi2image.mutable_data<int>(ctx.GetPlace()); auto lod = rois->lod().back(); - for (int i = 0; i < lod.size() - 1; ++i) { - for (int j = lod[i]; j < lod[i + 1]; ++j) { + for (size_t i = 0; i < lod.size() - 1; ++i) { + for (size_t j = lod[i]; j < lod[i + 1]; ++j) { roi2image_data[j] = i; } } @@ -393,8 +393,8 @@ class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel<T> { roi2image.Resize({rois_num}); int* roi2image_data = roi2image.mutable_data<int>(ctx.GetPlace()); auto lod = rois->lod().back(); - for (int i = 0; i < lod.size() - 1; ++i) { - for (int j = lod[i]; j < lod[i + 1]; ++j) { + for (size_t i = 0; i < lod.size() - 1; ++i) { + for (size_t j = lod[i]; j < lod[i + 1]; ++j) { roi2image_data[j] = i; } } @@ -404,7 +404,7 @@ class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel<T> { for (int in_h = 0; in_h < in_height; ++in_h) { for (int in_w = 0; in_w < in_width; ++in_w) { T gradient = 0.0; - for (int roi_idx = lod[n]; roi_idx < lod[n + 1]; ++roi_idx) { + for (size_t roi_idx = lod[n]; roi_idx < lod[n + 1]; ++roi_idx) { const T* rois = rois_data + roi_idx * 8; T roi_x[4]; T roi_y[4]; diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu index b683b7573d..c82930cc49 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu @@ -345,8 +345,8 @@ class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel<T> { roi2image.Resize({rois_num}); int* roi2image_data = roi2image.mutable_data<int>(platform::CPUPlace()); auto lod = rois->lod().back(); - for (int i = 0; i < lod.size() - 1; ++i) { - for (int j = lod[i]; j < lod[i + 1]; ++j) { + for (size_t i = 0; i < lod.size() - 1; ++i) { + for (size_t j = lod[i]; j < lod[i + 1]; ++j) { roi2image_data[j] = i; } } @@ -432,7 +432,7 @@ __global__ void RoiTransformGradKernel( T gradient = 0.0; // Accumulate gradient over all RoIs that interpolated this element - for (int roi_idx = lod[n]; roi_idx < lod[n + 1]; ++roi_idx) { + for (size_t roi_idx = lod[n]; roi_idx < lod[n + 1]; ++roi_idx) { const T* rois = rois_data + roi_idx * 8; T roi_x[4]; T roi_y[4]; From 153d4f5d152fd055a086b29b6b0c6b3a23ce6f4d Mon Sep 17 
00:00:00 2001 From: wanghaoshuang <wanghaoshuang@baidu.com> Date: Thu, 27 Sep 2018 05:54:52 +0000 Subject: [PATCH 09/67] test=develop --- paddle/fluid/operators/detection/roi_perspective_transform_op.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index 3db9ff947b..42c720e701 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -104,7 +104,6 @@ bool in_quad(T x, T y, T roi_x[], T roi_y[]) { * a31 = (dx3 * dy2 - dx2 * dy3) / (dx1 * dy2 - dx2 * dy1) / (w - 1) * a32 = (dx1 * dy3 - dx3 * dy1) / (dx1 * dy2 - dx2 * dy1) / (h - 1) * a33 = 1 - * */ template <typename T> void get_transform_matrix(const int transformed_width, From 7ab5626dee0e08262d0b3eaf8f89f05217cccde8 Mon Sep 17 00:00:00 2001 From: Jacek Czaja <jacek.czaja@intel.com> Date: Thu, 13 Sep 2018 14:26:52 +0200 Subject: [PATCH 10/67] - Added initial pass for embedding-fc-lstm - Added draft of new operator - Added fused embedding fc lstm files - First time embedding_fc_lstm_fuse_pass was invoked in test_text_classification - Added Embedding pattern - Not crashing - Enabled draft of embedding_fc_lstm pass (does it job) - First working (Seqcompute only) version - Removed diagnostic comment - First enabling of BatchCompute - Disabling pass for embedding with is_sparse and is_distributed - Cosmetics - Style - Style --- paddle/fluid/framework/ir/CMakeLists.txt | 1 + .../ir/embedding_fc_lstm_fuse_pass.cc | 242 +++++++ .../ir/embedding_fc_lstm_fuse_pass.h | 40 ++ .../framework/ir/graph_pattern_detector.cc | 18 + .../framework/ir/graph_pattern_detector.h | 17 + paddle/fluid/inference/analysis/analyzer.h | 17 +- .../operators/fused_embedding_fc_lstm_op.cc | 608 ++++++++++++++++++ .../operators/fused_embedding_fc_lstm_op.h | 41 ++ 8 files changed, 976 insertions(+), 8 deletions(-) create mode 100644 paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc create mode 100644 paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h create mode 100644 paddle/fluid/operators/fused_embedding_fc_lstm_op.cc create mode 100644 paddle/fluid/operators/fused_embedding_fc_lstm_op.h diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 4dca3ceb45..01733fdda2 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -34,6 +34,7 @@ endif() pass_library(attention_lstm_fuse_pass inference) pass_library(infer_clean_graph_pass inference) pass_library(fc_lstm_fuse_pass inference) +pass_library(embedding_fc_lstm_fuse_pass inference) pass_library(fc_gru_fuse_pass inference) pass_library(seq_concat_fc_fuse_pass inference) diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc new file mode 100644 index 0000000000..38495125c3 --- /dev/null +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc @@ -0,0 +1,242 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h"
+#include <string>
+#include "paddle/fluid/framework/lod_tensor.h"
+
+#include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/cpu_vec.h"
+#include "paddle/fluid/operators/math/fc_compute.h"
+#include "paddle/fluid/platform/cpu_info.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+static int BuildFusion(Graph* graph, const std::string& name_scope,
+                       Scope* scope, bool with_fc_bias) {
+  GraphPatternDetector gpd;
+  auto* pattern = gpd.mutable_pattern();
+
+  // Build pattern
+  PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "x"))
+                  ->assert_is_op_input("lookup_table")
+                  ->assert_var_not_persistable();
+  patterns::Embedding embedding_pattern(pattern, name_scope);
+  // TODO(jczaja): Intermediate can only be for vars that are not used
+  //               anywhere else, but the lookup table output may go into
+  //               another LSTM (for the reverse direction)
+  auto* embedding_out = embedding_pattern(x);
+  patterns::FC fc_pattern(pattern, name_scope);
+
+  // fc_out is a tmp var, will be removed after fuse, so marked as intermediate.
+  auto* fc_out = fc_pattern(embedding_out, with_fc_bias)->AsIntermediate();
+  patterns::LSTM lstm_pattern(pattern, name_scope);
+  lstm_pattern(fc_out);
+
+  // Create New OpDesc
+  auto embedding_lstm_creator = [&](Node* embedding, Node* W, Node* lstm,
+                                    Node* input, Node* weight_x, Node* weight_h,
+                                    Node* bias, Node* hidden, Node* cell,
+                                    Node* xx, Node* fc_bias) {
+    OpDesc op_desc;
+    op_desc.SetType("fused_embedding_fc_lstm");
+#define SET_IN(Key, node__) op_desc.SetInput(#Key, {node__->Name()});
+    SET_IN(Ids, input);
+    SET_IN(WeightH, weight_h);
+    // Need to have this passed, as we need the Wc data for peephole connections
+    SET_IN(Bias, bias);
+#undef SET_IN
+
+    // Multiply embeddings with Weights
+    PADDLE_ENFORCE(scope);
+    const std::string& embeddings = patterns::UniqueKey("Embeddings");
+    auto* embeddings_var = scope->Var(embeddings);
+    PADDLE_ENFORCE(embeddings_var);
+    auto* embeddings_tensor =
+        embeddings_var->GetMutable<framework::LoDTensor>();
+    // Get WeightX size: [single_embedding, fc_size]
+    // and embedding size: [dict_size, single_embedding]
+    // and create the new embeddings size, e.g. [dict_size, hidden_size]
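+    // In effect the code below precomputes
+    //   new_embeddings = E * Wx + ones * b^T
+    // (a row-wise broadcast of the combined biases), folding the FC into the
+    // embedding table offline, so that at runtime a single row lookup in
+    // `embeddings` already yields the FC output for a given word id.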
+    auto* embedding_var = scope->FindVar(W->Name());
+    PADDLE_ENFORCE(embedding_var);
+    const auto& embedding_tensor = embedding_var->Get<framework::LoDTensor>();
+
+    const auto& weightx_tensor =
+        scope->FindVar(weight_x->Name())->Get<framework::LoDTensor>();
+    embeddings_tensor->Resize(
+        {embedding_tensor.dims()[0], weightx_tensor.dims()[1]});
+
+    // Multiply embeddings by WeightX and add bias
+    auto embedding_data = embedding_tensor.data<float>();
+    auto weightx_data = weightx_tensor.data<float>();
+    auto embeddings_data =
+        embeddings_tensor->mutable_data<float>(platform::CPUPlace());
+
+    // Gather the biases that will be added into the GEMM result
+    auto* lstm_bias_var = scope->FindVar(bias->Name());
+    PADDLE_ENFORCE(lstm_bias_var);
+    const auto& lstm_bias_tensor = lstm_bias_var->Get<framework::LoDTensor>();
+
+    auto alpha = 1.0f;
+    auto beta = 1.0f;
+    int m = embedding_tensor.dims()[0];
+    int n = weightx_tensor.dims()[1];
+    int k = embedding_tensor.dims()[1];
+
+    // Copy only the gate bias values (actual bias data, not the peephole
+    // weights)
+    std::vector<float> combined_biases(n, 0.0f);
+    memcpy(&combined_biases[0], lstm_bias_tensor.data<float>(),
+           n * sizeof(float));
+
+    if (with_fc_bias) {
+      // Add the FC bias to the LSTM bias (both go into the GEMM result)
+      auto* fc_bias_var = scope->FindVar(fc_bias->Name());
+      const auto& fc_bias_tensor = fc_bias_var->Get<framework::LoDTensor>();
+      for (int i = 0; i < fc_bias_tensor.numel(); i++) {
+        combined_biases[i] =
+            lstm_bias_tensor.data<float>()[i] + fc_bias_tensor.data<float>()[i];
+      }
+    }
+
+    // Broadcast the combined biases across all embedding rows
+    std::vector<float> ones(m, 1.0f);
+    paddle::operators::math::CBlas<float>::GEMM(
+        CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, 1, alpha, &ones[0], 1,
+        &combined_biases[0], n, 0.0f, embeddings_data, n);
+
+    // Wx * embeddings, accumulated on top of the broadcast biases
+    paddle::operators::math::CBlas<float>::GEMM(
+        CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha,
+        embedding_data, k, weightx_data, n, beta, embeddings_data, n);
+    op_desc.SetInput("Embeddings", {embeddings});
+
+    // Create temp variables.
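+    // (batched intermediates consumed by the fused op's non-seq compute path)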
+ const std::string BatchedInput = patterns::UniqueKey("BatchedInput"); + const std::string BatchedCellPreAct = + patterns::UniqueKey("BatchedCellPreAct"); + const std::string BatchedGate = patterns::UniqueKey("BatchedGate"); + + scope->Var(BatchedInput)->GetMutable<framework::LoDTensor>(); + scope->Var(BatchedCellPreAct)->GetMutable<framework::LoDTensor>(); + scope->Var(BatchedGate)->GetMutable<framework::LoDTensor>(); + + op_desc.SetInput("H0", {}); + op_desc.SetInput("C0", {}); + op_desc.SetOutput("Hidden", {hidden->Name()}); + op_desc.SetOutput("Cell", {cell->Name()}); + op_desc.SetOutput("XX", {xx->Name()}); + op_desc.SetOutput("BatchedGate", {BatchedGate}); + op_desc.SetOutput("BatchCellPreAct", {BatchedCellPreAct}); + op_desc.SetOutput("BatchedInput", {BatchedInput}); + op_desc.SetAttr("is_reverse", lstm->Op()->GetAttr("is_reverse")); + op_desc.SetAttr("use_peepholes", lstm->Op()->GetAttr("use_peepholes")); + // TODO(TJ): get from attr + op_desc.SetAttr("use_seq", true); + + PADDLE_ENFORCE(graph->Has(kParamScopeAttr)); + auto* scope = graph->Get<Scope*>(kParamScopeAttr); +#define OP_SET_OUT(x) \ + const std::string x = patterns::UniqueKey(#x); \ + op_desc.SetOutput(#x, {x}); \ + scope->Var(x)->GetMutable<LoDTensor>() + OP_SET_OUT(BatchedCell); + OP_SET_OUT(BatchedHidden); + OP_SET_OUT(ReorderedH0); + OP_SET_OUT(ReorderedC0); +#undef OP_SET_OUT + + auto* op = graph->CreateOpNode(&op_desc); + IR_NODE_LINK_TO(input, op); + IR_NODE_LINK_TO(weight_x, op); + IR_NODE_LINK_TO(weight_h, op); + IR_NODE_LINK_TO(bias, op); + IR_NODE_LINK_TO(op, hidden); + return op; + }; + + int fusion_count{0}; + + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + GET_IR_NODE_FROM_SUBGRAPH(lstm, lstm, lstm_pattern); + GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, lstm_pattern); + GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, lstm_pattern); + GET_IR_NODE_FROM_SUBGRAPH(Cell, Cell, lstm_pattern); + GET_IR_NODE_FROM_SUBGRAPH(Hidden, Hidden, lstm_pattern); + GET_IR_NODE_FROM_SUBGRAPH(lookup_table, lookup_table, embedding_pattern); + GET_IR_NODE_FROM_SUBGRAPH(W, W, embedding_pattern); + GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); + GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern); + + // TODO(jczaja): Add support for is_sparse / is_distributed + auto is_sparse = boost::get<bool>(lookup_table->Op()->GetAttr("is_sparse")); + auto is_distributed = + boost::get<bool>(lookup_table->Op()->GetAttr("is_distributed")); + + if (is_sparse == true || is_distributed == true) { + return; + } + + if (with_fc_bias) { + GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); + GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); + embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight, + Bias, Hidden, Cell, fc_out, fc_bias); + // Remove unneeded nodes. + // TODO(jczaja): Proper removing of loopup table + std::unordered_set<const Node*> marked_nodes( + //{lookup_table, mul, lstm, elementwise_add, fc_bias, W}); + {mul, lstm, elementwise_add, fc_bias}); + GraphSafeRemoveNodes(graph, marked_nodes); + } else { + GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern); + embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight, + Bias, Hidden, Cell, fc_out, nullptr); + // Remove unneeded nodes. 
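+      // (lookup_table and W are deliberately kept alive here; the embedding
+      //  output may still feed another, reverse-direction LSTM, see the
+      //  pattern TODO near the top of BuildFusion)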
+ // TODO(jczaja): Proper removing of loopup table + // std::unordered_set<const Node*> marked_nodes({lookup_table, W, mul, + // lstm}); + std::unordered_set<const Node*> marked_nodes({mul, lstm}); + GraphSafeRemoveNodes(graph, marked_nodes); + } + + ++fusion_count; + }; + + gpd(graph, handler); + + return fusion_count; +} + +std::unique_ptr<ir::Graph> EmbeddingFCLSTMFusePass::ApplyImpl( + std::unique_ptr<ir::Graph> graph) const { + FusePassBase::Init(name_scope_, graph.get()); + + int fusion_count = BuildFusion(graph.get(), name_scope_, param_scope(), + true /*with_fc_bias*/); + + AddStatis(fusion_count); + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(embedding_fc_lstm_fuse_pass, + paddle::framework::ir::EmbeddingFCLSTMFusePass); diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h new file mode 100644 index 0000000000..e5ad3067ec --- /dev/null +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h @@ -0,0 +1,40 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + +namespace paddle { +namespace framework { +namespace ir { + +// Fusing of Embedding , FC and LSTM op + +// Just FC without bias +class EmbeddingFCLSTMFusePass : public FusePassBase { + public: + virtual ~EmbeddingFCLSTMFusePass() {} + + protected: + std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const; + + const std::string name_scope_{"embedding_fc_lstm_fuse"}; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 6d2c51b0e9..46c6a52c09 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -692,6 +692,24 @@ PDNode *patterns::FC::operator()(paddle::framework::ir::PDNode *x, } } +PDNode *patterns::Embedding::operator()(PDNode *x) { + x->assert_is_op_input("lookup_table", "Ids"); + auto *lookup_table_op = + pattern->NewNode(lookup_table_repr())->assert_is_op("lookup_table"); +#define NEW_NODE(arg__, io__) \ + auto *arg__ = pattern->NewNode(arg__##_repr()) \ + ->assert_is_op_##io__("lookup_table", #arg__); + + NEW_NODE(W, input); + + NEW_NODE(Out, output); +#undef NEW_NODE + + lookup_table_op->LinksFrom({x, W}); + lookup_table_op->LinksTo({Out}); + return Out; +} + PDNode *patterns::LSTM::operator()(PDNode *x) { x->assert_is_op_input("lstm", "Input"); auto *lstm_op = pattern->NewNode(lstm_repr())->assert_is_op("lstm"); diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 69b486c29d..508113bf4f 100644 --- 
a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -418,6 +418,23 @@ struct FC : public PatternBase { PATTERN_DECL_NODE(Out); }; +// Embedding +struct Embedding : public PatternBase { + Embedding(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "embedding") {} + + PDNode* operator()(PDNode* x); + + // declare operator node's name + PATTERN_DECL_NODE(lookup_table); + // Inputs + // + PATTERN_DECL_NODE(Ids); + PATTERN_DECL_NODE(W); // embeddings + // Outputs + PATTERN_DECL_NODE(Out); +}; + struct LSTM : public PatternBase { LSTM(PDPattern* pattern, const std::string& name_scope) : PatternBase(pattern, name_scope, "lstm") {} diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h index 9bdbefc07c..0aa9367bf5 100644 --- a/paddle/fluid/inference/analysis/analyzer.h +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -64,14 +64,15 @@ class Analyzer : public OrderedRegistry<PassManager> { // larger fusion. const std::vector<std::string> all_ir_passes_{{ // Manual update the passes here. - "infer_clean_graph_pass", // - "attention_lstm_fuse_pass", // - "fc_lstm_fuse_pass", // - "mul_lstm_fuse_pass", // - "fc_gru_fuse_pass", // - "mul_gru_fuse_pass", // - "seq_concat_fc_fuse_pass", // - "fc_fuse_pass", // + "infer_clean_graph_pass", // + "attention_lstm_fuse_pass", // + "embedding_fc_lstm_fuse_pass", // + "fc_lstm_fuse_pass", // + "mul_lstm_fuse_pass", // + "fc_gru_fuse_pass", // + "mul_gru_fuse_pass", // + "seq_concat_fc_fuse_pass", // + "fc_fuse_pass", // #ifdef PADDLE_WITH_MKLDNN "conv_relu_mkldnn_fuse_pass", // #endif diff --git a/paddle/fluid/operators/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused_embedding_fc_lstm_op.cc new file mode 100644 index 0000000000..3c4cc77452 --- /dev/null +++ b/paddle/fluid/operators/fused_embedding_fc_lstm_op.cc @@ -0,0 +1,608 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/fused_embedding_fc_lstm_op.h" +#include <string> +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/cpu_vec.h" +#include "paddle/fluid/operators/math/fc_compute.h" +#include "paddle/fluid/operators/math/sequence2batch.h" +#include "paddle/fluid/platform/cpu_info.h" + +namespace paddle { +namespace operators { + +void FusedEmbeddingFCLSTMOp::InferShape( + framework::InferShapeContext* ctx) const { + PADDLE_ENFORCE(ctx->HasInput("Embeddings"), + "Assert only one Input(Embeddings) of LSTM."); + PADDLE_ENFORCE(ctx->HasInput("WeightH"), + "Assert only one Input(WeightH) of LSTM."); + PADDLE_ENFORCE(ctx->HasInput("Bias"), "Assert only one Input(Bias) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("XX"), "Assert only one Output(XX) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("Hidden"), + "Assert only one Output(Hidden) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("Cell"), + "Assert only one Output(Cell) of LSTM."); + PADDLE_ENFORCE(ctx->HasInput("Ids"), + "Input(Ids) of LookupTableOp should not be null."); + + auto table_dims = ctx->GetInputDim("Embeddings"); + auto ids_dims = ctx->GetInputDim("Ids"); + int ids_rank = ids_dims.size(); + + PADDLE_ENFORCE_EQ(table_dims.size(), 2); + PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1, + "The last dimension of the 'Ids' tensor must be 1."); + + auto x_dims = ctx->GetInputDim("Ids"); + PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(Ids)'s rank must be 2."); + + if (ctx->HasInput("H0")) { + PADDLE_ENFORCE(ctx->HasInput("C0"), + "Input(Cell) and Input(Hidden) of LSTM should not " + "be null at the same time."); + auto h_dims = ctx->GetInputDim("H0"); + auto c_dims = ctx->GetInputDim("C0"); + PADDLE_ENFORCE(h_dims == c_dims, + "The dimension of Input(H0) and Input(C0) " + "should be the same."); + } + + auto embeddings_dims = ctx->GetInputDim("Embeddings"); + PADDLE_ENFORCE_EQ(embeddings_dims.size(), 2, + "The rank of Input(Embeddings) should be 2."); + // PADDLE_ENFORCE_EQ(wx_dims[0], x_dims[1], + // "The first dimension of Input(Embeddings) " + // "should be %d.", + // x_dims[1]); + + auto wh_dims = ctx->GetInputDim("WeightH"); + int frame_size = wh_dims[1] / 4; + PADDLE_ENFORCE_EQ(wh_dims.size(), 2, + "The rank of Input(WeightH) should be 2."); + PADDLE_ENFORCE_EQ(wh_dims[0], frame_size, + "The first dimension of Input(WeightH) " + "should be %d.", + frame_size); + PADDLE_ENFORCE_EQ(wh_dims[1], 4 * frame_size, + "The second dimension of Input(WeightH) " + "should be 4 * %d.", + frame_size); + + auto b_dims = ctx->GetInputDim("Bias"); + PADDLE_ENFORCE_EQ(b_dims.size(), 2, "The rank of Input(Bias) should be 2."); + PADDLE_ENFORCE_EQ(b_dims[0], 1, + "The first dimension of Input(Bias) should be 1."); + PADDLE_ENFORCE_EQ( + b_dims[1], (ctx->Attrs().Get<bool>("use_peepholes") ? 7 : 4) * frame_size, + "The second dimension of Input(Bias) should be " + "7 * %d if enable peepholes connection or" + "4 * %d if disable peepholes", + frame_size, frame_size); + + framework::DDim out_dims({x_dims[0], frame_size}); + ctx->SetOutputDim("Hidden", out_dims); + ctx->SetOutputDim("Cell", out_dims); + ctx->ShareLoD("Ids", "Hidden"); + ctx->ShareLoD("Ids", "Cell"); + int xx_width; + if (ctx->Attrs().Get<bool>("use_seq")) { + xx_width = wh_dims[1]; + } else { + xx_width = x_dims[1] > wh_dims[1] ? 
wh_dims[1] : x_dims[1]; + PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), + "Assert only one Output(BatchedInput) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("BatchedHidden"), + "Assert only one Output(BatchedHidden) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("BatchedCell"), + "Assert only one Output(BatchedCell) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), + "Assert only one Output(ReorderedH0) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("ReorderedC0"), + "Assert only one Output(ReorderedC0) of LSTM."); + ctx->SetOutputDim("BatchedInput", {x_dims[0], wh_dims[1]}); + ctx->SetOutputDim("BatchedHidden", out_dims); + ctx->SetOutputDim("BatchedCell", out_dims); + } + ctx->SetOutputDim("XX", {x_dims[0], xx_width}); + ctx->ShareLoD("Ids", "XX"); +} + +framework::OpKernelType FusedEmbeddingFCLSTMOp::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + return framework::OpKernelType( + framework::ToDataType( + ctx.Input<framework::LoDTensor>("Embeddings")->type()), + ctx.device_context()); +} + +void FusedEmbeddingFCLSTMOpMaker::Make() { + AddInput("Ids", + "An input with type int32 or int64 " + "containing the ids to be looked up in Embeddings. " + "The last dimension size must be 1."); + AddInput("Embeddings", + "(Tensor) the learnable weights of X." + " - The shape is (M x 4D), where M is the dim size of x, D is the " + "hidden size. " + " - Weight = {W_cx, W_ix, W_fx, W_ox}"); + AddInput("WeightH", + "(Tensor) same as LSTMOp, the learnable hidden-hidden weights." + " - The shape is (D x 4D), where D is the hidden size. " + " - Weight = {W_ch, W_ih, W_fh, W_oh}"); + AddInput("Bias", + "(Tensor) the learnable weights. Almost the same as LSTMOp. " + "Note: the fc bias should be added into this (1x4D) bias. " + "It contains the input-hidden bias weight, plus the peephole " + "connections weight if `use_peepholes` is set to True. " + "1. `use_peepholes = False` " + " - The shape is (1 x 4D). " + " - Bias = {b_c, b_i, b_f, b_o}." + "2. `use_peepholes = True` " + " - The shape is (1 x 7D). " + " - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."); + AddInput("H0", + "(Tensor, optional) (same as LSTMOp) the initial hidden state is an " + "optional " + "input. This is a tensor with shape (N x D), where N is the " + "batch size and D is the hidden size.") + .AsDispensable(); + AddInput("C0", + "(Tensor, optional) (same as LSTMOp) the initial cell state is an " + "optional " + "input. This is a tensor with shape (N x D), where N is the " + "batch size. `H0` and `C0` can be NULL, but only at the same time.") + .AsDispensable(); + AddOutput("Hidden", + "(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. " + "The shape is (T x D), and lod is the same as the `Input`."); + AddOutput("Cell", + "(LoDTensor) (same as LSTMOp) the cell state of LSTM operator.
" + "The shape is (T x D), and lod is the same with the `Input`."); + AddOutput("XX", + "(LoDTensor) the result after X * WeightX (size is T x 4D)" + " or batched_X (size is T x M), this will be automatically chosen," + " where T is the total time steps in this mini-batch," + " D is the hidden size, M is the dim size of x input.") + .AsIntermediate(); + AddOutput("BatchedInput", "(LoDTensor) (T x 4D).").AsIntermediate(); + AddOutput("BatchedHidden", "(LoDTensor) (T x D).").AsIntermediate(); + AddOutput("BatchedCell", "(LoDTensor) (T x D).").AsIntermediate(); + AddOutput("ReorderedH0", "(LoDTensor) (N x D).").AsIntermediate(); + AddOutput("ReorderedC0", "(LoDTensor) (N x D).").AsIntermediate(); + AddAttr<bool>("use_peepholes", + "(bool, defalut: True) " + "whether to enable diagonal/peephole connections.") + .SetDefault(true); + AddAttr<bool>("is_reverse", + "(bool, defalut: False) " + "whether to compute reversed LSTM.") + .SetDefault(false); + AddAttr<bool>("use_seq", + "(bool, defalut: True) " + "whether to use seq mode to compute.") + .SetDefault(true); + AddAttr<std::string>("gate_activation", + "(string, default: sigmoid)" + "The activation for input gate, forget gate and output " + "gate, `sigmoid` by default.") + .SetDefault("sigmoid") + .InEnum({"sigmoid", "tanh", "relu", "identity"}); + AddAttr<std::string>("cell_activation", + "(string, default: tanh)" + "The activation for cell output, `tanh` by defalut.") + .SetDefault("tanh") + .InEnum({"sigmoid", "tanh", "relu", "identity"}); + AddAttr<std::string>("candidate_activation", + "(string, default: tanh)" + "The activation for candidate hidden state, " + "`tanh` by default.") + .SetDefault("tanh") + .InEnum({"sigmoid", "tanh", "relu", "identity"}); + AddComment(R"DOC( +Fusion Long-Short Term Memory (LSTM) Operator. +This operator fuse the X into LSTM, more details can refer to LSTM op. 
+)DOC"); +} + +template <typename T> +class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> { + public: +#define INIT_VEC_FUNC \ + std::function<void(const int, const T *, T *)> act_gate, act_cell, act_cand; \ + auto& act_gate_str = ctx.Attr<std::string>("gate_activation"); \ + auto& act_cell_str = ctx.Attr<std::string>("cell_activation"); \ + auto& act_cand_str = ctx.Attr<std::string>("candidate_activation"); \ + if (platform::jit::MayIUse(platform::jit::avx)) { \ + math::VecActivations<T, platform::jit::avx> act_functor; \ + act_gate = act_functor(act_gate_str); \ + act_cell = act_functor(act_cell_str); \ + act_cand = act_functor(act_cand_str); \ + } else { \ + math::VecActivations<T, platform::jit::isa_any> act_functor; \ + act_gate = act_functor(act_gate_str); \ + act_cell = act_functor(act_cell_str); \ + act_cand = act_functor(act_cand_str); \ + } + +#define INIT_BASE_INPUT_OUTPUT \ + auto* ids = ctx.Input<LoDTensor>("Ids"); \ + auto* h0 = ctx.Input<Tensor>("H0"); \ + auto* c0 = ctx.Input<Tensor>("C0"); \ + auto* embeddings = ctx.Input<Tensor>("Embeddings"); \ + auto* wh = ctx.Input<Tensor>("WeightH"); \ + auto* bias = ctx.Input<Tensor>("Bias"); \ + auto* xx = ctx.Output<LoDTensor>("XX"); \ + auto* hidden_out = ctx.Output<LoDTensor>("Hidden"); \ + auto* cell_out = ctx.Output<LoDTensor>("Cell"); \ + bool is_reverse = ctx.Attr<bool>("is_reverse"); \ + bool use_peepholes = ctx.Attr<bool>("use_peepholes"); + +#define INIT_BASE_SIZES \ + auto ids_dims = ids->dims(); /* T x M*/ \ + auto ids_numel = ids->numel(); /* T x 1*/ \ + auto wh_dims = wh->dims(); /* D x 4D*/ \ + const int D = wh_dims[0]; \ + const int D2 = D * 2; \ + const int D3 = D * 3; \ + int64_t row_number = embeddings->dims()[0]; \ + int64_t row_width = embeddings->dims()[1]; \ + const int D4 = wh_dims[1]; + +#define INIT_BASE_INPUT_DATAS \ + const int64_t* ids_data = ids->data<int64_t>(); \ + const T* embeddings_data = embeddings->data<T>(); \ + const T* wh_data = wh->data<T>(); \ + /* diagonal weight*/ \ + const T* wc_data = bias->data<T>() + D4; \ + /* for peephole only*/ \ + Tensor checked_cell; \ + T* checked_cell_data = nullptr; \ + auto place = ctx.GetPlace(); \ + if (use_peepholes) { \ + /* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/ \ + checked_cell_data = checked_cell.mutable_data<T>({2, D}, place); \ + } + +/// Compute LSTM +#define GEMM_WH_ADDON(bs, prev, out) \ + blas.GEMM(CblasNoTrans, CblasNoTrans, bs, D4, D, static_cast<T>(1), prev, D, \ + wh_data, D4, static_cast<T>(1), out, D4) + +// gates: W_ch, W_ih, W_fh, W_oh +#define GET_Ct(ct_1, gates, ct) \ + /* C_t = C_t-1 * fgated + cand_gated * igated*/ \ + act_cand(D, gates, gates); \ + blas.VMUL(D, gates, gates + D, gates + D); \ + blas.VMUL(D, ct_1, gates + D2, gates + D2); \ + blas.VADD(D, gates + D, gates + D2, ct) + +#define GET_Ht(ct, gates, ht) \ + /* H_t = act_cell(C_t) * ogated */ \ + act_cell(D, ct, gates + D2); \ + blas.VMUL(D, gates + D2, gates + D3, ht) + +#define GET_Ct_NOH0C0(gates, ct) \ + /* C_t = igated * cgated*/ \ + act_gate(D, gates + D, gates + D); \ + act_cand(D, gates, gates); \ + blas.VMUL(D, gates, gates + D, ct) + +#define COMPUTE_CtHt_NOH0C0(gates, ct, ht) \ + GET_Ct_NOH0C0(gates, ct); \ + act_gate(D, gates + D3, gates + D3); \ + GET_Ht(ct, gates, ht) + +#define COMPUTE_CtHt_PEEPHOLE_NOH0C0(gates, ct, ht) \ + GET_Ct_NOH0C0(gates, ct); \ + /* get outgated, put W_oc * C_t on igated */ \ + blas.VMUL(D, wc_data + D2, ct, gates + D); \ + blas.VADD(D, gates + D, gates + D3, gates + D3); \ + act_gate(D, gates + D3, gates + D3); \ + 
GET_Ht(ct, gates, ht) + +#define COMPUTE_CtHt(gates, ct_1, ct, ht) \ + act_gate(D3, gates + D, gates + D); \ + GET_Ct(ct_1, gates, ct); \ + GET_Ht(ct, gates, ht) + +#define COMPUTE_CtHt_PEEPHOLE(gates, ct_1, ct, ht) \ + /* get fgated and igated*/ \ + blas.VMUL(D, wc_data, ct_1, checked_cell_data); \ + blas.VMUL(D, wc_data + D, ct_1, checked_cell_data + D); \ + blas.VADD(D2, checked_cell_data, gates + D, gates + D); \ + act_gate(D2, gates + D, gates + D); \ + GET_Ct(ct_1, gates, ct); \ + /* get ogated*/ \ + blas.VMUL(D, wc_data + D2, ct, gates + D); \ + blas.VADD(D, gates + D, gates + D3, gates + D3); \ + act_gate(D, gates + D3, gates + D3); \ + GET_Ht(ct, gates, ht) + + void SeqCompute(const framework::ExecutionContext& ctx) const { + using DeviceContext = paddle::platform::CPUDeviceContext; + INIT_BASE_INPUT_OUTPUT + INIT_BASE_SIZES + INIT_VEC_FUNC + INIT_BASE_INPUT_DATAS + + // std::cout << "====> SeqCompute" << std::endl; + auto ids_lod = ids->lod(); + const int total_T = ids_dims[0]; + const int N = ids_lod[0].size() - 1; + const T* h0_data = h0 ? h0->data<T>() : nullptr; + const T* c0_data = c0 ? c0->data<T>() : nullptr; + T* xx_data = xx->mutable_data<T>(place); + T* h_out_data = hidden_out->mutable_data<T>(place); + T* c_out_data = cell_out->mutable_data<T>(place); + auto blas = math::GetBlas<DeviceContext, T>(ctx); + + for (int64_t i = 0; i < ids_numel; ++i) { + PADDLE_ENFORCE_LT(ids_data[i], row_number); + PADDLE_ENFORCE_GE(ids_data[i], 0, "ids %d", i); + memcpy(xx_data + i * row_width, embeddings_data + ids_data[i] * row_width, + row_width * sizeof(T)); + } + + int xx_offset = D4; + int gate_offset = D; + if (is_reverse) { + const int offset = (total_T - 1) * D; + xx_data = xx_data + offset * 4; + h_out_data = h_out_data + offset; + c_out_data = c_out_data + offset; + xx_offset = -D4; + gate_offset = -D; + } + +#define MOVE_ONE_STEP \ + prev_h_data = h_out_data; \ + prev_c_data = c_out_data; \ + xx_data = xx_data + xx_offset; \ + h_out_data = h_out_data + gate_offset; \ + c_out_data = c_out_data + gate_offset + +#define PROCESS_H0C0_DEFINES \ + int bid = is_reverse ? 
N - 1 - i : i; \ + int seq_len = ids_lod[0][bid + 1] - ids_lod[0][bid]; \ + const T* prev_c_data = nullptr; \ + const T* prev_h_data = nullptr; \ + int tstart = 0 + +#define PROCESS_H0C0_PEEPHOLE \ + PROCESS_H0C0_DEFINES; \ + if (h0_data) { \ + prev_h_data = h0_data + bid * D; \ + prev_c_data = c0_data + bid * D; \ + } else { \ + COMPUTE_CtHt_PEEPHOLE_NOH0C0(xx_data, c_out_data, h_out_data); \ + MOVE_ONE_STEP; \ + tstart = 1; \ + } + +#define PROCESS_H0C0 \ + PROCESS_H0C0_DEFINES; \ + if (h0_data) { \ + prev_h_data = h0_data + bid * D; \ + prev_c_data = c0_data + bid * D; \ + } else { \ + COMPUTE_CtHt_NOH0C0(xx_data, c_out_data, h_out_data); \ + MOVE_ONE_STEP; \ + tstart = 1; \ + } + + if (use_peepholes) { + for (int i = 0; i < N; ++i) { + PROCESS_H0C0_PEEPHOLE + for (int step = tstart; step < seq_len; ++step) { + GEMM_WH_ADDON(1, prev_h_data, xx_data); + COMPUTE_CtHt_PEEPHOLE(xx_data, prev_c_data, c_out_data, h_out_data); + MOVE_ONE_STEP; + } + } + } else { + for (int i = 0; i < N; ++i) { + PROCESS_H0C0 + for (int step = tstart; step < seq_len; ++step) { + GEMM_WH_ADDON(1, prev_h_data, xx_data); + COMPUTE_CtHt(xx_data, prev_c_data, c_out_data, h_out_data); + MOVE_ONE_STEP; + } + } + } +#undef PROCESS_H0C0_DEFINES +#undef PROCESS_H0C0_PEEPHOLE +#undef PROCESS_H0C0 +#undef MOVE_ONE_STEP + } + + void BatchCompute(const framework::ExecutionContext& ctx) const { + using DeviceContext = platform::CPUDeviceContext; + INIT_BASE_INPUT_OUTPUT + if (ids->lod()[0].size() == 2) { + SeqCompute(ctx); + return; + } + INIT_BASE_SIZES + INIT_VEC_FUNC + INIT_BASE_INPUT_DATAS + + // std::cout << "===> Batch Compute" << std::endl; + + auto* reordered_h0 = ctx.Output<Tensor>("ReorderedH0"); + auto* reordered_c0 = ctx.Output<Tensor>("ReorderedC0"); + auto* batched_input = ctx.Output<LoDTensor>("BatchedInput"); + auto* batched_c_out = ctx.Output<LoDTensor>("BatchedCell"); + auto* batched_h_out = ctx.Output<LoDTensor>("BatchedHidden"); + T* xx_data = xx->mutable_data<T>(place); + T* batched_input_data = batched_input->mutable_data<T>(place); + T* batched_c_out_data = batched_c_out->mutable_data<T>(place); + T* batched_h_out_data = batched_h_out->mutable_data<T>(place); + hidden_out->mutable_data<T>(place); + cell_out->mutable_data<T>(place); + + math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch; + auto& dev_ctx = ctx.template device_context<DeviceContext>(); + auto blas = math::GetBlas<DeviceContext, T>(dev_ctx); + + for (int64_t i = 0; i < ids_numel; ++i) { + PADDLE_ENFORCE_LT(ids_data[i], row_number); + PADDLE_ENFORCE_GE(ids_data[i], 0, "ids %d", i); + memcpy(xx_data + i * row_width, embeddings_data + ids_data[i] * row_width, + row_width * sizeof(T)); + } + + to_batch(dev_ctx, *xx, batched_input, true, is_reverse); + + auto batched_lod = batched_input->lod(); + const auto& seq_order = batched_lod[2]; + const int max_bs = seq_order.size(); + reordered_h0->Resize({max_bs, D}); + reordered_c0->Resize({max_bs, D}); + + int tstart = 0; + T* prev_h_data = nullptr; + T* prev_c_data = nullptr; + if (h0) { + // reorder h0, c0 + T* reordered_h0_data = reordered_h0->mutable_data<T>(place); + T* reordered_c0_data = reordered_c0->mutable_data<T>(place); + const T* h0_data = h0->data<T>(); + const T* c0_data = c0->data<T>(); + prev_h_data = reordered_h0_data; + prev_c_data = reordered_c0_data; + size_t sz = sizeof(T) * D; + for (int i = 0; i < max_bs; ++i) { + std::memcpy(reordered_h0_data, h0_data + seq_order[i] * D, sz); + std::memcpy(reordered_c0_data, c0_data + seq_order[i] * D, sz); + reordered_h0_data += D; + 
reordered_c0_data += D; + } + } else { + // compute without h0, c0 + T* cur_in_data = batched_input_data; + T* cur_h_out_data = batched_h_out_data; + T* cur_c_out_data = batched_c_out_data; + for (int i = 0; i < max_bs; ++i) { + GET_Ct_NOH0C0(cur_in_data, cur_c_out_data); + if (use_peepholes) { + blas.VMUL(D, wc_data + D2, cur_c_out_data, cur_in_data + D); + blas.VADD(D, cur_in_data + D, cur_in_data + D3, cur_in_data + D3); + } + act_gate(D, cur_in_data + D3, cur_in_data + D3); + GET_Ht(cur_c_out_data, cur_in_data, cur_h_out_data); + cur_in_data += D4; + cur_c_out_data += D; + cur_h_out_data += D; + } + tstart = 1; + prev_h_data = batched_h_out_data; + prev_c_data = batched_c_out_data; + } + const auto& batch_starts = batched_lod[0]; + const int max_seq_len = batch_starts.size() - 1; + const int offset = tstart * max_bs * D; + batched_input_data = batched_input_data + offset * 4; + batched_h_out_data = batched_h_out_data + offset; + batched_c_out_data = batched_c_out_data + offset; + +#define DEFINE_CUR \ + T* cur_in_data = batched_input_data; \ + T* cur_prev_c_data = prev_c_data; \ + T* cur_c_out_data = batched_c_out_data; \ + T* cur_h_out_data = batched_h_out_data + +#define MOVE_ONE_BATCH \ + cur_in_data += D4; \ + cur_prev_c_data += D; \ + cur_c_out_data += D; \ + cur_h_out_data += D + +#define MOVE_ONE_STEP \ + prev_c_data = batched_c_out_data; \ + prev_h_data = batched_h_out_data; \ + batched_c_out_data = cur_c_out_data; \ + batched_h_out_data = cur_h_out_data; \ + batched_input_data = cur_in_data + + if (use_peepholes) { + for (int step = tstart; step < max_seq_len; ++step) { + const int cur_bs = batch_starts[step + 1] - batch_starts[step]; + GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data); + DEFINE_CUR; + for (int i = 0; i < cur_bs; ++i) { + COMPUTE_CtHt_PEEPHOLE(cur_in_data, cur_prev_c_data, cur_c_out_data, + cur_h_out_data); + MOVE_ONE_BATCH; + } + MOVE_ONE_STEP; + } + } else { + for (int step = tstart; step < max_seq_len; ++step) { + const int cur_bs = batch_starts[step + 1] - batch_starts[step]; + GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data); + DEFINE_CUR; + for (int i = 0; i < cur_bs; ++i) { + COMPUTE_CtHt(cur_in_data, cur_prev_c_data, cur_c_out_data, + cur_h_out_data); + MOVE_ONE_BATCH; + } + MOVE_ONE_STEP; + } + } +#undef MOVE_ONE_STEP +#undef MOVE_ONE_BATCH +#undef DEFINE_CUR + + math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq; + batched_h_out->set_lod(batched_lod); + to_seq(dev_ctx, *batched_h_out, hidden_out); + batched_c_out->set_lod(batched_lod); + to_seq(dev_ctx, *batched_c_out, cell_out); + } + + void Compute(const framework::ExecutionContext& ctx) const override { + if (ctx.Attr<bool>("use_seq")) { + SeqCompute(ctx); + } else { + BatchCompute(ctx); + } + } + +#undef COMPUTE_CtHt_PEEPHOLE +#undef COMPUTE_CtHt +#undef GET_Ct_NOH0C0 +#undef COMPUTE_CtHt_NOH0C0 +#undef COMPUTE_CtHt_PEEPHOLE_NOH0C0 +#undef GET_Ht +#undef GET_Ct +#undef GEMM_WH_ADDON +#undef INIT_BASE_INPUT_DATAS +#undef INIT_BASE_SIZES +#undef INIT_BASE_INPUT_OUTPUT +#undef INIT_VEC_FUNC +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(fused_embedding_fc_lstm, ops::FusedEmbeddingFCLSTMOp, + ops::FusedEmbeddingFCLSTMOpMaker, + paddle::framework::DefaultGradOpDescMaker<true>); + +REGISTER_OP_CPU_KERNEL(fused_embedding_fc_lstm, + ops::FusedEmbeddingFCLSTMKernel<float>, + ops::FusedEmbeddingFCLSTMKernel<double>); diff --git a/paddle/fluid/operators/fused_embedding_fc_lstm_op.h 
b/paddle/fluid/operators/fused_embedding_fc_lstm_op.h new file mode 100644 index 0000000000..2775b2ac04 --- /dev/null +++ b/paddle/fluid/operators/fused_embedding_fc_lstm_op.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; +using Tensor = framework::Tensor; + +class FusedEmbeddingFCLSTMOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class FusedEmbeddingFCLSTMOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override; +}; + +} // namespace operators +} // namespace paddle From d5114c60b098a3c5f778d48b70d0683b093b49db Mon Sep 17 00:00:00 2001 From: Jacek Czaja <jacek.czaja@intel.com> Date: Tue, 25 Sep 2018 11:00:30 +0200 Subject: [PATCH 11/67] - Reviewers' suggestions to fused_embedding_fc_lstm_op --- .../fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc | 11 ++++++----- paddle/fluid/operators/fused_embedding_fc_lstm_op.cc | 4 ---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc index 38495125c3..af3f23cbf9 100644 --- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License.
#include "paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h" +#include <algorithm> #include <string> #include "paddle/fluid/framework/lod_tensor.h" @@ -98,17 +99,17 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, // Copy only gate biases values (only actual bias data, not peephole // weights) - std::vector<float> combined_biases(n, 0.0f); - memcpy(&combined_biases[0], lstm_bias_tensor.data<float>(), - n * sizeof(float)); + std::vector<float> combined_biases; + combined_biases.reserve(n); + std::copy_n(lstm_bias_tensor.data<float>(), n, + std::back_inserter(combined_biases)); if (with_fc_bias) { // Add FC-bias with LSTM-bias (into GEMM result to be) auto* fc_bias_var = scope->FindVar(fc_bias->Name()); const auto& fc_bias_tensor = fc_bias_var->Get<framework::LoDTensor>(); for (int i = 0; i < fc_bias_tensor.numel(); i++) { - combined_biases[i] = - lstm_bias_tensor.data<float>()[i] + fc_bias_tensor.data<float>()[i]; + combined_biases[i] += fc_bias_tensor.data<float>()[i]; } } diff --git a/paddle/fluid/operators/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused_embedding_fc_lstm_op.cc index 3c4cc77452..0b917a4036 100644 --- a/paddle/fluid/operators/fused_embedding_fc_lstm_op.cc +++ b/paddle/fluid/operators/fused_embedding_fc_lstm_op.cc @@ -63,10 +63,6 @@ void FusedEmbeddingFCLSTMOp::InferShape( auto embeddings_dims = ctx->GetInputDim("Embeddings"); PADDLE_ENFORCE_EQ(embeddings_dims.size(), 2, "The rank of Input(Embeddings) should be 2."); - // PADDLE_ENFORCE_EQ(wx_dims[0], x_dims[1], - // "The first dimension of Input(Embeddings) " - // "should be %d.", - // x_dims[1]); auto wh_dims = ctx->GetInputDim("WeightH"); int frame_size = wh_dims[1] / 4; From 7aa0247bd13dbf016a5a7ed6ff14eb2fb841772f Mon Sep 17 00:00:00 2001 From: minqiyang <minqiyang@baidu.com> Date: Thu, 27 Sep 2018 19:25:16 +0800 Subject: [PATCH 12/67] Regenerate API.spec test=develop --- paddle/fluid/API.spec | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index e7f710bf2d..e4a84535d4 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -23,7 +23,7 @@ paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wai paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')) paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0)) paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.DistributeTranspilerConfig.__init__ +paddle.fluid.DistributeTranspilerConfig.__init__ paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)) paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)) paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None @@ -153,12 +153,6 @@ paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 
'use_ paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) -paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)) paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype', 'use_mkldnn'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32', False)) paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')) @@ -166,6 +160,12 @@ paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shap paddle.fluid.layers.sum ArgSpec(args=['x', 'use_mkldnn'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None) +paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) @@ -231,19 +231,6 @@ paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], 
varargs=None, keywords= paddle.fluid.layers.mean ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.gaussian_random ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.sampling_id ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.clip ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.clip_by_norm ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) @@ -326,7 +313,7 @@ paddle.fluid.transpiler.HashName.reset ArgSpec(args=['self'], varargs=None, keyw paddle.fluid.transpiler.RoundRobin.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.RoundRobin.dispatch ArgSpec(args=['self', 'varlist'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.RoundRobin.reset ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.transpiler.DistributeTranspilerConfig.__init__ +paddle.fluid.transpiler.DistributeTranspilerConfig.__init__ paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn', 'use_mkldnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True, False)) paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max')) paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)) From 301af73ea9fe7e38af98d24607f3b909406fd073 Mon Sep 17 00:00:00 2001 From: minqiyang <minqiyang@baidu.com> Date: Thu, 27 Sep 2018 19:49:56 +0800 Subject: [PATCH 13/67] Port Ubuntu to support python 3.5 --- Dockerfile | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 
634be18a51..a2394e16c2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,6 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ apt-get install -y --allow-downgrades patchelf \ git python-pip python-dev python-opencv openssh-server bison \ + python3 python3-dev python3-pip \ libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ @@ -73,22 +74,32 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8 RUN easy_install -U pip && \ pip install -U wheel && \ pip install -U docopt PyYAML sphinx==1.5.6 && \ - pip install sphinx-rtd-theme==0.1.9 recommonmark + pip install sphinx-rtd-theme==0.1.9 recommonmark && \ + pip3 install --upgrade pip && \ + pip3 install -U wheel && \ + pip3 install -U docopt PyYAML sphinx==1.5.6 && \ + pip3 install sphinx-rtd-theme==0.1.9 recommonmark RUN pip install pre-commit 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ - pip install opencv-python + pip install opencv-python && \ + pip3 install pre-commit 'ipython==5.3.0' && \ + pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ + pip3 install opencv-python #For docstring checker RUN pip install pylint pytest astroid isort LinkChecker +RUN pip3 install pylint pytest astroid isort COPY ./python/requirements.txt /root/ RUN pip install -r /root/requirements.txt +RUN pip3 install -r /root/requirements.txt # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 RUN apt-get install -y libssl-dev libffi-dev RUN pip install certifi urllib3[secure] +RUN pip3 install certifi urllib3[secure] # Install woboq_codebrowser to /woboq From db790fff7a479a677bae6b0f0d4535de3f0feeee Mon Sep 17 00:00:00 2001 From: minqiyang <minqiyang@baidu.com> Date: Thu, 27 Sep 2018 19:53:38 +0800 Subject: [PATCH 14/67] Activate test test=develop --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a2394e16c2..3affe41016 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ apt-get install -y --allow-downgrades patchelf \ git python-pip python-dev python-opencv openssh-server bison \ - python3 python3-dev python3-pip \ + python3 python3-pip python3-dev \ libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ From 910cd415f2147291f5cee83c103c1a1bd84e982f Mon Sep 17 00:00:00 2001 From: Jacek Czaja <jacek.czaja@intel.com> Date: Thu, 27 Sep 2018 14:01:11 +0200 Subject: [PATCH 15/67] - Disabled embedding_fc_lstm_fuse by default and extended test_text_classification to use new op --- paddle/fluid/inference/api/paddle_inference_api.h | 2 +- .../api/analyzer_text_classification_tester.cc | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 984358b2bd..77b04bb6f5 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -216,7 +216,7 @@ struct AnalysisConfig : public NativeConfig { bool enable_ir_optim = true; // Manually determine the IR passes to run.
IrPassMode ir_mode{IrPassMode::kExclude}; - std::vector<std::string> ir_passes; + std::vector<std::string> ir_passes{"embedding_fc_lstm_fuse_pass"}; // NOTE this is just for internal development, please not use it. bool _use_mkldnn{false}; diff --git a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc index 340ef152f0..ca19475bda 100644 --- a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc @@ -104,5 +104,18 @@ TEST(Analyzer_Text_Classification, compare) { CompareNativeAndAnalysis(cfg, input_slots_all); } +TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) { + AnalysisConfig cfg; + SetConfig(&cfg); + // Enable embedding_fc_lstm_fuse_pass (disabled by default) + auto it = std::find(cfg.ir_passes.begin(), cfg.ir_passes.end(), + "embedding_fc_lstm_fuse_pass"); + if (it != cfg.ir_passes.end()) cfg.ir_passes.erase(it); + + std::vector<std::vector<PaddleTensor>> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); +} + } // namespace inference } // namespace paddle From 4a7b9f78332183521b230be21a8814acc3baca95 Mon Sep 17 00:00:00 2001 From: velconia <velconias@gmail.com> Date: Fri, 28 Sep 2018 13:00:07 +0800 Subject: [PATCH 16/67] Fix pip install in mac test=develop --- paddle/scripts/paddle_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 838d5dc869..b01bbd2e14 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -395,7 +395,7 @@ EOF ctest --output-on-failure -j8 # make install should also be test when unittest make install -j 8 - pip install /usr/local/opt/paddle/share/wheels/*.whl + pip install ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then paddle version fi From d24f1f0aa4da9497d158b5a983565a4683a02207 Mon Sep 17 00:00:00 2001 From: Xin Pan <panxin.grad@gmail.com> Date: Fri, 28 Sep 2018 14:52:00 +0800 Subject: [PATCH 17/67] Current scope needs to be thread-safe for training. Scope's API modifies its internal state, and it can be called from multiple threads during training. Hence, we need locks to protect the scope's internal state. We can optimize this in the future, but the current conditional-locking solution is buggy. test=develop --- paddle/fluid/framework/scope.cc | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 40dee143f5..1a727a2c8c 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -20,13 +20,6 @@ limitations under the License. */ #include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/string/printf.h" -// The mutex is not needed by training and inference, only for distribution. -#if PADDLE_WITH_DISTRIBUTE -#define WITH_LOCK 1 -#else -#define WITH_LOCK 0 -#endif - DEFINE_bool(benchmark, false, "Doing memory benchmark. It will make deleting scope synchronized, " "and add some memory usage logs."
@@ -56,24 +49,18 @@ int64_t GetEagerDeletionThreshold() { Scope::~Scope() { DropKids(); } Scope& Scope::NewScope() const { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif kids_.push_back(new Scope(this)); return *kids_.back(); } Variable* Scope::Var(const std::string& name) { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif return VarInternal(name); } Variable* Scope::Var(std::string* name) { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif auto new_name = string::Sprintf("%p.%d", this, vars_.size()); if (name != nullptr) { *name = new_name; @@ -82,39 +69,29 @@ Variable* Scope::Var(std::string* name) { } Variable* Scope::FindVar(const std::string& name) const { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif return FindVarInternal(name); } const Scope* Scope::FindScope(const Variable* var) const { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif return FindScopeInternal(var); } void Scope::DropKids() { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif for (Scope* s : kids_) delete s; kids_.clear(); } bool Scope::HasKid(const Scope* scope) const { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); return it != this->kids_.end(); } std::vector<std::string> Scope::LocalVarNames() const { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif std::vector<std::string> known_vars; known_vars.reserve(this->vars_.size()); for (auto& p : vars_) { @@ -124,9 +101,7 @@ std::vector<std::string> Scope::LocalVarNames() const { } void Scope::DeleteScope(Scope* scope) const { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); @@ -139,9 +114,7 @@ void Scope::DeleteScope(Scope* scope) const { } void Scope::EraseVars(const std::vector<std::string>& var_names) { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif std::set<std::string> var_set(var_names.begin(), var_names.end()); for (auto it = vars_.begin(); it != vars_.end();) { if (var_set.find(it->first) != var_set.end()) { @@ -154,16 +127,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) { void Scope::Rename(const std::string& origin_name, const std::string& new_name) const { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif RenameInternal(origin_name, new_name); } std::string Scope::Rename(const std::string& origin_name) const { -#if WITH_LOCK std::unique_lock<std::mutex> lock(mutex_); -#endif auto new_name = string::Sprintf("%p.%d", this, vars_.size()); RenameInternal(origin_name, new_name); return new_name; From 1dcd6ee532258e55d4c21ca2f2fe9e9039c3d87e Mon Sep 17 00:00:00 2001 From: Tao Luo <luotao02@baidu.com> Date: Fri, 28 Sep 2018 15:18:17 +0800 Subject: [PATCH 18/67] add resnet50 inference UT --- .../fluid/inference/tests/api/CMakeLists.txt | 8 ++ .../tests/api/analyzer_resnet50_tester.cc | 92 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 70f9e397c9..c3dd1f4336 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -70,6 +70,14 @@ if (NOT EXISTS 
${OCR_INSTALL_DIR}) endif() inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc) +# resnet50 +set(RESNET50_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50") +if (NOT EXISTS ${RESNET50_INSTALL_DIR}) + inference_download_and_uncompress(${RESNET50_INSTALL_DIR} ${INFERENCE_URL} "resnet50_model.tar.gz") +endif() +inference_analysis_test(test_analyzer_resnet50 SRCS analyzer_resnet50_tester.cc + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${RESNET50_INSTALL_DIR}/model) + # anakin if (WITH_ANAKIN AND WITH_MKL) # only needed in CI # anakin rnn1 diff --git a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc new file mode 100644 index 0000000000..0dda7f64ba --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc @@ -0,0 +1,92 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include <fstream> +#include <iostream> +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +void SetConfig(AnalysisConfig *cfg) { + cfg->param_file = FLAGS_infer_model + "/params"; + cfg->prog_file = FLAGS_infer_model + "/model"; + cfg->use_gpu = false; + cfg->device = 0; + cfg->enable_ir_optim = true; + cfg->specify_input_name = true; +} + +void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) { + PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data."); + + PaddleTensor input; + // channel=3, height/width=318 + std::vector<int> shape({FLAGS_batch_size, 3, 318, 318}); + input.shape = shape; + input.dtype = PaddleDType::FLOAT32; + + // fill input data, for profile easily, do not use random data here. + size_t size = FLAGS_batch_size * 3 * 318 * 318; + input.data.Resize(size * sizeof(float)); + float *input_data = static_cast<float *>(input.data.data()); + for (size_t i = 0; i < size; i++) { + *(input_data + i) = static_cast<float>(i) / size; + } + + std::vector<PaddleTensor> input_slots; + input_slots.assign({input}); + (*inputs).emplace_back(input_slots); +} + +// Easy for profiling independently. 
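+// Note on the flow below (helpers come from tester_helper.h): SetConfig and
+// SetInput above build the AnalysisConfig and a single synthetic batch,
+// TestPrediction exercises the predictor with FLAGS_num_threads (handy for
+// profiling), and the 512-feature output is size-checked only in the
+// single-thread, single-batch case.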
+TEST(Analyzer_resnet50, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector<PaddleTensor> outputs; + + std::vector<std::vector<PaddleTensor>> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); + + if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { + PADDLE_ENFORCE_EQ(outputs.size(), 1UL); + size_t size = GetSize(outputs[0]); + // output is a 512-dimension feature + EXPECT_EQ(size, 512 * FLAGS_batch_size); + } +} + +// Check the fuse status +TEST(Analyzer_resnet50, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); + int num_ops; + GetFuseStatis(cfg, &num_ops); +} + +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_resnet50, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector<std::vector<PaddleTensor>> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle From 1df69f7c9dc53e317babc32d0d91842a11fedd97 Mon Sep 17 00:00:00 2001 From: Jacek Czaja <jacek.czaja@intel.com> Date: Fri, 28 Sep 2018 09:42:13 +0200 Subject: [PATCH 19/67] - Fix to comment test=develop --- paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc index af3f23cbf9..b155da375f 100644 --- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc @@ -199,7 +199,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out, fc_bias); // Remove unneeded nodes. - // TODO(jczaja): Proper removing of loopup table + // TODO(jczaja): Proper removing of lookup table std::unordered_set<const Node*> marked_nodes( //{lookup_table, mul, lstm, elementwise_add, fc_bias, W}); {mul, lstm, elementwise_add, fc_bias}); @@ -209,7 +209,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, embedding_lstm_creator(lookup_table, W, lstm, subgraph.at(x), w, Weight, Bias, Hidden, Cell, fc_out, nullptr); // Remove unneeded nodes. 
- // TODO(jczaja): Proper removing of loopup table + // TODO(jczaja): Proper removing of lookup table // std::unordered_set<const Node*> marked_nodes({lookup_table, W, mul, // lstm}); std::unordered_set<const Node*> marked_nodes({mul, lstm}); From 9ae5baebfa3939a3af07a3e4338a34bb5667c993 Mon Sep 17 00:00:00 2001 From: JiabinYang <marsyang199376@gmail.com> Date: Fri, 28 Sep 2018 07:52:24 +0000 Subject: [PATCH 20/67] test=develop --- paddle/legacy/trainer/tests/CMakeLists.txt | 6 +++++- .../recognize_digits/CMakeLists.txt | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/paddle/legacy/trainer/tests/CMakeLists.txt b/paddle/legacy/trainer/tests/CMakeLists.txt index 08548bea4c..fbefcced56 100644 --- a/paddle/legacy/trainer/tests/CMakeLists.txt +++ b/paddle/legacy/trainer/tests/CMakeLists.txt @@ -16,7 +16,11 @@ endfunction() trainer_test(test_Compare) trainer_test(test_PyDataProviderWrapper) trainer_test(test_recurrent_machine_generation) -trainer_test(test_Trainer) +if(NOT APPLE) + trainer_test(test_Trainer) +else() + message(WARNING "These tests have been disabled on OSX due to random failures: \n test_Trainer") +endif() ############### test_TrainerOnePass ########################## if(WITH_PYTHON) diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt index 673c965b66..ad056aaa7b 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt @@ -2,6 +2,16 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") # default test -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() +if(NOT APPLE) + foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) + endforeach() +else() + foreach(src ${TEST_OPS}) + if(${src} STREQUAL "test_recognize_digits_conv") + message(WARNING "These tests have been disabled on OSX due to random failures: \n" ${src}) + else() + py_test(${src} SRCS ${src}.py) + endif() + endforeach() +endif()
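A minimal sketch of how the new fused op is toggled at inference time, assuming the AnalysisConfig shown in PATCH 15 (IrPassMode::kExclude treats ir_passes as a skip-list, so embedding_fc_lstm_fuse_pass ships disabled; the helper name below is hypothetical):

  #include <algorithm>
  #include <string>
  #include "paddle/fluid/inference/api/paddle_inference_api.h"

  // Erase the pass name from the exclude list so the analyzer may run the
  // embedding + FC + LSTM fusion, mirroring the test added in PATCH 15.
  void EnableEmbeddingFcLstmFuse(paddle::AnalysisConfig* cfg) {
    auto it = std::find(cfg->ir_passes.begin(), cfg->ir_passes.end(),
                        "embedding_fc_lstm_fuse_pass");
    if (it != cfg->ir_passes.end()) cfg->ir_passes.erase(it);
  }

The resulting config is then passed to the predictor factory as usual.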