From 8e19c324ab82feeea87cd582737ebae5bec95fb8 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 12 Jun 2018 12:47:11 +0800
Subject: [PATCH 01/21] update split_lod_tensor, create_array and array_length
 doc

---
 python/paddle/fluid/layers/control_flow.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 80e8ff484a..114c1f0ed4 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -62,6 +62,8 @@ def split_lod_tensor(input, mask, level=0):
     The output is the true branch and the false branch with the mask applied to
     the input at a certain level in the tensor.
 
+    Mainly used in IfElse to split data into two parts. Related API: IfElse.
+
     Args:
         input(tuple|list|None): The input tensor that contains complete
                                 lod information needed to construct the output.
         mask(list): A bool column vector which masks the input.
@@ -83,6 +85,7 @@ def split_lod_tensor(input, mask, level=0):
         out_true, out_false = layers.split_lod_tensor(
             input=x, mask=y, level=level)
+
     """
     helper = LayerHelper('split_lod_tensor', **locals())
     out_true = helper.create_tmp_variable(dtype=input.dtype)
@@ -887,14 +890,18 @@ def array_write(x, i, array=None):
 
 
 def create_array(dtype):
-    """This function creates an array of type :math:`LOD_TENSOR_ARRAY` using the
-    LayerHelper.
+    """
+    **Create LoDTensor Array**
+
+    This function creates an array of type :math:`LOD_TENSOR_ARRAY` using the
+    LayerHelper. It is mainly used to implement RNN with array_write, array_read
+    and While.
 
     Args:
         dtype (int|float): The data type of the elements in the array.
 
     Returns:
-        Variable: The tensor variable storing the elements of data type.
+        Variable: The lod_tensor_array variable storing the elements of data type.
 
     Examples:
         .. code-block:: python
@@ -1020,9 +1027,14 @@ def shrink_memory(x, i, table):
 
 
 def array_length(array):
-    """This function performs the operation to find the length of the input
+    """
+    **Get the length of Input LoDTensorArray**
+
+    This function performs the operation to find the length of the input
     LOD_TENSOR_ARRAY.
 
+    Related API: array_read, array_write, While.
+
     Args:
         array (LOD_TENSOR_ARRAY): The input array that will be used
                                   to compute the length.

From 2c1e2caa7d8c225040fdc3674df72afd7313f219 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 12 Jun 2018 13:35:00 +0800
Subject: [PATCH 02/21] update document

---
 python/paddle/fluid/layers/control_flow.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 114c1f0ed4..15f294698c 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -60,15 +60,14 @@ def split_lod_tensor(input, mask, level=0):
     This function takes in an input that contains the complete lod information,
     and takes in a mask which is used to mask certain parts of the input.
     The output is the true branch and the false branch with the mask applied to
-    the input at a certain level in the tensor.
-
-    Mainly used in IfElse to split data into two parts. Related API: IfElse.
+    the input at a certain level in the tensor. Mainly used in IfElse to split
+    data into two parts.
 
     Args:
         input(tuple|list|None): The input tensor that contains complete
                                 lod information needed to construct the output.
         mask(list): A bool column vector which masks the input.
-        level(int): The specific lod level to rank.
+        level(int): The specific lod level to split.
 
     Returns:
         Variable: The true branch of tensor as per the mask applied to input.
         Variable: The false branch of tensor as per the mask applied to input.
@@ -108,8 +107,9 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0):
 
     This function takes in an input :math:`x`, the True branch, the False
     branch and a binary :math:`mask`. Using this information, this function
-    merges the True and False branches of the tensor into a single Output
-    at a certain lod level indiacted by :math:`level`.
+    merges the True and False branches of the tensor into a single tensor as
+    output at a certain lod level indicated by :math:`level`. Used in IfElse
+    to merge the output of True block and False block.
 
     Args:
         in_true(tuple|list|None): The True branch to be merged.
         in_false(tuple|list|None): The False branch to be merged.
         x(tuple|list|None): The input tensor that contains complete
                             lod information needed to construct the output.
         mask(list): A bool column vector which masks the input.
-        level(int): The specific lod level to rank.
+        level(int): The specific lod level to merge.
 
     Returns:
         Variable: The merged output tensor.

From 4d0fd7e725b259509896f6d891965feec7effee8 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 12 Jun 2018 14:50:56 +0800
Subject: [PATCH 03/21] add API reference for create_tensor

---
 python/paddle/fluid/layers/tensor.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 62b01d595a..6ce486d70d 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -39,6 +39,25 @@ __all__ = [
 
 
 def create_tensor(dtype, name=None, persistable=False):
+    """
+    **Create a Tensor with certain data type and name**
+
+    Args:
+        dtype (string): 'float32'|'int32'|..., the data type of the
+            created tensor.
+        name (string|None): The name of the created tensor, if not set,
+            the name will be a random unique one.
+        persistable (bool): Set the persistable flag of the created tensor,
+            default value is False.
+
+    Returns:
+        Variable: The tensor variable storing the created tensor.
+
+    Examples:
+        .. code-block:: python
+
+          tensor = fluid.layers.create_tensor(dtype='float32')
+    """
     helper = LayerHelper("create_tensor", **locals())
     return helper.create_variable(
         name=helper.name, dtype=dtype, persistable=persistable)

From d82422997a7c21a9d440fae18c6c60c84a5bff7a Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 12 Jun 2018 20:07:17 +0800
Subject: [PATCH 04/21] add doc for batch norm

---
 python/paddle/fluid/layers/nn.py | 51 ++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 9e2c06d26f..6719a4d7ec 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1541,8 +1541,55 @@ def batch_norm(input,
                moving_variance_name=None,
                do_model_average_for_mean_and_var=False):
     """
-    This function helps create an operator to implement
-    the BatchNorm layer using the configurations from the input parameters.
+    **Batch Normalization Layer**
+
+    Can be used as a normalizer function for conv2d and fully_connected operations.
+    The required data format for this layer is one of the following:
+    1. NHWC `[batch, in_height, in_width, in_channels]`
+    2. NCHW `[batch, in_channels, in_height, in_width]`
+
+    Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
+    <https://arxiv.org/abs/1502.03167>`_ for more details.
+
+    :math:`input` is the input features over a mini-batch.
+
+    .. math::
+
+        \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\
+        \ mini-batch\ mean \\\\
+        \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\
+        \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\
+        \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
+        \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\
+        y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift
+
+    Args:
+        input(variable): The input variable which is a LoDTensor.
+        act(string, default None): Activation type, linear|relu|prelu|...
+        is_test(bool, default False): Used for training or testing.
+        momentum(float, default 0.9):
+        epsilon(float, default 1e-05):
+        param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
+        bias_attr(ParamAttr): The parameter attribute for Parameter `bias`.
+        data_layout(string, default NCHW): NCHW|NHWC
+        in_place(bool, default False): Make the input and output of batch norm reuse memory.
+        use_mkldnn(bool, Default false): ${use_mkldnn_comment}
+        name(string, Default None): A name for this layer (optional). If set None, the layer
+            will be named automatically.
+        moving_mean_name(string, Default None): The name of moving_mean which stores the global Mean.
+        moving_variance_name(string, Default None): The name of the moving_variance which stores the global Variance.
+        do_model_average_for_mean_and_var(bool, Default False):
+
+    Returns:
+        The sequence's last step variable which is a Tensor.
+
+    Examples:
+
+        .. code-block:: python
+
+            hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
+            hidden2 = fluid.layers.batch_norm(input=hidden1)
+
     """
     helper = LayerHelper('batch_norm', **locals())
     dtype = helper.input_dtype()

From f3e631cd9e59741f3d2531706080e3a94c979f35 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 12 Jun 2018 20:21:18 +0800
Subject: [PATCH 05/21] small update

---
 python/paddle/fluid/layers/nn.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 1a010ab3ac..d5db75ebea 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -4078,7 +4078,7 @@ def image_resize(input,
                  name=None,
                  resample='BILINEAR'):
     """
-    Resize a batch of images.
+    **Resize a batch of images**
 
     The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
     and the resizing only applies on the last two dimensions(hight and width).
@@ -4208,6 +4208,8 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'):
 
 def gather(input, index):
     """
+    **Gather Layer**
+
     Output is obtained by gathering entries of the outer-most dimension
     of X indexed by `index` and concatenate them together.

From e72eb0edec589ce6bee07ec5eb34ee7ceca2af33 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 12 Jun 2018 20:23:47 +0800
Subject: [PATCH 06/21] small update

---
 paddle/fluid/operators/detection/polygon_box_transform_op.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc
index 335e8dd470..568d50d457 100644
--- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc
+++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc
@@ -83,11 +83,13 @@ class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 PolygonBoxTransform Operator.
+
+PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate.
+
 The input is the final geometry output in detection network.
 We use 2*n numbers to denote the coordinate shift from n corner vertices of
 the polygon_box to the pixel location. As each distance offset contains two
 numbers (xi, yi), the geometry output contains 2*n channels.
-PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate.
 )DOC");
   }
 };

From dde0a28073420134d4aae01d91511ead3d0c362a Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 12 Jun 2018 20:51:56 +0800
Subject: [PATCH 07/21] add doc for Switch

---
 python/paddle/fluid/layers/control_flow.py | 25 +++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 15f294698c..7999ee0f80 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -1132,6 +1132,28 @@ class ConditionalBlock(object):
 
 
 class Switch(object):
+    """
+    **Switch Class**
+
+    Many programming languages provide `switch` as a generalization of `if-elif-else`.
+    Switch class works just like an `if-elif-else`.
+
+    The Semantics:
+
+    1. A `switch` control-flow checks cases one-by-one.
+    1. The condition of each case is a boolean value, which is a scalar.
+    1. It runs the first matched case, or the default case if there is one.
+    1. Once it matches a case, it runs the corresponding branch and only that branch.
+
+    Examples:
+        .. code-block:: python
+
+            with control_flow.Switch() as switch:
+                with switch.case(global_step == zero_var):
+                    tensor.assign(input=one_var, output=div_res)
+
+    """
+
     def __init__(self, name=None):
         self.helper = LayerHelper('switch', name=name)
         self.inside_scope = False
@@ -1161,7 +1183,8 @@ class Switch(object):
         return ConditionalBlockGuard(cond_block)
 
     def default(self):
-        """create a default case for this switch
+        """
+        create a default case for this switch
         """
         pre_cond_num = len(self.pre_not_conditions)
         if pre_cond_num == 0:

From d76f8a8f5d06419f3db2647fec8956444ca7c1fe Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Tue, 12 Jun 2018 21:24:39 +0800
Subject: [PATCH 08/21] refine doc of polynomial_decay

---
 .../fluid/layers/learning_rate_scheduler.py | 31 +++++++++++--------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py
index 716cc7824e..2e5cff74c1 100644
--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -162,22 +162,27 @@ def polynomial_decay(learning_rate,
                      end_learning_rate=0.0001,
                      power=1.0,
                      cycle=False):
-    """Applies polynomial decay to the initial learning rate.
+    """
+    **polynomial_decay**
+
+    Applies polynomial decay to the initial learning rate.
+
+    .. code-block::python
+
+        if cycle:
+            decay_steps = decay_steps * ceil(global_step / decay_steps)
+        else:
+            global_step = min(global_step, decay_steps)
+        decayed_learning_rate = (learning_rate - end_learning_rate) *
+            (1 - global_step / decay_steps) ^ power + end_learning_rate
 
-    >>> if cycle:
-    >>>     decay_steps = decay_steps * ceil(global_step / decay_steps)
-    >>> else:
-    >>>     global_step = min(global_step, decay_steps)
-    >>> decayed_learning_rate = (learning_rate - end_learning_rate) *
-    >>>                   (1 - global_step / decay_steps) ^ power +
-    >>>                   end_learning_rate
     Args:
-        learning_rate: A scalar float32 value or a Variable. This
-          will be the initial learning rate during training
-        decay_steps: A Python `int32` number.
-        end_learning_rate: A Python `float` number.
-        power: A Python `float` number
-        cycle: Boolean. If set true, decay the learning rate every decay_steps.
+        learning_rate(Variable|float32): A scalar float32 value or a Variable. This
+          will be the initial learning rate during training
+        decay_steps(int32): A Python `int32` number.
+        end_learning_rate(float): A Python `float` number.
+        power(float): A Python `float` number
+        cycle(bool, Default False): Boolean. If set true, decay the learning rate every decay_steps.
 
     Returns:
         The decayed learning rate

From 76129f03314afb26acae18e7a5838612f6fb28f0 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Wed, 13 Jun 2018 22:16:23 +0800
Subject: [PATCH 09/21] update comment

---
 python/paddle/fluid/layers/control_flow.py | 7 +++----
 python/paddle/fluid/layers/nn.py           | 5 +++--
 python/paddle/fluid/layers/tensor.py       | 7 +++----
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 7999ee0f80..feac42d94e 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -893,12 +893,11 @@ def create_array(dtype):
     """
     **Create LoDTensor Array**
 
-    This function creates an array of type :math:`LOD_TENSOR_ARRAY` using the
-    LayerHelper. It is mainly used to implement RNN with array_write, array_read
-    and While.
+    This function creates an array of LOD_TENSOR_ARRAY. It is mainly used to
+    implement RNN with array_write, array_read and While.
 
     Args:
-        dtype (int|float): The data type of the elements in the array.
+        dtype (int|float): The data type of the elements in the lod_tensor_array.
 
     Returns:
         Variable: The lod_tensor_array variable storing the elements of data type.
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 80452a1e8b..3f3b7e20ef 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1618,8 +1618,9 @@ def batch_norm(input,
     1. NHWC `[batch, in_height, in_width, in_channels]`
     2. NCHW `[batch, in_channels, in_height, in_width]`
 
-    Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
-    <https://arxiv.org/abs/1502.03167>`_ for more details.
+    Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
+    Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_
+    for more details.
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 6ce486d70d..6b7f69807b 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -40,15 +40,14 @@ __all__ = [
 
 def create_tensor(dtype, name=None, persistable=False):
     """
-    **Create a Tensor with certain data type and name**
+    **Create a Tensor**
 
     Args:
         dtype (string): 'float32'|'int32'|..., the data type of the
            created tensor.
-        name (string|None): The name of the created tensor, if not set,
+        name(string, Default: None): The name of the created tensor, if not set,
            the name will be a random unique one.
-        persistable (bool): Set the persistable flag of the created tensor,
-            default value is False.
+        persistable(bool, Default: False): Set the persistable flag of the created tensor.
 
     Returns:
         Variable: The tensor variable storing the created tensor.

From 0ae670917489d24e25e648c85df6a0f8a110f979 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Thu, 14 Jun 2018 10:49:07 +0800
Subject: [PATCH 10/21] update document

---
 python/paddle/fluid/layers/control_flow.py  | 16 +++++++-------
 .../fluid/layers/learning_rate_scheduler.py | 10 +++++-----
 python/paddle/fluid/layers/nn.py            |  2 ++
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index feac42d94e..5354582aaa 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -76,13 +76,13 @@ def split_lod_tensor(input, mask, level=0):
     Examples:
         .. code-block:: python
 
-          x = layers.data(name='x', shape=[1])
+          x = fluid.layers.data(name='x', shape=[1])
           x.persistable = True
 
-          y = layers.data(name='y', shape=[1])
+          y = fluid.layers.data(name='y', shape=[1])
           y.persistable = True
 
-          out_true, out_false = layers.split_lod_tensor(
+          out_true, out_false = fluid.layers.split_lod_tensor(
                 input=x, mask=y, level=level)
@@ -891,7 +891,7 @@ def array_write(x, i, array=None):
 
 def create_array(dtype):
     """
-    **Create LoDTensor Array**
+    **Create LoDTensorArray**
 
     This function creates an array of LOD_TENSOR_ARRAY. It is mainly used to
     implement RNN with array_write, array_read and While.
@@ -989,7 +989,8 @@ def array_read(array, i):
     Returns:
         Variable: The tensor type variable that has the data written to it.
     Examples:
-        .. code-block::python
+        .. code-block:: python
+
           tmp = fluid.layers.zeros(shape=[10], dtype='int32')
           i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
           arr = layers.array_read(tmp, i=i)
@@ -1027,7 +1028,7 @@ def shrink_memory(x, i, table):
 
 def array_length(array):
     """
-    **Get the length of Input LoDTensorArray**
+    **Get the Length of Input LoDTensorArray**
 
     This function performs the operation to find the length of the input
     LOD_TENSOR_ARRAY.
@@ -1042,12 +1043,13 @@ def array_length(array):
         Variable: The length of the input LoDTensorArray.
 
     Examples:
-        .. code-block::python
+        .. code-block:: python
 
           tmp = fluid.layers.zeros(shape=[10], dtype='int32')
           i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
          arr = fluid.layers.array_write(tmp, i=i)
          arr_len = fluid.layers.array_length(arr)
+
     """
     helper = LayerHelper('array_length', **locals())
     tmp = helper.create_tmp_variable(dtype='int64')
diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py
index 2e5cff74c1..2dbc51c23f 100644
--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -163,11 +163,11 @@ def polynomial_decay(learning_rate,
                      power=1.0,
                      cycle=False):
     """
-    **polynomial_decay**
+    **Polynomial Decay**
 
     Applies polynomial decay to the initial learning rate.
 
-    .. code-block::python
+    .. code-block:: python
 
         if cycle:
             decay_steps = decay_steps * ceil(global_step / decay_steps)
@@ -180,9 +180,9 @@ def polynomial_decay(learning_rate,
     Args:
         learning_rate(Variable|float32): A scalar float32 value or a Variable. This
           will be the initial learning rate during training
         decay_steps(int32): A Python `int32` number.
-        end_learning_rate(float): A Python `float` number.
-        power(float): A Python `float` number
-        cycle(bool, Default False): Boolean. If set true, decay the learning rate every decay_steps.
+        end_learning_rate(float, Default: 0.0001): A Python `float` number.
+        power(float, Default: 1.0): A Python `float` number
+        cycle(bool, Default: False): Boolean. If set true, decay the learning rate every decay_steps.
 
     Returns:
         The decayed learning rate
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 3f3b7e20ef..7c4393c4d9 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1615,7 +1615,9 @@ def batch_norm(input,
 
     Can be used as a normalizer function for conv2d and fully_connected operations.
     The required data format for this layer is one of the following:
+
     1. NHWC `[batch, in_height, in_width, in_channels]`
+
     2. NCHW `[batch, in_channels, in_height, in_width]`
 
     Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
     Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_
     for more details.

From 21ecd357cffb7165813ffa65b2ab7c810eddfece Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Thu, 14 Jun 2018 11:01:07 +0800
Subject: [PATCH 11/21] little optimize

---
 python/paddle/fluid/layers/nn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 7c4393c4d9..627718f87e 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -4099,7 +4099,7 @@ def image_resize(input,
                  name=None,
                  resample='BILINEAR'):
     """
-    **Resize a batch of images**
+    **Resize a Batch of Images**
 
     The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
     and the resizing only applies on the last two dimensions(hight and width).

From 62bf672eddfdbd8c9287292c5ddae80d4eae2af4 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Thu, 14 Jun 2018 11:20:46 +0800
Subject: [PATCH 12/21] update document for Switch

---
 python/paddle/fluid/layers/control_flow.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 5354582aaa..db5b07558a 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -1142,16 +1142,19 @@ class Switch(object):
     The Semantics:
 
     1. A `switch` control-flow checks cases one-by-one.
-    1. The condition of each case is a boolean value, which is a scalar.
-    1. It runs the first matched case, or the default case if there is one.
-    1. Once it matches a case, it runs the corresponding branch and only that branch.
+
+    2. The condition of each case is a boolean value, which is a scalar.
+
+    3. It runs the first matched case, or the default case if there is one.
+
+    4. Once it matches a case, it runs the corresponding branch and only that branch.
 
     Examples:
         .. code-block:: python
 
-            with control_flow.Switch() as switch:
+            with fluid.control_flow.Switch() as switch:
                 with switch.case(global_step == zero_var):
-                    tensor.assign(input=one_var, output=div_res)
+                    fluid.tensor.assign(input=one_var, output=div_res)

From 2f9ed97eb66250a788702e21240bc09fea93b85d Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Thu, 14 Jun 2018 13:54:44 +0800
Subject: [PATCH 13/21] follow comment

---
 python/paddle/fluid/layers/control_flow.py |  2 --
 python/paddle/fluid/layers/nn.py           | 14 +++++++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index db5b07558a..5394ac3278 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -55,8 +55,6 @@ __all__ = [
 
 def split_lod_tensor(input, mask, level=0):
     """
-    **split_lod_tensor**
-
     This function takes in an input that contains the complete lod information,
     and takes in a mask which is used to mask certain parts of the input.
     The output is the true branch and the false branch with the mask applied to
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 627718f87e..d3899cd442 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1638,23 +1638,23 @@ def batch_norm(input,
     Args:
         input(variable): The input variable which is a LoDTensor.
-        act(string, default None): Activation type, linear|relu|prelu|...
-        is_test(bool, default False): Used for training or testing.
-        momentum(float, default 0.9):
-        epsilon(float, default 1e-05):
+        act(string, Default None): Activation type, linear|relu|prelu|...
+        is_test(bool, Default False): Used for training or testing.
+        momentum(float, Default 0.9):
+        epsilon(float, Default 1e-05):
         param_attr(ParamAttr): The parameter attribute for Parameter `scale`.
         bias_attr(ParamAttr): The parameter attribute for Parameter `bias`.
         data_layout(string, default NCHW): NCHW|NHWC
-        in_place(bool, default False): Make the input and output of batch norm reuse memory.
+        in_place(bool, Default False): Make the input and output of batch norm reuse memory.
         use_mkldnn(bool, Default false): ${use_mkldnn_comment}
         name(string, Default None): A name for this layer (optional). If set None, the layer
            will be named automatically.
         moving_mean_name(string, Default None): The name of moving_mean which stores the global Mean.
         moving_variance_name(string, Default None): The name of the moving_variance which stores the global Variance.
-        do_model_average_for_mean_and_var(bool, Default False):
+        do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not.
 
     Returns:
-        The sequence's last step variable which is a Tensor.
+        Variable: A tensor variable which is the result after applying batch normalization on the input.
 
     Examples:
 
         .. code-block:: python
 
            hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
            hidden2 = fluid.layers.batch_norm(input=hidden1)

From 9de779f1cfae9daba1b38b8df829cb0e56247592 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Fri, 15 Jun 2018 13:18:33 +0800
Subject: [PATCH 14/21] update switch class

---
 python/paddle/fluid/layers/control_flow.py | 25 ++++++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 209a767e73..2bc43c5ce9 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -1156,16 +1156,14 @@ class ConditionalBlock(object):
 
 class Switch(object):
     """
-    **Switch Class**
-
-    Many programming languages provide `switch` as a generalization of `if-elif-else`.
-    Switch class works just like an `if-elif-else`.
+    Switch class works just like an `if-elif-else`. Can be used in a learning rate scheduler
+    to modify the learning rate.
 
     The Semantics:
 
    1. A `switch` control-flow checks cases one-by-one.
 
-    2. The condition of each case is a boolean value, which is a scalar.
+    2. The condition of each case is a boolean value, which is a scalar Variable.
 
     3. It runs the first matched case, or the default case if there is one.
 
     4. Once it matches a case, it runs the corresponding branch and only that branch.
 
     Examples:
         .. code-block:: python
 
+            lr = fluid.layers.tensor.create_global_var(
+                shape=[1],
+                value=0.0,
+                dtype='float32',
+                persistable=True,
+                name="learning_rate")
+            one_var = tensor.fill_constant(
+                shape=[1], dtype='float32', value=1.0)
+            two_var = tensor.fill_constant(
+                shape=[1], dtype='float32', value=2.0)
+
-            with fluid.control_flow.Switch() as switch:
+            with fluid.layers.control_flow.Switch() as switch:
                 with switch.case(global_step == zero_var):
-                    fluid.tensor.assign(input=one_var, output=div_res)
+                    fluid.layers.tensor.assign(input=one_var, output=lr)
+                with switch.default():
+                    fluid.layers.tensor.assign(input=two_var, output=lr)

From e2783bb6afeb4e5b4160ff4283c18672f2f0632e Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Fri, 15 Jun 2018 14:22:21 +0800
Subject: [PATCH 15/21] update split_lod_tensor doc

---
 python/paddle/fluid/layers/control_flow.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 2bc43c5ce9..e261e3f63a 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -69,8 +69,10 @@ def split_lod_tensor(input, mask, level=0):
         level(int): The specific lod level to split.
 
     Returns:
-        Variable: The true branch of tensor as per the mask applied to input.
-        Variable: The false branch of tensor as per the mask applied to input.
+        tuple(Variable, Variable):
+        The true branch of tensor as per the mask applied to input.
+
+        The false branch of tensor as per the mask applied to input.
 
     Examples:
         .. code-block:: python
 
          x = fluid.layers.data(name='x', shape=[1])
          x.persistable = True
 
          y = fluid.layers.data(name='y', shape=[1])
          y.persistable = True
 
          out_true, out_false = fluid.layers.split_lod_tensor(
                input=x, mask=y, level=level)

From 1c9fc655d0b4745f74940f99acc8421faf8656f5 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Fri, 15 Jun 2018 15:16:14 +0800
Subject: [PATCH 16/21] update

---
 python/paddle/fluid/layers/detection.py     | 73 ++++++++++---------
 .../fluid/layers/learning_rate_scheduler.py | 12 ++-
 python/paddle/fluid/layers/tensor.py        |  8 +-
 3 files changed, 48 insertions(+), 45 deletions(-)

diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py
index edf528a595..dacb31f8b6 100644
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -603,7 +603,7 @@ def prior_box(input,
               offset=0.5,
               name=None):
     """
-    **Prior box operator**
+    **Prior Box Operator**
 
     Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm.
     Each position of the input produce N prior boxes, N is determined by
@@ -632,26 +632,30 @@ def prior_box(input,
         name(str): Name of the prior box op. Default: None.
 
     Returns:
-        boxes(Variable): the output prior boxes of PriorBox.
-            The layout is [H, W, num_priors, 4].
-            H is the height of input, W is the width of input,
-            num_priors is the total
-            box count of each position of input.
-        Variances(Variable): the expanded variances of PriorBox.
-            The layout is [H, W, num_priors, 4].
-            H is the height of input, W is the width of input
-            num_priors is the total
-            box count of each position of input
+        tuple: A tuple with two Variables (boxes, variances)
+
+        boxes: the output prior boxes of PriorBox.
+        The layout is [H, W, num_priors, 4].
+        H is the height of input, W is the width of input,
+        num_priors is the total
+        box count of each position of input.
+
+        variances: the expanded variances of PriorBox.
+        The layout is [H, W, num_priors, 4].
+        H is the height of input, W is the width of input
+        num_priors is the total
+        box count of each position of input
 
     Examples:
         .. code-block:: python
-            box, var = prior_box(
-                input=conv1,
-                image=images,
-                min_sizes=[100.],
-                flip=True,
-                clip=True)
+
+            box, var = fluid.layers.prior_box(
+                input=conv1,
+                image=images,
+                min_sizes=[100.],
+                flip=True,
+                clip=True)
     """
     helper = LayerHelper("prior_box", **locals())
     dtype = helper.input_dtype()
@@ -721,11 +725,9 @@ def multi_box_head(inputs,
                    stride=1,
                    name=None):
     """
-    **Prior_boxes**
-
     Generate prior boxes for SSD(Single Shot MultiBox Detector)
     algorithm. The details of this algorithm, please refer the
-    section 2.2 of SSD paper (SSD: Single Shot MultiBox Detector)
+    section 2.2 of SSD paper `SSD: Single Shot MultiBox Detector
+    <https://arxiv.org/abs/1512.02325>`_ .
 
     Args:
@@ -766,24 +768,27 @@ def multi_box_head(inputs,
         name(str): Name of the prior box layer. Default: None.
 
     Returns:
-        mbox_loc(Variable): The predicted boxes' location of the inputs.
-            The layout is [N, H*W*Priors, 4]. where Priors
-            is the number of predicted boxes each position of each input.
-        mbox_conf(Variable): The predicted boxes' confidence of the inputs.
-            The layout is [N, H*W*Priors, C]. where Priors
-            is the number of predicted boxes each position of each input
-            and C is the number of Classes.
-        boxes(Variable): the output prior boxes of PriorBox.
-            The layout is [num_priors, 4]. num_priors is the total
-            box count of each position of inputs.
-        Variances(Variable): the expanded variances of PriorBox.
-            The layout is [num_priors, 4]. num_priors is the total
-            box count of each position of inputs
+        tuple: A tuple with four Variables (mbox_loc, mbox_conf, boxes, variances)
+
+        mbox_loc: The predicted boxes' location of the inputs. The layout
+        is [N, H*W*Priors, 4]. where Priors is the number of predicted
+        boxes each position of each input.
+
+        mbox_conf: The predicted boxes' confidence of the inputs. The layout
+        is [N, H*W*Priors, C]. where Priors is the number of predicted boxes
+        each position of each input and C is the number of Classes.
+
+        boxes: the output prior boxes of PriorBox. The layout is [num_priors, 4].
+        num_priors is the total box count of each position of inputs.
+
+        variances: the expanded variances of PriorBox. The layout is
+        [num_priors, 4]. num_priors is the total box count of each position of inputs
 
     Examples:
         .. code-block:: python
-        mbox_locs, mbox_confs, box, var = layers.multi_box_head(
+
+          mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
             inputs=[conv1, conv2, conv3, conv4, conv5, conv5],
             image=images,
            num_classes=21,
diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py
index 2dbc51c23f..e76f15d838 100644
--- a/python/paddle/fluid/layers/learning_rate_scheduler.py
+++ b/python/paddle/fluid/layers/learning_rate_scheduler.py
@@ -163,8 +163,6 @@ def polynomial_decay(learning_rate,
                      power=1.0,
                      cycle=False):
     """
-    **Polynomial Decay**
-
     Applies polynomial decay to the initial learning rate.
 
     .. code-block:: python
 
        if cycle:
            decay_steps = decay_steps * ceil(global_step / decay_steps)
        else:
            global_step = min(global_step, decay_steps)
        decayed_learning_rate = (learning_rate - end_learning_rate) *
            (1 - global_step / decay_steps) ^ power + end_learning_rate
 
     Args:
         learning_rate(Variable|float32): A scalar float32 value or a Variable. This
           will be the initial learning rate during training.
         decay_steps(int32): A Python `int32` number.
-        end_learning_rate(float, Default: 0.0001): A Python `float` number.
-        power(float, Default: 1.0): A Python `float` number
-        cycle(bool, Default: False): Boolean. If set true, decay the learning rate every decay_steps.
+        end_learning_rate(float): A Python `float` number.
+        power(float): A Python `float` number.
+        cycle(bool): If set true, decay the learning rate every decay_steps.
 
     Returns:
-        The decayed learning rate
+        Variable: The decayed learning rate
     """
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 25505e4427..978f7dde29 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -40,14 +40,14 @@ __all__ = [
 
 def create_tensor(dtype, name=None, persistable=False):
     """
-    **Create a Tensor**
+    Create a variable, which will hold a LoDTensor with data type dtype.
 
     Args:
-        dtype (string): 'float32'|'int32'|..., the data type of the
+        dtype(string): 'float32'|'int32'|..., the data type of the
            created tensor.
-        name (string, Default: None): The name of the created tensor, if not set,
+        name(string): The name of the created tensor, if not set,
            the name will be a random unique one.
-        persistable (bool, Default: False): Set the persistable flag of the created tensor.
+        persistable(bool): Set the persistable flag of the created tensor.
 
     Returns:
         Variable: The tensor variable storing the created tensor.
From 6ace04f655be6ea7898b5cbe61dfbdb1e16b7806 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 15 Jun 2018 16:00:07 +0800 Subject: [PATCH 17/21] update --- paddle/fluid/operators/activation_op.cc | 2 +- python/paddle/fluid/layers/nn.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index c73482eb12..8743c9500a 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -133,7 +133,7 @@ $out = \max(x, 0)$ __attribute__((unused)) constexpr char TanhDoc[] = R"DOC( Tanh Activation Operator. -$$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ +$$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"; diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index c6c8c7c2d1..485470f281 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -4475,6 +4475,7 @@ def image_resize(input, and the resizing only applies on the last two dimensions(hight and width). Supporting resample methods: + 'BILINEAR' : Bilinear interpolation Args: @@ -4494,8 +4495,8 @@ def image_resize(input, Default: 'BILINEAR' Returns: - out (Variable): The output is a 4-D tensor of the shape - (num_batches, channls, out_h, out_w). + Variable: The output is a 4-D tensor of the shape + (num_batches, channls, out_h, out_w). Examples: .. code-block:: python @@ -4579,7 +4580,7 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'): resample (str): resample method, default: BILINEAR. Returns: - out (Variable): The output is a 4-D tensor of the shape + Variable: The output is a 4-D tensor of the shape (num_batches, channls, out_h, out_w). """ in_shape = input.shape From 8f59d79d751e3174f0a6f98783fa1dbdbc279cc2 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 15 Jun 2018 16:35:53 +0800 Subject: [PATCH 18/21] update doc for sigmoid_cross_entropy_with_logits --- .../fluid/operators/sigmoid_cross_entropy_with_logits_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc index 135e2a6f7f..c3b0fe3209 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -113,14 +113,14 @@ The logistic loss is given as follows: $$loss = -Labels * \log(\sigma(X)) - (1 - Labels) * \log(1 - \sigma(X))$$ -We know that $$\sigma(X) = (1 / (1 + \exp(-X)))$$. By substituting this we get: +We know that $$\sigma(X) = \\frac{1}{1 + \exp(-X)}$$. By substituting this we get: $$loss = X - X * Labels + \log(1 + \exp(-X))$$ For stability and to prevent overflow of $$\exp(-X)$$ when X < 0, we reformulate the loss as follows: - $$loss = \max(X, 0) - X * Labels + \log(1 + \exp(-|X|))$$ + $$loss = \max(X, 0) - X * Labels + \log(1 + \exp(-\|X\|))$$ Both the input `X` and `Labels` can carry the LoD (Level of Details) information. However the output only shares the LoD with input `X`. 
From a4ee0d0dd165cdc79beca3e0904a7adf5bf58d9c Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Sat, 16 Jun 2018 08:58:48 +0800
Subject: [PATCH 19/21] add reverse

---
 python/paddle/fluid/layers/tensor.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index 4c97ca40d8..18e0fedcc4 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -35,6 +35,7 @@ __all__ = [
     'argmax',
     'ones',
     'zeros',
+    'reverse',
 ]

From 82a4cf19608c7655a6b6394c65a10933f3b64dc0 Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Sun, 17 Jun 2018 11:44:25 +0800
Subject: [PATCH 20/21] update image_resize_short and shape doc

---
 paddle/fluid/operators/shape_op.cc | 9 ++++++---
 python/paddle/fluid/layers/nn.py   | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/shape_op.cc b/paddle/fluid/operators/shape_op.cc
index c75fce7959..b44d5f8980 100644
--- a/paddle/fluid/operators/shape_op.cc
+++ b/paddle/fluid/operators/shape_op.cc
@@ -36,10 +36,13 @@ class ShapeOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("Input", "(Tensor), The input tensor.");
-    AddOutput("Out", "(Tensor), The shape of input tensor.");
+    AddOutput("Out",
+              "(Tensor), The shape of input tensor, the data type of the shape"
+              " is int64_t, and it will be on the same device as the input Tensor.");
     AddComment(R"DOC(
-Shape Operator.
-Get the shape of input tensor.
+Shape Operator
+
+Get the shape of input tensor. Only CPU input Tensor is supported now.
 )DOC");
   }
 };
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index a3b2d2b777..40e72aa488 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -4650,7 +4650,7 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'):
 
     Returns:
         Variable: The output is a 4-D tensor of the shape
-            (num_batches, channls, out_h, out_w).
+        (num_batches, channels, out_h, out_w).

From 46ae1c93c28d346d9a4c6a4bf7c9d1019216403b Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Sun, 17 Jun 2018 14:00:49 +0800
Subject: [PATCH 21/21] add doc for softmax

---
 python/paddle/fluid/layers/nn.py | 39 ++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 6032573393..d31d12f971 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1258,6 +1258,45 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
 
 
 def softmax(input, param_attr=None, bias_attr=None, use_cudnn=True, name=None):
+    """
+    The input of the softmax layer is a 2-D tensor with shape N x K (N is the
+    batch_size, K is the dimension of input feature). The output tensor has the
+    same shape as the input tensor.
+
+    For each row of the input tensor, the softmax operator squashes the
+    K-dimensional vector of arbitrary real values to a K-dimensional vector of real
+    values in the range [0, 1] that add up to 1.
+
+    It computes the exponential of the given dimension and the sum of exponential
+    values of all the other dimensions in the K-dimensional vector input.
+    Then the ratio of the exponential of the given dimension and the sum of
+    exponential values of all the other dimensions is the output of the softmax
+    operator.
+
+    For each row :math:`i` and each column :math:`j` in Input(X), we have:
+
+    .. math::
+
+        Out[i, j] = \\frac{\\exp(X[i, j])}{\\sum_j \\exp(X[i, j])}
+
+    Args:
+        input (Variable): The input variable.
+        bias_attr (ParamAttr): attributes for bias
+        param_attr (ParamAttr): attributes for parameter
+        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
+            library is installed.
+
+    Returns:
+        Variable: output of softmax
+
+    Examples:
+
+        .. code-block:: python
+
+             fc = fluid.layers.fc(input=x, size=10)
+             softmax = fluid.layers.softmax(input=fc)
+
+    """
     helper = LayerHelper('softmax', **locals())
     dtype = helper.input_dtype()
     softmax_out = helper.create_tmp_variable(dtype)
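
To close, the row-wise softmax formula documented in the final patch can be reproduced directly. A small NumPy sketch follows; it is illustrative only and not from the patches, and subtracting the per-row maximum is a standard stability trick rather than something the docstring mandates:

.. code-block:: python

    import numpy as np

    def softmax_rows(x):
        # Matches the docstring formula:
        # out[i, j] = exp(x[i, j]) / sum_j exp(x[i, j]).
        # Shifting by the per-row max avoids overflow in exp() and
        # leaves the result unchanged.
        shifted = x - x.max(axis=1, keepdims=True)
        e = np.exp(shifted)
        return e / e.sum(axis=1, keepdims=True)

    x = np.array([[1.0, 2.0, 3.0],
                  [0.0, 0.0, 0.0]])
    out = softmax_rows(x)
    print(out.sum(axis=1))  # each row sums to 1.0
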