From aacd94127bcccfd3a04441526caab27253aea163 Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Thu, 23 Nov 2017 13:15:37 +0800 Subject: [PATCH 1/7] refine bilinear tensor product doc --- paddle/operators/bilinear_tensor_product_op.cc | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc index c65ba7eb26..487b0001da 100644 --- a/paddle/operators/bilinear_tensor_product_op.cc +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -77,11 +77,19 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "The output of bilinear_tensor_product operator."); AddComment(R"DOC( Bilinear Tensor Product operator. -Given input X and Y, a 3D tensor weight, and bias. Each column of the -output is computed by one slice i = 1, . . . , k of the tensor: - - M = (X W_i) \cdot Y - Out_i = \sum_i {M_i} + Bias_i +Given input X and Y, a 3D tensor Weight and a Bias. Each column of the +Output is computed by one slice i = 1, . . . , k of the tensor: + +$$ +M = (X W_i) * Y \\ +Out_i = \sum_j {M_j} + Bias_i +$$ + +Where $$W_i$$ is the i-th slice of Input(Weight); + $$M_j$$ is the j-th column of $$M$$; + $$Out_i$$ is the i-th column of Output(Out); + $$Bias_i$$ is a column vector, each element of it is equal to + the i-th element of $$Bias$$; )DOC"); } From 8ba62a5f94e72e5425c9d9865644c8e42eb1efe8 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Thu, 23 Nov 2017 17:03:13 +0800 Subject: [PATCH 2/7] fix LaTeX syntax in liear_chain_crf op. --- paddle/operators/linear_chain_crf_op.cc | 45 ++++++++++--------- paddle/operators/softmax_op.cc | 2 +- .../softmax_with_cross_entropy_op.cc | 8 ++-- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc index 066bdf67aa..8e079a14e0 100644 --- a/paddle/operators/linear_chain_crf_op.cc +++ b/paddle/operators/linear_chain_crf_op.cc @@ -32,19 +32,19 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "[(D + 2) x D]. The learnable parameter for the linear_chain_crf " "operator. See more details in the operator's comments."); AddInput("Label", - "(LoDTensor, default LoDTensor) A LoDTensor with shape " + "(LoDTensor, default LoDTensor) A LoDTensor with shape " "[N x 1], where N is the total element number in a mini-batch. " "The ground truth."); AddOutput( "Alpha", "(Tensor, default Tensor) A 2-D Tensor with shape [N x D]. " - "The forward vectors for the entire batch. Denote it as \f$\alpha\f$. " - "\f$\alpha$\f is a memo table used to calculate the normalization " - "factor in CRF. \f$\alpha[k, v]$\f stores the unnormalized " + "The forward vectors for the entire batch. Denote it as $\alpha$. " + "$\alpha$ is a memo table used to calculate the normalization " + "factor in CRF. $\alpha[k, v]$ stores the unnormalized " "probabilites of all possible unfinished sequences of tags that end at " - "position \f$k$\f with tag \f$v$\f. For each \f$k$\f, " - "\f$\alpha[k, v]$\f is a vector of length \f$D$\f with a component for " - "each tag value \f$v$\f. This vector is called a forward vecotr and " + "position $k$ with tag $v$. For each $k$, " + "$\alpha[k, v]$ is a vector of length $D$ with a component for " + "each tag value $v$. This vector is called a forward vecotr and " "will also be used in backward computations.") .AsIntermediate(); AddOutput( @@ -73,9 +73,9 @@ LinearChainCRF Operator. 
Conditional Random Field defines an undirected probabilistic graph with nodes denoting random variables and edges denoting dependencies between these -variables. CRF learns the conditional probability \f$P(Y|X)\f$, where -\f$X = (x_1, x_2, ... , x_n)\f$ are structured inputs and -\f$Y = (y_1, y_2, ... , y_n)\f$ are labels for the inputs. +variables. CRF learns the conditional probability $P(Y|X)$, where +$X = (x_1, x_2, ... , x_n)$ are structured inputs and +$Y = (y_1, y_2, ... , y_n)$ are labels for the inputs. Linear chain CRF is a special case of CRF that is useful for sequence labeling task. Sequence labeling tasks do not assume a lot of conditional @@ -88,21 +88,22 @@ CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details. Equation: -1. Denote Input(Emission) to this operator as \f$x\f$ here. +1. Denote Input(Emission) to this operator as $x$ here. 2. The first D values of Input(Transition) to this operator are for starting -weights, denoted as \f$a\f$ here. +weights, denoted as $a$ here. 3. The next D values of Input(Transition) of this operator are for ending -weights, denoted as \f$b\f$ here. +weights, denoted as $b$ here. 4. The remaning values of Input(Transition) are for transition weights, -denoted as \f$w\f$ here. -5. Denote Input(Label) as \f$s\f$ here. - -The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as: -\f$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} - + \sum_{l=1}^L x_{s_l} - + \sum_{l=2}^L w_{s_{l-1},s_l})\f$ -where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over -all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight +denoted as $w$ here. +5. Denote Input(Label) as $s$ here. + +The probability of a sequence $s$ of length $L$ is defined as: +$$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} + + \sum_{l=1}^L x_{s_l} + + \sum_{l=2}^L w_{s_{l-1},s_l})$$ + +where $Z$ is a normalization value so that the sum of $P(s)$ over +all possible sequences is 1, and $x$ is the emission feature weight to the linear chain CRF. Finally, the linear chain CRF operator outputs the logarithm of the conditional diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 93f89e33a7..93e0525bad 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -59,7 +59,7 @@ Then the ratio of the exponential of the given dimension and the sum of exponential values of all the other dimensions is the output of the softmax operator. 
-For each row `i` and each column `j` in input X, we have: +For each row $i$ and each column $j$ in Input(X), we have: $$Y[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$ )DOC"); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index 3dbb62d2e5..fc027d6f95 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -67,15 +67,15 @@ The equation is as follows: 1) Hard label (one-hot label, so every sample has exactly one class) -$$Loss_j = \f$ -\text{Logit}_{Label_j} + +$$Loss_j = -\text{Logit}_{Label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), -j = 1, ..., K $\f$$ +j = 1,..., K$$ 2) Soft label (each sample can have a distribution over all classes) -$$Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i - +$$Loss_j = -\sum_{i=0}^{K}\text{Label}_i \left(\text{Logit}_i - \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right), -j = 1,...,K $\f$$ +j = 1,...,K$$ )DOC"); } From 4bdd97625b123e1562f26ce7ce2ef7b24ab70a11 Mon Sep 17 00:00:00 2001 From: peterzhang2029 Date: Thu, 23 Nov 2017 17:37:32 +0800 Subject: [PATCH 3/7] refine the latex mark --- paddle/operators/bilinear_tensor_product_op.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc index 487b0001da..c88b2c9beb 100644 --- a/paddle/operators/bilinear_tensor_product_op.cc +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -78,18 +78,18 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Bilinear Tensor Product operator. Given input X and Y, a 3D tensor Weight and a Bias. Each column of the -Output is computed by one slice i = 1, . . . , k of the tensor: +Output is computed by one slice $i = 1, . . . 
, k$ of the tensor: $$ M = (X W_i) * Y \\ Out_i = \sum_j {M_j} + Bias_i $$ -Where $$W_i$$ is the i-th slice of Input(Weight); - $$M_j$$ is the j-th column of $$M$$; - $$Out_i$$ is the i-th column of Output(Out); - $$Bias_i$$ is a column vector, each element of it is equal to - the i-th element of $$Bias$$; +Where $W_i$ is the $i$-th slice of Input(Weight); + $M_j$ is the $j$-th column of $M$; + $Out_i$ is the $i$-th column of Output(Out); + $Bias_i$ is a column vector, each element of it is equal to + the $i$-th element of $Bias$; )DOC"); } From 50d670ee0621d797f2d54a1d45fa0bc46af153ed Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 24 Nov 2017 11:20:51 +0800 Subject: [PATCH 4/7] Unify dtype and datatype (#5869) * Change all `data_type` in Python to `dtype` * Change `date_type` in C++ to `dtype` * Refine --- paddle/framework/backward.cc | 2 +- paddle/framework/tensor_array.cc | 2 +- paddle/operators/cast_op.cc | 8 +- paddle/operators/cast_op.h | 2 +- .../fill_constant_batch_size_like_op.cc | 4 +- paddle/operators/fill_constant_op.cc | 4 +- paddle/operators/gaussian_random_op.cc | 4 +- paddle/operators/nccl_op.cc | 2 +- paddle/operators/recurrent_op.cc | 2 +- paddle/operators/rnn_memory_helper_op.cc | 6 +- paddle/operators/uniform_random_op.cc | 4 +- paddle/operators/while_op.cc | 2 +- paddle/pybind/protobuf.cc | 4 +- python/paddle/v2/fluid/evaluator.py | 24 +-- python/paddle/v2/fluid/framework.py | 8 +- python/paddle/v2/fluid/initializer.py | 14 +- python/paddle/v2/fluid/io.py | 2 +- python/paddle/v2/fluid/layer_helper.py | 12 +- python/paddle/v2/fluid/layers.py | 137 +++++++++--------- python/paddle/v2/fluid/optimizer.py | 4 +- .../v2/fluid/tests/book/test_fit_a_line.py | 4 +- .../book/test_image_classification_train.py | 4 +- .../tests/book/test_label_semantic_roles.py | 22 +-- .../tests/book/test_recognize_digits_conv.py | 4 +- .../tests/book/test_recognize_digits_mlp.py | 4 +- .../tests/book/test_recommender_system.py | 20 +-- .../book/test_understand_sentiment_conv.py | 4 +- .../test_understand_sentiment_dynamic_lstm.py | 4 +- .../book/test_understand_sentiment_lstm.py | 6 +- .../v2/fluid/tests/book/test_word2vec.py | 18 +-- python/paddle/v2/fluid/tests/op_test.py | 9 +- python/paddle/v2/fluid/tests/test_cast_op.py | 4 +- .../v2/fluid/tests/test_conditional_block.py | 2 +- .../v2/fluid/tests/test_executor_and_mul.py | 4 +- .../tests/test_image_classification_layer.py | 10 +- .../v2/fluid/tests/test_inference_model_io.py | 4 +- python/paddle/v2/fluid/tests/test_layers.py | 42 +++--- .../fluid/tests/test_lod_tensor_array_ops.py | 2 +- .../v2/fluid/tests/test_mnist_if_else_op.py | 10 +- .../paddle/v2/fluid/tests/test_parameter.py | 2 +- .../v2/fluid/tests/test_protobuf_descs.py | 6 +- .../v2/fluid/tests/test_recurrent_op.py | 16 +- .../v2/fluid/tests/test_shrink_rnn_memory.py | 2 +- .../test_split_and_merge_lod_tensor_op.py | 4 +- python/paddle/v2/fluid/tests/test_variable.py | 4 +- python/paddle/v2/fluid/tests/test_while_op.py | 6 +- 46 files changed, 225 insertions(+), 239 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index b9018ecdba..bc0da55cda 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -522,7 +522,7 @@ ParamGradInfoMap AppendBackward( new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, {{"shape", std::vector{1}}, {"value", static_cast(1.0)}, - {"data_type", target.GetDataType()}})); + {"dtype", target.GetDataType()}})); // infer var type of fill_one_op 
fill_one_op->InferVarType(root_block); diff --git a/paddle/framework/tensor_array.cc b/paddle/framework/tensor_array.cc index 0947e33548..6058f1b8b1 100644 --- a/paddle/framework/tensor_array.cc +++ b/paddle/framework/tensor_array.cc @@ -302,7 +302,7 @@ LoDTensor TensorArray::Stack() const { const auto& first_dims = values_.front().dims(); // check all the values have the same shape - // TODO(superjom) check the same dtypes + // TODO(superjom) check the same data_type for (size_t idx = 1; idx < size(); idx++) { const auto& value_dims = values_[idx].dims(); PADDLE_ENFORCE_EQ(first_dims, value_dims); diff --git a/paddle/operators/cast_op.cc b/paddle/operators/cast_op.cc index 70ee7861ba..3082a53ccf 100644 --- a/paddle/operators/cast_op.cc +++ b/paddle/operators/cast_op.cc @@ -25,8 +25,8 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of cast op"); AddOutput("Out", "The output tensor of cast op"); - AddAttr("out_data_type", "output data type"); - AddAttr("in_data_type", "input data type"); + AddAttr("out_dtype", "output data type"); + AddAttr("in_dtype", "input data type"); AddComment(R"DOC( Cast Operator. @@ -58,8 +58,8 @@ class CastOpGradMaker : public framework::SingleGradOpDescMaker { grad->SetType("cast"); grad->SetInput("X", OutputGrad("Out")); grad->SetOutput("Out", InputGrad("X")); - grad->SetAttr("out_data_type", GetAttr("in_data_type")); - grad->SetAttr("in_data_type", GetAttr("out_data_type")); + grad->SetAttr("out_dtype", GetAttr("in_dtype")); + grad->SetAttr("in_dtype", GetAttr("out_dtype")); return std::unique_ptr(grad); } }; diff --git a/paddle/operators/cast_op.h b/paddle/operators/cast_op.h index ffdbff7030..850dc8e349 100644 --- a/paddle/operators/cast_op.h +++ b/paddle/operators/cast_op.h @@ -55,7 +55,7 @@ class CastOpKernel : public framework::OpKernel { auto* in = context.Input("X"); auto* out = context.Output("Out"); framework::VisitDataType( - static_cast(context.Attr("out_data_type")), + static_cast(context.Attr("out_dtype")), CastOpFunctor(in, out, context.device_context())); } }; diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 985b5d1e86..892922cd3a 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -52,7 +52,7 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -63,7 +63,7 @@ class FillConstantBatchSizeLikeOpMaker FillConstantBatchSizeLikeOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index 818f113b90..3d5f84bc23 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -34,7 +34,7 @@ class FillConstantOp : public framework::OperatorBase { using framework::OperatorBase::OperatorBase; void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { - auto data_type = static_cast(Attr("data_type")); 
+ auto data_type = static_cast(Attr("dtype")); auto value = Attr("value"); auto force_cpu = Attr("force_cpu"); auto &out = @@ -55,7 +55,7 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker { FillConstantOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 53ad86c6c4..254c83e137 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -60,7 +60,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -88,7 +88,7 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { "Random seed of generator." "0 means use system wide seed.") .SetDefault(0); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5(FP32)) " "Output data type.") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/nccl_op.cc b/paddle/operators/nccl_op.cc index 66fcc09bc8..22a37ff1bb 100644 --- a/paddle/operators/nccl_op.cc +++ b/paddle/operators/nccl_op.cc @@ -49,7 +49,7 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Communicator", "Create Communicator for communicating between gpus"); AddAttr>("gpus", "(vector) GPU id lists"); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 0075ccd242..ea60665e39 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -401,7 +401,7 @@ class RecurrentGradOp : public RecurrentBase { auto &inside_tensor = cur_scope.FindVar(inside_grad_name) ->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(inside_tensor.type()); + attrs["dtype"] = framework::ToDataType(inside_tensor.type()); attrs["shape"] = framework::vectorize2int(inside_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/operators/rnn_memory_helper_op.cc b/paddle/operators/rnn_memory_helper_op.cc index b621c7f1ba..3a035f0b9a 100644 --- a/paddle/operators/rnn_memory_helper_op.cc +++ b/paddle/operators/rnn_memory_helper_op.cc @@ -62,7 +62,7 @@ class RNNMemoryHelperOpInfoMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", ""); AddOutput("Out", ""); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); @@ -95,7 +95,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { auto &in_var_tensor = in_var->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(in_var_tensor.type()); + attrs["dtype"] = framework::ToDataType(in_var_tensor.type()); attrs["shape"] = framework::vectorize2int(in_var_tensor.dims()); attrs["value"] = 0.0f; @@ -121,7 +121,7 @@ class RNNMemoryHelperGradOpInfoMaker AddInput("X", ""); AddInput("Out", ""); AddOutput(framework::GradVarName("X"), ""); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") 
.SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 7975efc7cf..fff1dc7ccd 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -66,7 +66,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -99,7 +99,7 @@ uniform distribution. "Random seed used for generating samples. " "0 means use a seed generated by the system.") .SetDefault(0); - AddAttr("data_type", "(int, default 5(FP32)) Output tensor data type") + AddAttr("dtype", "(int, default 5(FP32)) Output tensor data type") .SetDefault(framework::DataType::FP32); } }; diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index dcc59f5ff2..68b4f77059 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -180,7 +180,7 @@ class WhileGradOp : public framework::OperatorBase { if (var->IsType()) { auto &inside_tensor = var->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(inside_tensor.type()); + attrs["dtype"] = framework::ToDataType(inside_tensor.type()); attrs["shape"] = framework::vectorize2int(inside_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 5a1ff9b797..6c8f06cccb 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -202,9 +202,9 @@ void BindVarDsec(py::module &m) { }, py::return_value_policy::reference) .def("set_shape", &VarDescBind::SetShape) - .def("set_data_type", &VarDescBind::SetDataType) + .def("set_dtype", &VarDescBind::SetDataType) .def("shape", &VarDescBind::Shape, py::return_value_policy::reference) - .def("data_type", &VarDescBind::GetDataType) + .def("dtype", &VarDescBind::GetDataType) .def("lod_level", &VarDescBind::GetLodLevel) .def("set_lod_level", &VarDescBind::SetLoDLevel) .def("type", &VarDescBind::GetType) diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py index 3a8f1831cf..0057ed6216 100644 --- a/python/paddle/v2/fluid/evaluator.py +++ b/python/paddle/v2/fluid/evaluator.py @@ -8,7 +8,7 @@ def _clone_var_in_block_(block, var): return block.create_var( name=var.name, shape=var.shape, - dtype=var.data_type, + dtype=var.dtype, type=var.type, lod_level=var.lod_level, persistable=True) @@ -57,7 +57,7 @@ class Evaluator(object): attrs={ "shape": g_var.shape, "value": .0, - "data_type": 5, + "dtype": 5, }) block.append_op( type="scale", inputs={"X": zeros}, outputs={"Out": g_var}) @@ -93,7 +93,7 @@ class Accuracy(Evaluator): def _update_ops(self, input, label, k=1, **kwargs): block = self._main_program.global_block() - topk_out = block.create_var(dtype=input.data_type) + topk_out = block.create_var(dtype=input.dtype) topk_indices = block.create_var(dtype="int64") block.append_op( type="top_k", @@ -122,16 +122,16 @@ class Accuracy(Evaluator): inputs={"X": [self._states["Total"]]}, outputs={"Out": [self._states["Total"]]}, attrs={ - "in_data_type": 5, # float32 - "out_data_type": 2, #int32 + "in_dtype": 5, # float32 + "out_dtype": 2, # int32 }) block.append_op( type="cast", inputs={"X": [self._states["Correct"]]}, outputs={"Out": [self._states["Correct"]]}, attrs={ - "in_data_type": 5, - "out_data_type": 2, + "in_dtype": 5, + "out_dtype": 2, }) 
block.append_op( @@ -153,7 +153,7 @@ class Accuracy(Evaluator): else: eval_program = Program() block = eval_program.global_block() - eval_out = block.create_var(dtype=self._states["Total"].data_type) + eval_out = block.create_var(dtype=self._states["Total"].dtype) e_total = _clone_var_in_block_(block, self._states["Total"]) e_correct = _clone_var_in_block_(block, self._states["Correct"]) block.append_op( @@ -161,16 +161,16 @@ class Accuracy(Evaluator): inputs={"X": [e_total]}, outputs={"Out": [e_total]}, attrs={ - "in_data_type": 2, #int32 - "out_data_type": 5, #float32 + "in_dtype": 2, # int32 + "out_dtype": 5, # float32 }) block.append_op( type="cast", inputs={"X": [e_correct]}, outputs={"Out": [e_correct]}, attrs={ - "in_data_type": 2, - "out_data_type": 5, + "in_dtype": 2, + "out_dtype": 5, }) block.append_op( type="elementwise_div", diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 7f7c310ad8..fb1c57d296 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -99,9 +99,9 @@ class Variable(object): if not isinstance(dtype, core.DataType): dtype = convert_np_dtype_to_dtype_(dtype) if is_new_var: - self.desc.set_data_type(dtype) + self.desc.set_dtype(dtype) else: - old_dtype = self.data_type + old_dtype = self.dtype if dtype != old_dtype: raise ValueError("Variable {0} has been created before. " "The previous data type is {1}; the new " @@ -162,8 +162,8 @@ class Variable(object): return tuple(self.desc.shape()) @property - def data_type(self): - return self.desc.data_type() + def dtype(self): + return self.desc.dtype() @property def lod_level(self): diff --git a/python/paddle/v2/fluid/initializer.py b/python/paddle/v2/fluid/initializer.py index 1a9d804ee7..9f23e68a76 100644 --- a/python/paddle/v2/fluid/initializer.py +++ b/python/paddle/v2/fluid/initializer.py @@ -93,7 +93,7 @@ class ConstantInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "value": self._value }) var.op = op @@ -140,7 +140,7 @@ class UniformInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "min": self._low, "max": self._high, "seed": self._seed @@ -188,7 +188,7 @@ class NormalInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "mean": self._mean, "std": self._std_dev, "seed": self._seed @@ -265,7 +265,7 @@ class XavierInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "min": -limit, "max": limit, "seed": self._seed @@ -278,7 +278,7 @@ class XavierInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "mean": 0.0, "std": std, "seed": self._seed @@ -348,7 +348,7 @@ class MSRAInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "min": -limit, "max": limit, "seed": self._seed @@ -361,7 +361,7 @@ class MSRAInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "mean": 0.0, "std": std, "seed": self._seed diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index 2d070814ee..6f55fe9e74 100644 --- a/python/paddle/v2/fluid/io.py +++ 
b/python/paddle/v2/fluid/io.py @@ -23,7 +23,7 @@ def _clone_var_in_block_(block, var): return block.create_var( name=var.name, shape=var.shape, - dtype=var.data_type, + dtype=var.dtype, type=var.type, lod_level=var.lod_level, persistable=True) diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index e40551ca73..e0880354fb 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -108,8 +108,8 @@ class LayerHelper(object): dtype = None for each in inputs: if dtype is None: - dtype = each.data_type - elif dtype != each.data_type: + dtype = each.dtype + elif dtype != each.dtype: raise ValueError("Data Type mismatch") return dtype @@ -149,7 +149,7 @@ class LayerHelper(object): self.startup_program.global_block().create_var( name=var.name, type=var.type, - dtype=var.data_type, + dtype=var.dtype, shape=var.shape, persistable=True, initializer=initializer) @@ -180,10 +180,10 @@ class LayerHelper(object): b = self.create_parameter( attr=bias_attr, shape=size, - dtype=input_var.data_type, + dtype=input_var.dtype, suffix='b', initializer=bias_initializer) - tmp = self.create_tmp_variable(dtype=input_var.data_type) + tmp = self.create_tmp_variable(dtype=input_var.dtype) self.append_op( type='elementwise_add', inputs={'X': [input_var], @@ -198,7 +198,7 @@ class LayerHelper(object): return input_var if isinstance(act, basestring): act = {'type': act} - tmp = self.create_tmp_variable(dtype=input_var.data_type) + tmp = self.create_tmp_variable(dtype=input_var.dtype) act_type = act.pop('type') self.append_op( type=act_type, diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index fac91aac97..d094035fe5 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -114,7 +114,7 @@ def embedding(input, is_sparse=False, param_initializer=None, param_attr=None, - data_type='float32', + dtype='float32', main_program=None, startup_program=None): """ @@ -125,7 +125,7 @@ def embedding(input, size: The size of the layer is_sparse: A flag that decleares whether the input is sparse param_attr: Parameters for this layer - data_type: The type of data : float32, float_16, int etc + dtype: The type of data : float32, float_16, int etc main_program: Name of the main program that calls this startup_program: Name of the startup program @@ -145,9 +145,9 @@ def embedding(input, w = helper.create_parameter( attr=helper.param_attr, shape=size, - dtype=data_type, + dtype=dtype, initializer=param_initializer or _get_default_param_initializer()) - tmp = helper.create_tmp_variable(data_type) + tmp = helper.create_tmp_variable(dtype) helper.append_op( type='lookup_table', inputs={'Ids': input, @@ -167,23 +167,23 @@ def dynamic_lstm(input, gate_activation='sigmoid', cell_activation='tanh', candidate_activation='tanh', - data_type='float32', + dtype='float32', main_program=None, startup_program=None): helper = LayerHelper('lstm', **locals()) size = size / 4 weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 4 * size], dtype=data_type) + attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype) bias_size = [1, 7 * size] if not use_peepholes: bias_size[1] = 4 * size bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=data_type, suffix='b') + attr=helper.bias_attr, shape=bias_size, dtype=dtype, suffix='b') - hidden = helper.create_tmp_variable(data_type) - cell = helper.create_tmp_variable(data_type) - batch_gate = 
helper.create_tmp_variable(data_type) - batch_cell_pre_act = helper.create_tmp_variable(data_type) + hidden = helper.create_tmp_variable(dtype) + cell = helper.create_tmp_variable(dtype) + batch_gate = helper.create_tmp_variable(dtype) + batch_cell_pre_act = helper.create_tmp_variable(dtype) helper.append_op( type='lstm', @@ -209,7 +209,7 @@ def dynamic_lstm(input, def data(name, shape, append_batch_size=True, - data_type='float32', + dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR, main_program=None, startup_program=None, @@ -221,7 +221,7 @@ def data(name, name: The name/alias of the function shape: Tuple declaring the shape. append_batch_size: Whether or not to append the data as a batch. - data_type: The type of data : float32, float_16, int etc + dtype: The type of data : float32, float_16, int etc type: The output type. By default it is LOD_TENSOR. main_program: Name of the main program that calls this startup_program: Name of the startup program @@ -251,7 +251,7 @@ def data(name, return helper.create_global_variable( name=name, shape=shape, - dtype=data_type, + dtype=dtype, type=type, stop_gradient=stop_gradient) @@ -362,9 +362,9 @@ def _create_op_func_(op_type): o_name = not_intermediate_outputs[0].name intermediate_output_names = [output.name for output in intermediate_outputs] - def infer_and_check_data_type(op_proto, **kwargs): + def infer_and_check_dtype(op_proto, **kwargs): """ - This function performs the sanity check for data_type and + This function performs the sanity check for dtype and instance type. """ dtype = None @@ -379,8 +379,8 @@ def _create_op_func_(op_type): op_type)) if dtype is None: - dtype = each.data_type - elif dtype != each.data_type: + dtype = each.dtype + elif dtype != each.dtype: raise ValueError( "operator {0} must input same dtype".format(op_type)) @@ -389,7 +389,7 @@ def _create_op_func_(op_type): def func(**kwargs): helper = LayerHelper(op_type, **kwargs) - dtype = infer_and_check_data_type(op_proto, **kwargs) + dtype = infer_and_check_dtype(op_proto, **kwargs) inputs = dict() for ipt in op_proto.inputs: @@ -426,19 +426,19 @@ _create_op_func_('reshape') _create_op_func_('transpose') -def cast(x, data_type, main_program=None): +def cast(x, dtype, main_program=None): """ - This function takes in the input with input_data_type - and casts it to the output_data_type as the output. + This function takes in the input with input_dtype + and casts it to the output_dtype as the output. 
""" helper = LayerHelper('cast', **locals()) - out = helper.create_tmp_variable(dtype=data_type) + out = helper.create_tmp_variable(dtype=dtype) helper.append_op( type='cast', inputs={'X': [x]}, outputs={'Out': [out]}, - attrs={'in_data_type': x.data_type, - 'out_data_type': out.data_type}) + attrs={'in_dtype': x.dtype, + 'out_dtype': out.dtype}) return out @@ -519,8 +519,8 @@ def split_lod_tensor(input, main_program=None, startup_program=None): helper = LayerHelper('split_lod_tensor', **locals()) - out_true = helper.create_tmp_variable(dtype=input.data_type) - out_false = helper.create_tmp_variable(dtype=input.data_type) + out_true = helper.create_tmp_variable(dtype=input.dtype) + out_false = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='split_lod_tensor', inputs={ @@ -541,7 +541,7 @@ def merge_lod_tensor(in_true, main_program=None, startup_program=None): helper = LayerHelper('merge_lod_tensor', **locals()) - out = helper.create_tmp_variable(dtype=in_true.data_type) + out = helper.create_tmp_variable(dtype=in_true.dtype) helper.append_op( type='merge_lod_tensor', inputs={'X': x, @@ -559,9 +559,9 @@ def cos_sim(X, Y, **kwargs): X and Y and returns that as the output. """ helper = LayerHelper('cos_sim', **kwargs) - out = helper.create_tmp_variable(dtype=X.data_type) - xnorm = helper.create_tmp_variable(dtype=X.data_type) - ynorm = helper.create_tmp_variable(dtype=X.data_type) + out = helper.create_tmp_variable(dtype=X.dtype) + xnorm = helper.create_tmp_variable(dtype=X.dtype) + ynorm = helper.create_tmp_variable(dtype=X.dtype) helper.append_op( type='cos_sim', inputs={'X': [X], @@ -577,7 +577,7 @@ def cross_entropy(input, label, **kwargs): This function computes cross_entropy using the input and label. """ helper = LayerHelper('cross_entropy', **kwargs) - out = helper.create_tmp_variable(dtype=input.data_type) + out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='cross_entropy', inputs={'X': [input], @@ -593,14 +593,14 @@ def square_error_cost(input, label, **kwargs): The output is appending the op to do the above. """ helper = LayerHelper('square_error_cost', **kwargs) - minus_out = helper.create_tmp_variable(dtype=input.data_type) + minus_out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='elementwise_sub', inputs={'X': [input], 'Y': [label]}, outputs={'Out': [minus_out]}) - square_out = helper.create_tmp_variable(dtype=input.data_type) + square_out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='square', inputs={'X': [minus_out]}, outputs={'Y': [square_out]}) return square_out @@ -612,7 +612,7 @@ def accuracy(input, label, k=1, **kwargs): The output is the top_k inputs and their indices. 
""" helper = LayerHelper("accuracy", **kwargs) - topk_out = helper.create_tmp_variable(dtype=input.data_type) + topk_out = helper.create_tmp_variable(dtype=input.dtype) topk_indices = helper.create_tmp_variable(dtype="int64") helper.append_op( type="top_k", @@ -883,12 +883,12 @@ def batch_norm(input, initializer=ConstantInitializer(0.0)) mean = helper.create_global_variable( - dtype=input.data_type, shape=param_shape, persistable=True) + dtype=input.dtype, shape=param_shape, persistable=True) helper.set_variable_initializer( var=mean, initializer=ConstantInitializer(0.0)) variance = helper.create_global_variable( - dtype=input.data_type, shape=param_shape, persistable=True) + dtype=input.dtype, shape=param_shape, persistable=True) helper.set_variable_initializer( var=variance, initializer=ConstantInitializer(1.0)) @@ -927,8 +927,8 @@ def batch_norm(input, def beam_search_decode(ids, scores, main_program=None, startup_program=None): helper = LayerHelper('beam_search_decode', **locals()) - sentence_ids = helper.create_tmp_variable(dtype=ids.data_type) - sentence_scores = helper.create_tmp_variable(dtype=ids.data_type) + sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) + sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) helper.append_op( type="beam_search_decode", @@ -1066,7 +1066,7 @@ class StaticRNN(object): boot_var = parent_block.create_var( name=var_name, shape=shape, - dtype=batch_ref.data_type, + dtype=batch_ref.dtype, persistable=False) parent_block.append_op( @@ -1076,7 +1076,7 @@ class StaticRNN(object): attrs={ 'value': init_value, 'shape': boot_var.shape, - 'data_type': boot_var.data_type, + 'dtype': boot_var.dtype, 'input_dim_idx': ref_batch_dim_idx, 'output_dim_idx': init_batch_dim_idx }) @@ -1085,7 +1085,7 @@ class StaticRNN(object): else: pre_mem = self.helper.create_variable( name=unique_name("@".join([self.helper.name, "mem"])), - dtype=init.data_type, + dtype=init.dtype, shape=init.shape) self.memories[pre_mem.name] = StaticRNNMemoryLink( init=init, pre_mem=pre_mem) @@ -1101,10 +1101,7 @@ class StaticRNN(object): raise ValueError("Static RNN only take fix seq_len input") ipt = self.helper.create_variable( - name=x.name, - dtype=x.data_type, - shape=list(x.shape[1:]), - type=x.type) + name=x.name, dtype=x.dtype, shape=list(x.shape[1:]), type=x.type) self.inputs.append(ipt) return ipt @@ -1113,17 +1110,17 @@ class StaticRNN(object): if not isinstance(o, Variable): raise TypeError("step output takes a Variable") - tmp_o = self.helper.create_tmp_variable(dtype=o.data_type) + tmp_o = self.helper.create_tmp_variable(dtype=o.dtype) self.helper.append_op( type='rnn_memory_helper', inputs={'X': [o]}, outputs={'Out': tmp_o}, - attrs={'data_type': o.data_type}) + attrs={'dtype': o.dtype}) out_var = self.parent_block().create_var( name=tmp_o.name, shape=[self.seq_len] + list(tmp_o.shape), - dtype=tmp_o.data_type) + dtype=tmp_o.dtype) self.outputs.append(out_var) @@ -1195,13 +1192,13 @@ class StaticRNN(object): pre_memories.append(mem.pre_mem.name) mem_var = rnn_block.var(mem.mem.name) assert isinstance(mem_var, Variable) - new_mem = self.helper.create_tmp_variable(dtype=mem_var.data_type) + new_mem = self.helper.create_tmp_variable(dtype=mem_var.dtype) rnn_block.append_op( type='rnn_memory_helper', inputs={'X': [mem_var]}, outputs={'Out': [new_mem]}, - attrs={'data_type': mem_var.data_type}) + attrs={'dtype': mem_var.dtype}) memories.append(new_mem.name) @@ -1251,7 +1248,7 @@ class While(object): if not isinstance(cond, Variable): raise TypeError("condition should 
be a variable") assert isinstance(cond, Variable) - if cond.data_type != core.DataType.BOOL: + if cond.dtype != core.DataType.BOOL: raise TypeError("condition should be a bool variable") if reduce(lambda a, b: a * b, cond.shape, 1) != 1: raise TypeError("condition should be a bool scalar") @@ -1323,9 +1320,9 @@ def lstm(x, main_program=main_program, startup_program=startup_program) - data_type = x.data_type - c = helper.create_tmp_variable(data_type) - h = helper.create_tmp_variable(data_type) + dtype = x.dtype + c = helper.create_tmp_variable(dtype) + h = helper.create_tmp_variable(dtype) helper.append_op( type='lstm_unit', @@ -1367,7 +1364,7 @@ def lod_tensor_to_array(x, table, main_program=None): array = helper.create_variable( name=unique_name("lod_tensor_to_array"), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=x.data_type) + dtype=x.dtype) helper.append_op( type='lod_tensor_to_array', inputs={'X': x, @@ -1382,7 +1379,7 @@ def array_to_lod_tensor(x, table, main_program=None): LOD_Tensor. """ helper = LayerHelper("array_to_lod_tensor", **locals()) - tmp = helper.create_tmp_variable(dtype=x.data_type) + tmp = helper.create_tmp_variable(dtype=x.dtype) helper.append_op( type="array_to_lod_tensor", inputs={'X': x, @@ -1394,7 +1391,7 @@ def array_to_lod_tensor(x, table, main_program=None): def fill_constant(shape, dtype, value, main_program=None, startup_program=None): """ This function creates a tensor , with shape as mentioned in the input and - specified data_type and fills this up with a constant value that + specified dtype and fills this up with a constant value that comes in the input. It also sets the stop_gradient to be True. """ helper = LayerHelper("fill_constant", **locals()) @@ -1403,11 +1400,9 @@ def fill_constant(shape, dtype, value, main_program=None, startup_program=None): type='fill_constant', inputs={}, outputs={'Out': [out]}, - attrs={ - 'shape': shape, - 'data_type': out.data_type, - 'value': float(value) - }) + attrs={'shape': shape, + 'dtype': out.dtype, + 'value': float(value)}) out.stop_gradient = True return out @@ -1428,7 +1423,7 @@ def fill_constant_batch_size_like(input, outputs={'Out': [out]}, attrs={ 'shape': shape, - 'data_type': out.data_type, + 'dtype': out.dtype, 'value': float(value), 'input_dim_idx': input_dim_idx, 'output_dim_idx': output_dim_idx @@ -1461,7 +1456,7 @@ def increment(x, value=1.0, in_place=True, main_program=None): """ helper = LayerHelper("increment", **locals()) if not in_place: - out = helper.create_tmp_variable(dtype=x.data_type) + out = helper.create_tmp_variable(dtype=x.dtype) else: out = x helper.append_op( @@ -1482,7 +1477,7 @@ def array_write(x, i, array=None, main_program=None): array = helper.create_variable( name="{0}.out".format(helper.name), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=x.data_type) + dtype=x.dtype) helper.append_op( type='write_to_array', inputs={'X': [x], @@ -1521,7 +1516,7 @@ def array_read(array, i, main_program=None): array, Variable) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: raise TypeError("array should be tensor array vairable") - out = helper.create_tmp_variable(dtype=array.data_type) + out = helper.create_tmp_variable(dtype=array.dtype) helper.append_op( type='read_from_array', inputs={'X': [array], @@ -1536,7 +1531,7 @@ def shrink_memory(x, i, table, main_program=None): as mentioned in the input parameter. 
""" helper = LayerHelper('shrink_memory', **locals()) - out = helper.create_tmp_variable(dtype=x.data_type) + out = helper.create_tmp_variable(dtype=x.dtype) helper.append_op( type='shrink_rnn_memory', inputs={'X': [x], @@ -1698,11 +1693,11 @@ class IfElse(object): parent_block = self.parent_block() out_true = parent_block.create_var( name=unique_name('ifelse_input' + self.helper.name), - dtype=x.data_type) + dtype=x.dtype) out_false = parent_block.create_var( name=unique_name('ifelse_input' + self.helper.name), - dtype=x.data_type) + dtype=x.dtype) parent_block.append_op( type='split_lod_tensor', inputs={ @@ -1744,7 +1739,7 @@ class IfElse(object): # create outside tensor outside_out = parent_block.create_var( name=unique_name("_".join([self.helper.name, 'output'])), - dtype=each_out.data_type) + dtype=each_out.dtype) out_table.append(outside_out) # assign local var to outside diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 87a478c290..e82f0f060d 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -92,7 +92,7 @@ class Optimizer(object): var = self.helper.create_global_variable( name=unique_name(name), persistable=True, - dtype=dtype or param.data_type, + dtype=dtype or param.dtype, type=param.type, shape=param.shape) self.helper.set_variable_initializer( @@ -202,7 +202,7 @@ class Optimizer(object): """ params_grads = append_backward_ops(loss, parameter_list, no_grad_set or set()) - # Add regularization if any + # Add regularization if any params_grads = append_regularization_ops(params_grads) optimize_ops = self.create_optimization_pass(params_grads, loss, startup_program) diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py index a7f3bfc0ca..a899f1088d 100644 --- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -7,11 +7,11 @@ from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.io import save_persistables, load_persistables from paddle.v2.fluid.optimizer import SGDOptimizer -x = layers.data(name='x', shape=[13], data_type='float32') +x = layers.data(name='x', shape=[13], dtype='float32') y_predict = layers.fc(input=x, size=1, act=None) -y = layers.data(name='y', shape=[1], data_type='float32') +y = layers.data(name='y', shape=[1], dtype='float32') cost = layers.square_error_cost(input=y_predict, label=y) avg_cost = layers.mean(x=cost) diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index efe63a68f0..a3acab67ce 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -90,8 +90,8 @@ def vgg16_bn_drop(input): classdim = 10 data_shape = [3, 32, 32] -images = layers.data(name='pixel', shape=data_shape, data_type='float32') -label = layers.data(name='label', shape=[1], data_type='int64') +images = layers.data(name='pixel', shape=data_shape, dtype='float32') +label = layers.data(name='label', shape=[1], dtype='int64') # Add neural network config # option 1. 
resnet diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index f66e6e748b..9c9064ba96 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -34,26 +34,26 @@ def load_parameter(file_name, h, w): def db_lstm(): # 8 features - word = layers.data(name='word_data', shape=[1], data_type='int64') - predicate = layers.data(name='verb_data', shape=[1], data_type='int64') - ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64') - ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64') - ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64') - ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64') - ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64') - mark = layers.data(name='mark_data', shape=[1], data_type='int64') + word = layers.data(name='word_data', shape=[1], dtype='int64') + predicate = layers.data(name='verb_data', shape=[1], dtype='int64') + ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], dtype='int64') + ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], dtype='int64') + ctx_0 = layers.data(name='ctx_0_data', shape=[1], dtype='int64') + ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], dtype='int64') + ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], dtype='int64') + mark = layers.data(name='mark_data', shape=[1], dtype='int64') predicate_embedding = layers.embedding( input=predicate, size=[pred_len, word_dim], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'vemb'}) mark_embedding = layers.embedding( input=mark, size=[mark_dict_len, mark_dim], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE) word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] @@ -125,7 +125,7 @@ def to_lodtensor(data, place): def main(): # define network topology feature_out = db_lstm() - target = layers.data(name='target', shape=[1], data_type='int64') + target = layers.data(name='target', shape=[1], dtype='int64') crf_cost = layers.linear_chain_crf( input=feature_out, label=target, diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py index 8f73768960..0bea5f95c8 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py @@ -8,8 +8,8 @@ import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.optimizer import AdamOptimizer -images = layers.data(name='pixel', shape=[1, 28, 28], data_type='float32') -label = layers.data(name='label', shape=[1], data_type='int64') +images = layers.data(name='pixel', shape=[1, 28, 28], dtype='float32') +label = layers.data(name='label', shape=[1], dtype='int64') conv_pool_1 = nets.simple_img_conv_pool( input=images, filter_size=5, diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index e42e4c9cc0..03d3881549 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -10,7 +10,7 @@ from paddle.v2.fluid.optimizer import MomentumOptimizer from paddle.v2.fluid.regularizer import L2DecayRegularizer BATCH_SIZE = 128 -image = layers.data(name='x', shape=[784], 
data_type='float32') +image = layers.data(name='x', shape=[784], dtype='float32') param_attr = { 'name': None, @@ -27,7 +27,7 @@ predict = layers.fc(input=hidden2, act='softmax', param_attr=param_attr) -label = layers.data(name='y', shape=[1], data_type='int64') +label = layers.data(name='y', shape=[1], dtype='int64') cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index 55ded3aed3..f8dc151857 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -18,11 +18,11 @@ def get_usr_combined_features(): USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 - uid = layers.data(name='user_id', shape=[1], data_type='int64') + uid = layers.data(name='user_id', shape=[1], dtype='int64') usr_emb = layers.embedding( input=uid, - data_type='float32', + dtype='float32', size=[USR_DICT_SIZE, 32], param_attr={'name': 'user_table'}, is_sparse=IS_SPARSE) @@ -31,7 +31,7 @@ def get_usr_combined_features(): USR_GENDER_DICT_SIZE = 2 - usr_gender_id = layers.data(name='gender_id', shape=[1], data_type='int64') + usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_emb = layers.embedding( input=usr_gender_id, @@ -42,7 +42,7 @@ def get_usr_combined_features(): usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) - usr_age_id = layers.data(name='age_id', shape=[1], data_type="int64") + usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_emb = layers.embedding( input=usr_age_id, @@ -53,7 +53,7 @@ def get_usr_combined_features(): usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 - usr_job_id = layers.data(name='job_id', shape=[1], data_type="int64") + usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_emb = layers.embedding( input=usr_job_id, @@ -75,11 +75,11 @@ def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 - mov_id = layers.data(name='movie_id', shape=[1], data_type='int64') + mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_emb = layers.embedding( input=mov_id, - data_type='float32', + dtype='float32', size=[MOV_DICT_SIZE, 32], param_attr={'name': 'movie_table'}, is_sparse=IS_SPARSE) @@ -88,7 +88,7 @@ def get_mov_combined_features(): CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - category_id = layers.data(name='category_id', shape=[1], data_type='int64') + category_id = layers.data(name='category_id', shape=[1], dtype='int64') mov_categories_emb = layers.embedding( input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) @@ -98,7 +98,7 @@ def get_mov_combined_features(): MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - mov_title_id = layers.data(name='movie_title', shape=[1], data_type='int64') + mov_title_id = layers.data(name='movie_title', shape=[1], dtype='int64') mov_title_emb = layers.embedding( input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) @@ -126,7 +126,7 @@ def model(): # need cos sim inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) - label = layers.data(name='score', shape=[1], data_type='float32') + label = layers.data(name='score', shape=[1], 
dtype='float32') square_cost = layers.square_error_cost(input=inference, label=label) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py index 4929f7cf61..3103be83a6 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py @@ -10,8 +10,8 @@ from paddle.v2.fluid.optimizer import AdamOptimizer def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): - data = layers.data(name="words", shape=[1], data_type="int64") - label = layers.data(name="label", shape=[1], data_type="int64") + data = layers.data(name="words", shape=[1], dtype="int64") + label = layers.data(name="label", shape=[1], dtype="int64") emb = layers.embedding(input=data, size=[input_dim, emb_dim]) conv_3 = nets.sequence_conv_pool( diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py index b3ee919388..208978224f 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py @@ -14,8 +14,8 @@ def stacked_lstm_net(input_dim, hid_dim=512, stacked_num=3): assert stacked_num % 2 == 1 - data = layers.data(name="words", shape=[1], data_type="int64") - label = layers.data(name="label", shape=[1], data_type="int64") + data = layers.data(name="words", shape=[1], dtype="int64") + label = layers.data(name="label", shape=[1], dtype="int64") emb = layers.embedding(input=data, size=[input_dim, emb_dim]) # add bias attr diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py index 9a51a2f207..8aebeba653 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py @@ -12,19 +12,19 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): name="words", shape=[seq_len * batch_size, 1], append_batch_size=False, - data_type="int64") + dtype="int64") label = layers.data( name="label", shape=[batch_size, 1], append_batch_size=False, - data_type="int64") + dtype="int64") emb = layers.embedding(input=data, size=[dict_dim, emb_dim]) emb = layers.reshape(x=emb, shape=[batch_size, seq_len, emb_dim]) emb = layers.transpose(x=emb, axis=[1, 0, 2]) c_pre_init = layers.fill_constant( - dtype=emb.data_type, shape=[batch_size, emb_dim], value=0.0) + dtype=emb.dtype, shape=[batch_size, emb_dim], value=0.0) layer_1_out = layers.lstm(emb, c_pre_init=c_pre_init, hidden_dim=emb_dim) layer_1_out = layers.transpose(x=layer_1_out, axis=[1, 0, 2]) diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index afa7b28519..0629e1cab7 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -16,34 +16,34 @@ IS_SPARSE = True word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) -first_word = layers.data(name='firstw', shape=[1], data_type='int64') -second_word = layers.data(name='secondw', shape=[1], data_type='int64') -third_word = layers.data(name='thirdw', shape=[1], data_type='int64') -forth_word = layers.data(name='forthw', shape=[1], data_type='int64') -next_word = layers.data(name='nextw', 
shape=[1], data_type='int64') +first_word = layers.data(name='firstw', shape=[1], dtype='int64') +second_word = layers.data(name='secondw', shape=[1], dtype='int64') +third_word = layers.data(name='thirdw', shape=[1], dtype='int64') +forth_word = layers.data(name='forthw', shape=[1], dtype='int64') +next_word = layers.data(name='nextw', shape=[1], dtype='int64') embed_first = layers.embedding( input=first_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) embed_second = layers.embedding( input=second_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) embed_third = layers.embedding( input=third_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) embed_forth = layers.embedding( input=forth_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py index 90269e308a..51023bd19a 100644 --- a/python/paddle/v2/fluid/tests/op_test.py +++ b/python/paddle/v2/fluid/tests/op_test.py @@ -458,7 +458,7 @@ class OpTest(unittest.TestCase): mean_inputs = map(block.var, output_names) if len(mean_inputs) == 1: - loss = block.create_var(dtype=mean_inputs[0].data_type, shape=[1]) + loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1]) op = block.append_op( inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean') op.desc.infer_var_type(block.desc) @@ -466,8 +466,7 @@ class OpTest(unittest.TestCase): else: avg_sum = [] for cur_loss in mean_inputs: - cur_avg_loss = block.create_var( - dtype=cur_loss.data_type, shape=[1]) + cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1]) op = block.append_op( inputs={"X": [cur_loss]}, outputs={"Out": [cur_avg_loss]}, @@ -476,13 +475,13 @@ class OpTest(unittest.TestCase): op.desc.infer_shape(block.desc) avg_sum.append(cur_avg_loss) - loss_sum = block.create_var(dtype=avg_sum[0].data_type, shape=[1]) + loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1]) op_sum = block.append_op( inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum') op_sum.desc.infer_var_type(block.desc) op_sum.desc.infer_shape(block.desc) - loss = block.create_var(dtype=loss_sum.data_type, shape=[1]) + loss = block.create_var(dtype=loss_sum.dtype, shape=[1]) op_loss = block.append_op( inputs={"X": loss_sum}, outputs={"Out": loss}, diff --git a/python/paddle/v2/fluid/tests/test_cast_op.py b/python/paddle/v2/fluid/tests/test_cast_op.py index 0c4b631065..4e431bb88d 100644 --- a/python/paddle/v2/fluid/tests/test_cast_op.py +++ b/python/paddle/v2/fluid/tests/test_cast_op.py @@ -10,8 +10,8 @@ class TestCastOp(op_test.OpTest): self.inputs = {'X': ipt.astype('float32')} self.outputs = {'Out': ipt.astype('float64')} self.attrs = { - 'in_data_type': int(core.DataType.FP32), - 'out_data_type': int(core.DataType.FP64) + 'in_dtype': int(core.DataType.FP32), + 'out_dtype': int(core.DataType.FP64) } self.op_type = 'cast' diff --git a/python/paddle/v2/fluid/tests/test_conditional_block.py b/python/paddle/v2/fluid/tests/test_conditional_block.py index 293803f004..2a30fd1079 100644 --- a/python/paddle/v2/fluid/tests/test_conditional_block.py +++ b/python/paddle/v2/fluid/tests/test_conditional_block.py @@ -9,7 +9,7 @@ import numpy class ConditionalBlock(unittest.TestCase): def 
test_forward(self): - data = layers.data(name='X', shape=[1], data_type='float32') + data = layers.data(name='X', shape=[1], dtype='float32') data.stop_gradient = False cond = layers.ConditionalBlock(inputs=[data]) out = layers.create_tensor(dtype='float32') diff --git a/python/paddle/v2/fluid/tests/test_executor_and_mul.py b/python/paddle/v2/fluid/tests/test_executor_and_mul.py index 709250d0c8..da64739de5 100644 --- a/python/paddle/v2/fluid/tests/test_executor_and_mul.py +++ b/python/paddle/v2/fluid/tests/test_executor_and_mul.py @@ -8,11 +8,11 @@ import numpy class TestExecutor(unittest.TestCase): def test_mul(self): - a = data(name='a', shape=[784], data_type='float32') + a = data(name='a', shape=[784], dtype='float32') b = data( name='b', shape=[784, 100], - data_type='float32', + dtype='float32', append_batch_size=False) out = mul(x=a, y=b) place = core.CPUPlace() diff --git a/python/paddle/v2/fluid/tests/test_image_classification_layer.py b/python/paddle/v2/fluid/tests/test_image_classification_layer.py index bf5444107f..8e8e1b0a8c 100644 --- a/python/paddle/v2/fluid/tests/test_image_classification_layer.py +++ b/python/paddle/v2/fluid/tests/test_image_classification_layer.py @@ -32,7 +32,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program) layers.batch_norm( input=images, @@ -47,7 +47,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program) layers.dropout( x=images, @@ -64,7 +64,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) conv1 = conv_block(images, 64, 2, [0.3, 0], main_program, @@ -80,13 +80,13 @@ class TestLayer(unittest.TestCase): image1 = layers.data( name='pixel1', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) image2 = layers.data( name='pixel2', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) out = layers.elementwise_add( diff --git a/python/paddle/v2/fluid/tests/test_inference_model_io.py b/python/paddle/v2/fluid/tests/test_inference_model_io.py index 98b95713b7..74f1ce2326 100644 --- a/python/paddle/v2/fluid/tests/test_inference_model_io.py +++ b/python/paddle/v2/fluid/tests/test_inference_model_io.py @@ -19,13 +19,13 @@ class TestBook(unittest.TestCase): x = layers.data( name='x', shape=[2], - data_type='float32', + dtype='float32', main_program=program, startup_program=init_program) y = layers.data( name='y', shape=[1], - data_type='float32', + dtype='float32', main_program=program, startup_program=init_program) diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index f88e0b4e15..87dc6d1a62 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -9,11 +9,11 @@ class TestBook(unittest.TestCase): def test_fit_a_line(self): program = Program() x = layers.data( - name='x', shape=[13], data_type='float32', main_program=program) + name='x', shape=[13], dtype='float32', main_program=program) y_predict = layers.fc(input=x, size=1, act=None, main_program=program) y = layers.data( - name='y', shape=[1], data_type='float32', main_program=program) + name='y', 
shape=[1], dtype='float32', main_program=program) cost = layers.square_error_cost( input=y_predict, label=y, main_program=program) @@ -28,12 +28,9 @@ class TestBook(unittest.TestCase): # Change g_program, so the rest layers use `g_program` images = layers.data( - name='pixel', - shape=[784], - data_type='float32', - main_program=program) + name='pixel', shape=[784], dtype='float32', main_program=program) label = layers.data( - name='label', shape=[1], data_type='int32', main_program=program) + name='label', shape=[1], dtype='int32', main_program=program) hidden1 = layers.fc(input=images, size=128, act='relu', @@ -58,7 +55,7 @@ class TestBook(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='int32', + dtype='int32', main_program=program) layers.conv2d( input=images, @@ -74,10 +71,10 @@ class TestBook(unittest.TestCase): images = layers.data( name='pixel', shape=[1, 28, 28], - data_type='float32', + dtype='float32', main_program=program) label = layers.data( - name='label', shape=[1], data_type='int32', main_program=program) + name='label', shape=[1], dtype='int32', main_program=program) conv_pool_1 = nets.simple_img_conv_pool( input=images, filter_size=5, @@ -112,39 +109,39 @@ class TestBook(unittest.TestCase): dict_size = 10000 embed_size = 32 first_word = layers.data( - name='firstw', shape=[1], data_type='int64', main_program=program) + name='firstw', shape=[1], dtype='int64', main_program=program) second_word = layers.data( - name='secondw', shape=[1], data_type='int64', main_program=program) + name='secondw', shape=[1], dtype='int64', main_program=program) third_word = layers.data( - name='thirdw', shape=[1], data_type='int64', main_program=program) + name='thirdw', shape=[1], dtype='int64', main_program=program) forth_word = layers.data( - name='forthw', shape=[1], data_type='int64', main_program=program) + name='forthw', shape=[1], dtype='int64', main_program=program) next_word = layers.data( - name='nextw', shape=[1], data_type='int64', main_program=program) + name='nextw', shape=[1], dtype='int64', main_program=program) embed_first = layers.embedding( input=first_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_second = layers.embedding( input=second_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_third = layers.embedding( input=third_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_forth = layers.embedding( input=forth_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) @@ -173,12 +170,9 @@ class TestBook(unittest.TestCase): # Change g_program, so the rest layers use `g_program` images = layers.data( - name='pixel', - shape=[784], - data_type='float32', - main_program=program) + name='pixel', shape=[784], dtype='float32', main_program=program) label = layers.data( - name='label', shape=[1], data_type='int32', main_program=program) + name='label', shape=[1], dtype='int32', main_program=program) hidden = layers.fc(input=images, size=128, main_program=program) crf = layers.linear_chain_crf( input=hidden, label=label, main_program=program) diff --git a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py index b18cb6b49f..16e64b8cd5 
100644 --- a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py +++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py @@ -132,7 +132,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): x = layers.data( name='x', shape=[1], - data_type='float32', + dtype='float32', main_program=program, stop_gradient=False) table = layers.lod_rank_table(x, level=0, main_program=program) diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py index 8af99005dc..e76357a5be 100644 --- a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py +++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py @@ -11,10 +11,9 @@ import numpy as np class TestMNISTIfElseOp(unittest.TestCase): def test_raw_api(self): kwargs = {'startup_program': Program(), 'main_program': Program()} - image = layers.data( - name='x', shape=[784], data_type='float32', **kwargs) + image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) - label = layers.data(name='y', shape=[1], data_type='int64', **kwargs) + label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) limit = layers.fill_constant_batch_size_like( input=label, dtype='int64', shape=[1], value=5.0, **kwargs) @@ -84,10 +83,9 @@ class TestMNISTIfElseOp(unittest.TestCase): def test_ifelse(self): kwargs = {'startup_program': Program(), 'main_program': Program()} - image = layers.data( - name='x', shape=[784], data_type='float32', **kwargs) + image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) - label = layers.data(name='y', shape=[1], data_type='int64', **kwargs) + label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) limit = layers.fill_constant_batch_size_like( input=label, dtype='int64', shape=[1], value=5.0, **kwargs) diff --git a/python/paddle/v2/fluid/tests/test_parameter.py b/python/paddle/v2/fluid/tests/test_parameter.py index a633d22c2b..d467e4bbb7 100644 --- a/python/paddle/v2/fluid/tests/test_parameter.py +++ b/python/paddle/v2/fluid/tests/test_parameter.py @@ -20,7 +20,7 @@ class TestParameter(unittest.TestCase): self.assertIsNotNone(param) self.assertEqual('fc.w', param.name) self.assertEqual((784, 100), param.shape) - self.assertEqual(core.DataType.FP32, param.data_type) + self.assertEqual(core.DataType.FP32, param.dtype) self.assertEqual(0, param.block.idx) exe = Executor(core.CPUPlace()) p = exe.run(g_main_program, fetch_list=[param])[0] diff --git a/python/paddle/v2/fluid/tests/test_protobuf_descs.py b/python/paddle/v2/fluid/tests/test_protobuf_descs.py index 098a9802df..d8abe17606 100644 --- a/python/paddle/v2/fluid/tests/test_protobuf_descs.py +++ b/python/paddle/v2/fluid/tests/test_protobuf_descs.py @@ -101,13 +101,13 @@ class TestVarDesc(unittest.TestCase): self.assertEqual(src_shape, res_shape) self.assertEqual(core.VarDesc.VarType.SELECTED_ROWS, var.type()) - def test_data_type(self): + def test_dtype(self): program_desc = core.ProgramDesc() block = program_desc.block(0) var = block.var('my_var') var.set_type(core.VarDesc.VarType.LOD_TENSOR) - var.set_data_type(core.DataType.INT32) - self.assertEqual(core.DataType.INT32, var.data_type()) + var.set_dtype(core.DataType.INT32) + self.assertEqual(core.DataType.INT32, var.dtype()) self.assertEqual(core.VarDesc.VarType.LOD_TENSOR, var.type()) diff --git a/python/paddle/v2/fluid/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py index b623d12318..88bcdc3e6a 100644 --- a/python/paddle/v2/fluid/tests/test_recurrent_op.py +++ 
b/python/paddle/v2/fluid/tests/test_recurrent_op.py @@ -118,14 +118,14 @@ class RecurrentOpTest1(unittest.TestCase): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data( shape=[self.input_dim], - data_type='float32', + dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False @@ -251,14 +251,14 @@ class RecurrentOpTest2(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data( shape=[self.input_dim], - data_type='float32', + dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False @@ -350,21 +350,21 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot1 = layers.data( shape=[self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='h_boot1', append_batch_size=False, **self.p_info) h_boot1.stop_gradient = False h_boot2 = layers.data( shape=[self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='h_boot2', append_batch_size=False, **self.p_info) @@ -435,7 +435,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) diff --git a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py index 1a3b88e18e..953629d610 100644 --- a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py +++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py @@ -9,7 +9,7 @@ import numpy class TestShrinkRNNMemory(unittest.TestCase): def test_shrink_rnn_memory(self): - x = layers.data('x', shape=[100], data_type='float32') + x = layers.data('x', shape=[100], dtype='float32') x.stop_gradient = False table = layers.lod_rank_table(x=x) i = layers.zeros(dtype='int64', shape=[1]) diff --git a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py index 3aed83b2ea..a98cb3bbab 100644 --- a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py +++ b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py @@ -123,13 +123,13 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): x = layers.data( name='x', shape=[1], - data_type='float32', + dtype='float32', main_program=program, stop_gradient=False) y = layers.data( name='y', shape=[1], - data_type='bool', + dtype='bool', main_program=program, stop_gradient=False) diff --git a/python/paddle/v2/fluid/tests/test_variable.py b/python/paddle/v2/fluid/tests/test_variable.py index c3e1f9ac0a..92ffdceb6c 100644 --- a/python/paddle/v2/fluid/tests/test_variable.py +++ b/python/paddle/v2/fluid/tests/test_variable.py @@ -22,13 +22,13 @@ class TestVariable(unittest.TestCase): w = b.create_var( dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") self.assertNotEqual(str(w), "") - self.assertEqual(core.DataType.FP64, w.data_type) + 
self.assertEqual(core.DataType.FP64, w.dtype) self.assertEqual((784, 100), w.shape) self.assertEqual("fc.w", w.name) self.assertEqual(0, w.lod_level) w = b.create_var(name='fc.w') - self.assertEqual(core.DataType.FP64, w.data_type) + self.assertEqual(core.DataType.FP64, w.dtype) self.assertEqual((784, 100), w.shape) self.assertEqual("fc.w", w.name) self.assertEqual(0, w.lod_level) diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py index 84b432333f..fca0cdcc31 100644 --- a/python/paddle/v2/fluid/tests/test_while_op.py +++ b/python/paddle/v2/fluid/tests/test_while_op.py @@ -9,11 +9,11 @@ import numpy class TestWhileOp(unittest.TestCase): def test_simple_forward(self): d0 = layers.data( - "d0", shape=[10], append_batch_size=False, data_type='float32') + "d0", shape=[10], append_batch_size=False, dtype='float32') d1 = layers.data( - "d1", shape=[10], append_batch_size=False, data_type='float32') + "d1", shape=[10], append_batch_size=False, dtype='float32') d2 = layers.data( - "d2", shape=[10], append_batch_size=False, data_type='float32') + "d2", shape=[10], append_batch_size=False, dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') From c9172c1cb30ec13a854b9a1c7d85ea8eeae19b30 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Fri, 24 Nov 2017 12:36:50 +0800 Subject: [PATCH 5/7] Make enforce target (#5889) * make enforce a target and dependent on nccl when gpu is enabled * add some more dependency --- paddle/memory/CMakeLists.txt | 2 +- paddle/platform/CMakeLists.txt | 15 ++++++++++----- paddle/platform/dynload/CMakeLists.txt | 2 +- paddle/platform/enforce.cc | 19 +++++++++++++++++++ paddle/platform/enforce.h | 2 -- 5 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 paddle/platform/enforce.cc diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index aed5275dbf..8841c14ee0 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1,6 +1,6 @@ add_subdirectory(detail) -cc_library(memory SRCS memory.cc DEPS place) +cc_library(memory SRCS memory.cc DEPS place enforce) cc_library(memcpy SRCS memcpy.cc) cc_library(paddle_memory diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index bd86a9fe26..88df28a966 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -1,15 +1,20 @@ -cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog) +if(WITH_GPU) + cc_library(enforce SRCS enforce.cc DEPS nccl) +else() + cc_library(enforce SRCS enforce.cc) +endif() +cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce) + +cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog enforce) cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info) -nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog) +nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce) -cc_library(place SRCS place.cc) +cc_library(place SRCS place.cc DEPS enforce) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) -cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece) - IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) ELSE() diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt index bb3fec1be9..f4fda65907 100644 --- a/paddle/platform/dynload/CMakeLists.txt +++ b/paddle/platform/dynload/CMakeLists.txt @@ -1,3 +1,3 @@ -cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) 
+cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc DEPS dynamic_loader nccl) diff --git a/paddle/platform/enforce.cc b/paddle/platform/enforce.cc new file mode 100644 index 0000000000..e8d31bc782 --- /dev/null +++ b/paddle/platform/enforce.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace platform {} // namespace platform +} // namespace paddle diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index bfe708748a..415020ab96 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -49,7 +49,6 @@ limitations under the License. */ namespace paddle { namespace platform { -namespace { #ifdef __GNUC__ inline std::string demangle(std::string name) { int status = -4; // some arbitrary value to eliminate the compiler warning @@ -60,7 +59,6 @@ inline std::string demangle(std::string name) { #else inline std::string demangle(std::string name) { return name; } #endif -} struct EnforceNotMet : public std::exception { std::exception_ptr exp_; From 3a76062c8463d58bfeef7e4d6a4f899f79341989 Mon Sep 17 00:00:00 2001 From: QI JUN Date: Fri, 24 Nov 2017 13:18:13 +0800 Subject: [PATCH 6/7] support testing when training and handle dropout and batch_norm operator in testing mode (#5734) * is_training to is_test in dropout op * handle dropout and batch_norm operator when prune pdesc in testing mode * handle dropout and batch_norm operator when prune pdesc in testing mode * add get_inference_program method * fix dropout op * fix ci * test data after each batch training * refine code * refine test_book3 * fix ci * follow comments --- paddle/framework/executor.cc | 2 +- paddle/framework/prune.cc | 23 +++++++++++ paddle/framework/prune.h | 2 + paddle/operators/dropout_op.cc | 8 ++-- paddle/operators/dropout_op.cu | 2 +- paddle/operators/dropout_op.h | 6 +-- paddle/pybind/pybind.cc | 5 +++ python/paddle/v2/fluid/evaluator.py | 3 ++ python/paddle/v2/fluid/framework.py | 7 ++++ python/paddle/v2/fluid/io.py | 19 +++++++-- .../book/test_image_classification_train.py | 40 +++++++++++++++++-- .../tests/book/test_recognize_digits_mlp.py | 37 +++++++++++++++-- .../paddle/v2/fluid/tests/test_dropout_op.py | 10 ++--- 13 files changed, 141 insertions(+), 23 deletions(-) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index adedd8cb0e..2ffb5b7dbb 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -120,7 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id, for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); - VLOG(10) << op->DebugString(); + VLOG(3) << op->DebugString(); op->Run(*local_scope, *device); } if (create_local_scope) { diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc index bf3066983c..da76052eb4 100644 --- 
a/paddle/framework/prune.cc +++ b/paddle/framework/prune.cc @@ -26,6 +26,8 @@ namespace framework { const std::string kFeedOpType = "feed"; const std::string kFetchOpType = "fetch"; +const std::string kDropOutOpType = "dropout"; +const std::string kBatchNormOpType = "batch_norm"; bool HasDependentVar(const OpDesc& op_desc, const std::set& dependent_vars) { @@ -106,5 +108,26 @@ void Prune(const ProgramDesc& input, ProgramDesc* output) { prune_impl(input, output, 0); } +void inference_optimize_impl(const ProgramDesc& input, ProgramDesc* output, + int block_id) { + *output = input; + auto* op_field = output->mutable_blocks(block_id)->mutable_ops(); + for (auto& op_desc : *op_field) { + if (op_desc.type() == kDropOutOpType || + op_desc.type() == kBatchNormOpType) { + for (auto& attr : *op_desc.mutable_attrs()) { + if (attr.name() == "is_test") { + attr.set_b(true); + break; + } + } + } + } +} + +void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output) { + inference_optimize_impl(input, output, 0); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/prune.h b/paddle/framework/prune.h index 8cfb16343a..23db014894 100644 --- a/paddle/framework/prune.h +++ b/paddle/framework/prune.h @@ -22,5 +22,7 @@ namespace framework { void Prune(const ProgramDesc& input, ProgramDesc* output); +void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output); + } // namespace framework } // namespace paddle diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc index 818146aca7..932c0bf8fb 100644 --- a/paddle/operators/dropout_op.cc +++ b/paddle/operators/dropout_op.cc @@ -30,7 +30,7 @@ class DropoutOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", x_dims); - if (ctx->Attrs().Get("is_training") == true) { + if (ctx->Attrs().Get("is_test") == false) { ctx->SetOutputDim("Mask", x_dims); } ctx->ShareLoD("X", /*->*/ "Out"); @@ -49,7 +49,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("dropout_prob", "Probability of setting units to zero.") .SetDefault(.5f); - AddAttr("is_training", "True if in training phase.").SetDefault(true); + AddAttr("is_test", "True if in test phase.").SetDefault(false); AddAttr("seed", "Dropout random seed.").SetDefault(0); AddComment(R"DOC( @@ -71,8 +71,8 @@ class DropoutOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_training"), true, - "GradOp is only callable when is_training is true"); + PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_test"), false, + "GradOp is only callable when is_test is false"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasInput("Mask"), "Mask must not be null."); diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu index 30c769000f..db3578b9bf 100644 --- a/paddle/operators/dropout_op.cu +++ b/paddle/operators/dropout_op.cu @@ -59,7 +59,7 @@ class GPUDropoutKernel : public framework::OpKernel { auto Y = EigenMatrix::Reshape(*y, 1); auto place = context.GetEigenDevice(); - if (context.Attr("is_training")) { + if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); int size = framework::product(mask->dims()); diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h index 6000b75fec..d9a130fdc0 100644 --- 
a/paddle/operators/dropout_op.h +++ b/paddle/operators/dropout_op.h @@ -35,7 +35,7 @@ class CPUDropoutKernel : public framework::OpKernel { auto* y_data = y->mutable_data(context.GetPlace()); float dropout_prob = context.Attr("dropout_prob"); - if (context.Attr("is_training")) { + if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); int seed = context.Attr("seed"); @@ -65,8 +65,8 @@ template class DropoutGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE(context.Attr("is_training"), - "GradOp is only callable when is_training is true"); + PADDLE_ENFORCE(!context.Attr("is_test"), + "GradOp is only callable when is_test is false"); auto* grad_x = context.Output(framework::GradVarName("X")); auto* grad_y = context.Input(framework::GradVarName("Out")); diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 3d8d3f1d2f..e697739cc6 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -293,6 +293,11 @@ All parameter, weight, gradient are variables in Paddle. Prune(*prog_with_targets.Proto(), &pruned_desc); return new ProgramDescBind(pruned_desc); }); + m.def("inference_optimize", [](ProgramDescBind &origin) { + ProgramDesc pruned_desc; + InferenceOptimize(*(origin.Proto()), &pruned_desc); + return new ProgramDescBind(pruned_desc); + }); m.def_submodule( "var_names", "The module will return special predefined variable name in Paddle") diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py index 0057ed6216..f78d2f814c 100644 --- a/python/paddle/v2/fluid/evaluator.py +++ b/python/paddle/v2/fluid/evaluator.py @@ -33,6 +33,9 @@ class Evaluator(object): else: self._main_program = g_main_program + def states(self): + return self._states + def _update_ops(self, *args, **kwargs): """ append update ops to the global states diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index fb1c57d296..872c19c2f6 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -511,6 +511,13 @@ class Program(object): res.sync_with_cpp() return res + def inference_optimize(self): + res = Program() + res.desc = core.inference_optimize(self.desc) + res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())] + res.sync_with_cpp() + return res + @staticmethod def parse_from_string(binary_str): p = Program() diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index 6f55fe9e74..e5b2aa3b91 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -6,7 +6,8 @@ from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \ __all__ = [ 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', - 'load_persistables', "save_inference_model", "load_inference_model" + 'load_persistables', "save_inference_model", "load_inference_model", + "get_inference_program" ] @@ -151,6 +152,17 @@ def load_persistables(executor, dirname, main_program=None): predicate=is_persistable) +def get_inference_program(target_vars, main_program=None): + if main_program is None: + main_program = g_main_program + if not isinstance(target_vars, list): + target_vars = [target_vars] + + pruned_program = main_program.prune(targets=target_vars) + inference_program = pruned_program.inference_optimize() + return inference_program + + def save_inference_model(dirname, feeded_var_names, target_vars, 
@@ -177,13 +189,14 @@ def save_inference_model(dirname, if not os.path.isdir(dirname): os.makedirs(dirname) - pruned_program = main_program.prune(target_vars) + pruned_program = main_program.prune(targets=target_vars) + inference_program = pruned_program.inference_optimize() fetch_var_names = [v.name for v in target_vars] model_file_name = dirname + "/__model__" with open(model_file_name, "w") as f: pickle.dump({ - "program_desc_str": pruned_program.desc.serialize_to_string(), + "program_desc_str": inference_program.desc.serialize_to_string(), "feed_var_names": feeded_var_names, "fetch_var_names": fetch_var_names }, f, -1) diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index a3acab67ce..76cbd410f9 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -5,6 +5,7 @@ import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets import paddle.v2.fluid.evaluator as evaluator +from paddle.v2.fluid.io import get_inference_program from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.initializer import XavierInitializer from paddle.v2.fluid.optimizer import AdamOptimizer @@ -116,9 +117,11 @@ PASS_NUM = 1 train_reader = paddle.batch( paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=128 * 10), + paddle.dataset.cifar.train10(), buf_size=BATCH_SIZE * 10), batch_size=BATCH_SIZE) +test_reader = paddle.batch(paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE) + place = core.CPUPlace() exe = Executor(place) @@ -149,10 +152,41 @@ for pass_id in range(PASS_NUM): loss = np.array(outs[0]) acc = np.array(outs[1]) pass_acc = accuracy.eval(exe) + + batch_id = batch_id + 1 + + test_accuracy, test_acc_out = evaluator.accuracy( + input=predict, label=label) + + test_target = [avg_cost, test_acc_out] + test_accuracy.states().values() + inference_program = get_inference_program(test_target) + + test_accuracy.reset(exe) + + for data in test_reader(): + x_data = np.array(map(lambda x: x[0].reshape(data_shape), + data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = np.expand_dims(y_data, axis=1) + + tensor_x = core.LoDTensor() + tensor_x.set(x_data, place) + + tensor_y = core.LoDTensor() + tensor_y.set(y_data, place) + + outs = exe.run(inference_program, + feed={'pixel': tensor_x, + 'label': tensor_y}, + fetch_list=[avg_cost, test_acc_out]) + out = np.array(outs[0]) + acc = np.array(outs[1]) + + test_pass_acc = test_accuracy.eval(exe) + print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) + " loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( - pass_acc)) - batch_id = batch_id + 1 + pass_acc) + " test_pass_acc:" + str(test_pass_acc)) if batch_id > 1: # this model is slow, so if we can train two mini batch, we think it works properly. 
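Taken together, the io.py and evaluator.py changes above give a small recipe for running a held-out evaluation with dropout and batch_norm switched into test mode. The following is only a minimal sketch, assuming predict, label, avg_cost, exe, place, data_shape and test_reader are already defined as in the training script above; the feed keys 'pixel' and 'label' likewise mirror that script and are not fixed by the new helpers.

import numpy as np
import paddle.v2.fluid.core as core
import paddle.v2.fluid.evaluator as evaluator
from paddle.v2.fluid.io import get_inference_program

# Build a test-time accuracy evaluator on the same predict/label layers.
test_accuracy, test_acc_out = evaluator.accuracy(input=predict, label=label)

# The evaluator's state variables must be part of the pruning targets,
# otherwise the counters read by test_accuracy.eval() would be pruned away.
test_target = [avg_cost, test_acc_out] + test_accuracy.states().values()

# get_inference_program prunes the main program to these targets and then
# calls inference_optimize(), which sets is_test=True on every dropout and
# batch_norm op in the pruned copy; the training program is left untouched.
inference_program = get_inference_program(test_target)

test_accuracy.reset(exe)
for data in test_reader():
    x_data = np.array(map(lambda x: x[0].reshape(data_shape),
                          data)).astype("float32")
    y_data = np.expand_dims(
        np.array(map(lambda x: x[1], data)).astype("int64"), axis=1)

    tensor_x = core.LoDTensor()
    tensor_x.set(x_data, place)
    tensor_y = core.LoDTensor()
    tensor_y.set(y_data, place)

    # Dropout and batch_norm run with is_test=True inside this program.
    exe.run(inference_program,
            feed={'pixel': tensor_x, 'label': tensor_y},
            fetch_list=[avg_cost, test_acc_out])

test_pass_acc = test_accuracy.eval(exe)

Because the helper operates on a pruned copy of the program, the optimizer ops and backward pass in the original main program are untouched, so training can resume after each evaluation pass.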
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index 03d3881549..f57a5c8d98 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -4,6 +4,7 @@ import paddle.v2.fluid.core as core import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.evaluator as evaluator +from paddle.v2.fluid.io import get_inference_program from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.initializer import UniformInitializer from paddle.v2.fluid.optimizer import MomentumOptimizer @@ -42,6 +43,8 @@ train_reader = paddle.batch( paddle.dataset.mnist.train(), buf_size=8192), batch_size=BATCH_SIZE) +test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) + place = core.CPUPlace() exe = Executor(place) @@ -69,8 +72,36 @@ for pass_id in range(PASS_NUM): acc = np.array(outs[1]) pass_acc = accuracy.eval(exe) - if pass_acc > 0.7: + test_accuracy, test_acc_out = evaluator.accuracy( + input=predict, label=label) + + test_target = [avg_cost, test_acc_out] + test_accuracy.states().values() + inference_program = get_inference_program(test_target) + + test_accuracy.reset(exe) + for data in test_reader(): + x_data = np.array(map(lambda x: x[0], data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = np.expand_dims(y_data, axis=1) + + tensor_x = core.LoDTensor() + tensor_x.set(x_data, place) + + tensor_y = core.LoDTensor() + tensor_y.set(y_data, place) + + outs = exe.run(inference_program, + feed={'x': tensor_x, + 'y': tensor_y}, + fetch_list=[avg_cost, test_acc_out]) + out = np.array(outs[0]) + acc = np.array(outs[1]) + + test_pass_acc = test_accuracy.eval(exe) + print("pass_id=" + str(pass_id) + " train_cost=" + str( + out) + " train_acc=" + str(acc) + " train_pass_acc=" + str(pass_acc) + + " test_acc=" + str(test_pass_acc)) + + if test_pass_acc > 0.7: exit(0) - # print("pass_id=" + str(pass_id) + " auc=" + - # str(acc) + " pass_acc=" + str(pass_acc)) exit(1) diff --git a/python/paddle/v2/fluid/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py index b14a366fca..4f5ea836b4 100644 --- a/python/paddle/v2/fluid/tests/test_dropout_op.py +++ b/python/paddle/v2/fluid/tests/test_dropout_op.py @@ -7,7 +7,7 @@ class TestDropoutOp(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_training': True} + self.attrs = {'dropout_prob': 0.0, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64)).astype('float32') @@ -24,7 +24,7 @@ class TestDropoutOp2(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 1.0, 'is_training': True} + self.attrs = {'dropout_prob': 1.0, 'is_test': False} self.outputs = { 'Out': np.zeros((32, 64)).astype('float32'), 'Mask': np.zeros((32, 64)).astype('float32') @@ -35,7 +35,7 @@ class TestDropoutOp3(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_training': True} + self.attrs = {'dropout_prob': 0.0, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64, 2)).astype('float32') @@ -46,7 +46,7 @@ class 
TestDropoutOp4(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.35, 'is_training': False} + self.attrs = {'dropout_prob': 0.35, 'is_test': True} self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']} def test_check_output(self): @@ -57,7 +57,7 @@ class TestDropoutOp5(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")} - self.attrs = {'dropout_prob': 0.75, 'is_training': False} + self.attrs = {'dropout_prob': 0.75, 'is_test': True} self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']} def test_check_output(self): From 65c859db7aadfdaccb1a04afe788d66d0e4a8694 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Fri, 24 Nov 2017 13:32:47 +0800 Subject: [PATCH 7/7] beam_search_decode support multi data type (#5847) * beam_search_decode support multi data type * add VisitDataType for beam search decode * use Specialization to handle bool * move Specialization of BeamSearchDecodeFunctor out of class --- paddle/operators/beam_search_decode_op.cc | 36 +++++++++++++++++-- .../fluid/tests/test_beam_search_decode_op.py | 6 ++-- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/paddle/operators/beam_search_decode_op.cc b/paddle/operators/beam_search_decode_op.cc index 3904a97d58..c796a0c5d0 100644 --- a/paddle/operators/beam_search_decode_op.cc +++ b/paddle/operators/beam_search_decode_op.cc @@ -17,6 +17,36 @@ limitations under the License. */ namespace paddle { namespace operators { +struct BeamSearchDecodeFunctor { + BeamSearchDecodeFunctor(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, + LoDTensor* id_tensor, LoDTensor* score_tensor) + : step_ids_(step_ids), + step_scores_(step_scores), + id_tensor_(id_tensor), + score_tensor_(score_tensor) {} + + template + void operator()() const; + + const LoDTensorArray& step_ids_; + const LoDTensorArray& step_scores_; + LoDTensor* id_tensor_; + LoDTensor* score_tensor_; +}; + +template +void BeamSearchDecodeFunctor::operator()() const { + BeamSearchDecoder beam_search_decoder; + beam_search_decoder.PackAllSteps(step_ids_, step_scores_, id_tensor_, + score_tensor_); +} + +template <> +void BeamSearchDecodeFunctor::operator()() const { + PADDLE_THROW("beam search decode op does not support bool!"); +} + class BeamSearchDecodeOp : public framework::OperatorBase { public: BeamSearchDecodeOp(const std::string& type, @@ -45,9 +75,9 @@ class BeamSearchDecodeOp : public framework::OperatorBase { LoDTensor* sentenceIds = ctx.Output("SentenceIds"); LoDTensor* sentenceScores = ctx.Output("SentenceScores"); - BeamSearchDecoder beam_search_decoder; - beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds, - sentenceScores); + framework::VisitDataType( + framework::ToDataType(scores->at(0).type()), + BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores)); } }; diff --git a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py index 8a11820d2a..5fad7d8cce 100644 --- a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py +++ b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py @@ -35,15 +35,15 @@ class TestBeamSearchDecodeOp(unittest.TestCase): self.append_lod_tensor( scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], np.array( - [1, 2, 3, 4, 5, 6], dtype="float32")) + [1, 2, 3, 4, 5, 6], dtype="float64")) self.append_lod_tensor( scores, [[0, 3, 6], [0, 1, 
1, 3, 5, 5, 6]], np.array( - [0, 1, 2, 3, 4, 5], dtype="float32")) + [0, 1, 2, 3, 4, 5], dtype="float64")) self.append_lod_tensor( scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], np.array( - [0, 1, 2, 3, 4], dtype="float32")) + [0, 1, 2, 3, 4], dtype="float64")) sentence_ids = self.scope.var("sentence_ids").get_tensor() sentence_scores = self.scope.var("sentence_scores").get_tensor()