From 0fad7647d7eb883c849ce6468ba5e781276e9358 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 12 Jan 2018 12:06:32 +0800 Subject: [PATCH 1/6] add how to update whl in build_from_source.md --- doc/getstarted/build_and_install/build_from_source_cn.rst | 5 +++++ doc/getstarted/build_and_install/build_from_source_en.rst | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/doc/getstarted/build_and_install/build_from_source_cn.rst b/doc/getstarted/build_and_install/build_from_source_cn.rst index 41ac07ca56..1e75087462 100644 --- a/doc/getstarted/build_and_install/build_from_source_cn.rst +++ b/doc/getstarted/build_and_install/build_from_source_cn.rst @@ -32,6 +32,11 @@ PaddlePaddle主要使用 `CMake `_ 以及GCC, G++作为编译 pip install build/python/dist/*.whl +如果机器中已经安装过PaddlePaddle,请卸载再安装: + +.. code-block:: bash + + pip install build/python/dist/*.whl -U .. _run_test: diff --git a/doc/getstarted/build_and_install/build_from_source_en.rst b/doc/getstarted/build_and_install/build_from_source_en.rst index 92211aee8c..fde8a18daa 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.rst +++ b/doc/getstarted/build_and_install/build_from_source_en.rst @@ -36,6 +36,11 @@ machine or copy it to the target machine. pip install build/python/dist/*.whl +If the machine has installed PaddlePaddle before, please uninstall it first and reinstall it again. + +.. code-block:: bash + + pip install build/python/dist/*.whl -U .. _run_test: From 5528ec13212a1171c8274769b3b6e1842cc6861f Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 12 Jan 2018 17:10:52 +0800 Subject: [PATCH 2/6] add another method of update whl --- doc/getstarted/build_and_install/build_from_source_cn.rst | 7 ++++++- doc/getstarted/build_and_install/build_from_source_en.rst | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/getstarted/build_and_install/build_from_source_cn.rst b/doc/getstarted/build_and_install/build_from_source_cn.rst index 1e75087462..71904dc41e 100644 --- a/doc/getstarted/build_and_install/build_from_source_cn.rst +++ b/doc/getstarted/build_and_install/build_from_source_cn.rst @@ -32,10 +32,15 @@ PaddlePaddle主要使用 `CMake `_ 以及GCC, G++作为编译 pip install build/python/dist/*.whl -如果机器中已经安装过PaddlePaddle,请卸载再安装: +如果机器中已经安装过PaddlePaddle,有两种方法: .. code-block:: bash + 1. 先卸载之前的版本,再重新安装 + pip uninstall paddlepaddle + pip install build/python/dist/*.whl + + 2. 直接升级到更新的版本 pip install build/python/dist/*.whl -U .. _run_test: diff --git a/doc/getstarted/build_and_install/build_from_source_en.rst b/doc/getstarted/build_and_install/build_from_source_en.rst index fde8a18daa..27f73b2e2c 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.rst +++ b/doc/getstarted/build_and_install/build_from_source_en.rst @@ -36,10 +36,15 @@ machine or copy it to the target machine. pip install build/python/dist/*.whl -If the machine has installed PaddlePaddle before, please uninstall it first and reinstall it again. +If the machine has installed PaddlePaddle before, there are two methods: .. code-block:: bash + 1. uninstall and reinstall + pip uninstall paddlepaddle + pip install build/python/dist/*.whl + + 2. upgrade directly pip install build/python/dist/*.whl -U .. _run_test: From 8ac744f372ed8980007d53ee3cbc1acd7db51a56 Mon Sep 17 00:00:00 2001 From: ying Date: Fri, 12 Jan 2018 14:57:23 +0800 Subject: [PATCH 3/6] add wrapper for elementwise math operator. --- doc/api/v2/fluid/layers.rst | 129 ++++++++++++++++++ python/paddle/v2/fluid/__init__.py | 19 ++- python/paddle/v2/fluid/backward.py | 5 +- python/paddle/v2/fluid/clip.py | 4 +- python/paddle/v2/fluid/default_scope_funcs.py | 26 ++-- python/paddle/v2/fluid/evaluator.py | 35 ++--- python/paddle/v2/fluid/framework.py | 12 +- python/paddle/v2/fluid/initializer.py | 7 +- python/paddle/v2/fluid/io.py | 12 +- python/paddle/v2/fluid/layers/nn.py | 45 ++++-- python/paddle/v2/fluid/layers/ops.py | 29 +++- python/paddle/v2/fluid/layers/tensor.py | 12 +- .../fluid/memory_optimization_transpiler.py | 6 +- python/paddle/v2/fluid/nets.py | 5 +- python/paddle/v2/fluid/registry.py | 17 ++- python/paddle/v2/fluid/regularizer.py | 6 +- 16 files changed, 304 insertions(+), 65 deletions(-) diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst index 696a8012aa..24bdf08fff 100644 --- a/doc/api/v2/fluid/layers.rst +++ b/doc/api/v2/fluid/layers.rst @@ -358,3 +358,132 @@ reduce_min .. autofunction:: paddle.v2.fluid.layers.reduce_min :noindex: +logsigmoid +---------- +.. autofunction:: paddle.v2.fluid.layers.logsigmoid + :noindex: + +exp +--- +.. autofunction:: paddle.v2.fluid.layers.exp + :noindex: + +relu +---- +.. autofunction:: paddle.v2.fluid.layers.relu + :noindex: + +tanh +---- +.. autofunction:: paddle.v2.fluid.layers.tanh + :noindex: + +tanh_shrink +----------- +.. autofunction:: paddle.v2.fluid.layers.tanh_shrink + :noindex: + +softshrink +---------- +.. autofunction:: paddle.v2.fluid.layers.softshrink + :noindex: + +sqrt +---- +.. autofunction:: paddle.v2.fluid.layers.sqrt + :noindex: + +abs +---- +.. autofunction:: paddle.v2.fluid.layers.abs + :noindex: + +ceil +---- +.. autofunction:: paddle.v2.fluid.layers.ceil + :noindex: + +floor +----- +.. autofunction:: paddle.v2.fluid.layers.floor + :noindex: + +round +----- +.. autofunction:: paddle.v2.fluid.layers.round + :noindex: + +reciprocal +---------- +.. autofunction:: paddle.v2.fluid.layers.reciprocal + :noindex: + +log +--- +.. autofunction:: paddle.v2.fluid.layers.log + :noindex: + +square +------ +.. autofunction:: paddle.v2.fluid.layers.square + :noindex: + +softplus +-------- +.. autofunction:: paddle.v2.fluid.layers.softplus + :noindex: + +softsign +--------- +.. autofunction:: paddle.v2.fluid.layers.softsign + :noindex: + +brelu +----- +.. autofunction:: paddle.v2.fluid.layers.brelu + :noindex: + +leaky_relu +---------- +.. autofunction:: paddle.v2.fluid.layers.leaky_relu + :noindex: + +soft_relu +--------- +.. autofunction:: paddle.v2.fluid.layers.soft_relu + :noindex: + +elu +---- +.. autofunction:: paddle.v2.fluid.layers.elu + :noindex: + +relu6 +----- +.. autofunction:: paddle.v2.fluid.layers.relu6 + :noindex: + +pow +---- +.. autofunction:: paddle.v2.fluid.layers.pow + :noindex: + +hard_shrink +----------- +.. autofunction:: paddle.v2.fluid.layers.hard_shrink + :noindex: + +thresholded_relu +---------------- +.. autofunction:: paddle.v2.fluid.layers.thresholded_relu + :noindex: + +hard_sigmoid +------------- +.. autofunction:: paddle.v2.fluid.layers.hard_sigmoid + :noindex: + +swish +------ +.. autofunction:: paddle.v2.fluid.layers.swish + :noindex: diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index 422aa0a5ba..ec5159fca1 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -23,9 +23,22 @@ from memory_optimization_transpiler import memory_optimize Tensor = LoDTensor __all__ = framework.__all__ + executor.__all__ + [ - 'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward', - 'regularizer', 'LoDTensor', 'CPUPlace', 'CUDAPlace', 'Tensor', 'ParamAttr' - 'DataFeeder', 'clip', 'DistributeTranspiler', 'memory_optimize' + 'io', + 'initializer', + 'layers', + 'nets', + 'optimizer', + 'backward', + 'regularizer', + 'LoDTensor', + 'CPUPlace', + 'CUDAPlace', + 'Tensor', + 'ParamAttr' + 'DataFeeder', + 'clip', + 'DistributeTranspiler', + 'memory_optimize', ] diff --git a/python/paddle/v2/fluid/backward.py b/python/paddle/v2/fluid/backward.py index cea2d1e090..43f6133a65 100644 --- a/python/paddle/v2/fluid/backward.py +++ b/python/paddle/v2/fluid/backward.py @@ -3,7 +3,10 @@ from . import core import collections import copy -__all__ = ['append_backward', 'calc_gradient'] +__all__ = [ + 'append_backward', + 'calc_gradient', +] def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index b1fd1c2b65..776c0f3f02 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -3,7 +3,9 @@ import layers from . import core __all__ = [ - 'GradientClipByValue', 'append_gradient_clip_ops', 'error_clip_callback' + 'GradientClipByValue', + 'append_gradient_clip_ops', + 'error_clip_callback', ] diff --git a/python/paddle/v2/fluid/default_scope_funcs.py b/python/paddle/v2/fluid/default_scope_funcs.py index 60c6165b6b..9aebc07f8e 100644 --- a/python/paddle/v2/fluid/default_scope_funcs.py +++ b/python/paddle/v2/fluid/default_scope_funcs.py @@ -1,16 +1,16 @@ """ Default scope function. -`Paddle` manages Scope as programming language's scope. It just a -thread-local stack of Scope. Top of that stack is current scope, the bottom -of that stack is all scopes' parent. +`Paddle` manages Scope as programming language's scope. It just a +thread-local stack of Scope. Top of that stack is current scope, the bottom +of that stack is all scopes' parent. -Invoking `var/find_var` can `new/find` variable in current scope. -Invoking `enter_local_scope/leave_local_scope` can create or destroy local -scope. +Invoking `var/find_var` can `new/find` variable in current scope. +Invoking `enter_local_scope/leave_local_scope` can create or destroy local +scope. -A `scoped_function` will take a `function` as input. That function will be -invoked in a new local scope. +A `scoped_function` will take a `function` as input. That function will be +invoked in a new local scope. """ import paddle.v2.fluid.core @@ -19,8 +19,12 @@ import threading __tl_scope__ = threading.local() __all__ = [ - 'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'var', - 'find_var', 'scoped_function' + 'get_cur_scope', + 'enter_local_scope', + 'leave_local_scope', + 'var', + 'find_var', + 'scoped_function', ] @@ -71,7 +75,7 @@ def find_var(name): def scoped_function(func): """ invoke `func` in new scope. - + :param func: a callable function that will be run in new scope. :type func: callable """ diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py index e186ee96c3..dc083f37b5 100644 --- a/python/paddle/v2/fluid/evaluator.py +++ b/python/paddle/v2/fluid/evaluator.py @@ -4,7 +4,10 @@ import layers from framework import Program, unique_name, Variable, program_guard from layer_helper import LayerHelper -__all__ = ['Accuracy', 'ChunkEvaluator'] +__all__ = [ + 'Accuracy', + 'ChunkEvaluator', +] def _clone_var_(block, var): @@ -21,19 +24,19 @@ def _clone_var_(block, var): class Evaluator(object): """ Base Class for all evaluators - + Args: - name(str): The name of evaluator. such as, "accuracy". Used for generate + name(str): The name of evaluator. such as, "accuracy". Used for generate temporary variable name. - main_program(Program, optional): The evaluator should be added to this + main_program(Program, optional): The evaluator should be added to this main_program. Default default_main_program() - startup_program(Program, optional):The parameter should be added to this + startup_program(Program, optional):The parameter should be added to this startup_program. Default default_startup_program() - + Attributes: - states(list): The list of state variables. states will be reset to zero + states(list): The list of state variables. states will be reset to zero when `reset` is invoked. - metrics(list): The list of metrics variables. They will be calculate + metrics(list): The list of metrics variables. They will be calculate every mini-batch """ @@ -66,14 +69,14 @@ class Evaluator(object): def create_state(self, suffix, dtype, shape): """ - Create state variable. - + Create state variable. + NOTE: It is not a public API. - + Args: - suffix(str): the state suffix. - dtype(str|core.DataType): the state data type - shape(tuple|list): the shape of state + suffix(str): the state suffix. + dtype(str|core.DataType): the state data type + shape(tuple|list): the shape of state Returns: State variable @@ -127,8 +130,8 @@ class Accuracy(Evaluator): class ChunkEvaluator(Evaluator): """ - Accumulate counter numbers output by chunk_eval from mini-batches and - compute the precision recall and F1-score using the accumulated counter + Accumulate counter numbers output by chunk_eval from mini-batches and + compute the precision recall and F1-score using the accumulated counter numbers. """ diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 3ef6b33192..bdbfe9da07 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -7,9 +7,15 @@ import proto.framework_pb2 as framework_pb2 from . import core __all__ = [ - 'Block', 'Variable', 'Program', 'Operator', 'default_startup_program', - 'default_main_program', 'program_guard', 'switch_startup_program', - 'switch_main_program' + 'Block', + 'Variable', + 'Program', + 'Operator', + 'default_startup_program', + 'default_main_program', + 'program_guard', + 'switch_startup_program', + 'switch_main_program', ] EMPTY_VAR_NAME = core.kEmptyVarName() diff --git a/python/paddle/v2/fluid/initializer.py b/python/paddle/v2/fluid/initializer.py index c0839caaf2..c3ed1a9089 100644 --- a/python/paddle/v2/fluid/initializer.py +++ b/python/paddle/v2/fluid/initializer.py @@ -1,7 +1,12 @@ import framework import numpy as np -__all__ = ['Constant', 'Uniform', 'Normal', 'Xavier'] +__all__ = [ + 'Constant', + 'Uniform', + 'Normal', + 'Xavier', +] class Initializer(object): diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index c63567601a..1d28e9c5a6 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -4,9 +4,15 @@ import cPickle as pickle from paddle.v2.fluid.framework import Program, Parameter, default_main_program, Variable __all__ = [ - 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', - 'load_persistables', "save_inference_model", "load_inference_model", - "get_inference_program" + 'save_vars', + 'save_params', + 'save_persistables', + 'load_vars', + 'load_params', + 'load_persistables', + 'save_inference_model', + 'load_inference_model', + 'get_inference_program', ] diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 1fb6523f55..94184d59f6 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -9,12 +9,33 @@ from ..param_attr import ParamAttr from tensor import concat __all__ = [ - 'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf', - 'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy', - 'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d', - 'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand', - 'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min', - 'sequence_first_step', 'sequence_last_step', 'dropout' + 'fc', + 'embedding', + 'dynamic_lstm', + 'gru_unit', + 'linear_chain_crf', + 'crf_decoding', + 'cos_sim', + 'cross_entropy', + 'square_error_cost', + 'accuracy', + 'chunk_eval', + 'sequence_conv', + 'conv2d', + 'sequence_pool', + 'pool2d', + 'batch_norm', + 'beam_search_decode', + 'conv2d_transpose', + 'sequence_expand', + 'lstm_unit', + 'reduce_sum', + 'reduce_mean', + 'reduce_max', + 'reduce_min', + 'sequence_first_step', + 'sequence_last_step', + 'dropout', ] @@ -248,13 +269,13 @@ def gru_unit(input, h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1}) The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms - of the equation above, the :math:`z_t` is split into 3 parts - - :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to - implement a full GRU unit operator for an input, a fully + of the equation above, the :math:`z_t` is split into 3 parts - + :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to + implement a full GRU unit operator for an input, a fully connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`. - The terms :math:`u_t` and :math:`r_t` represent the update and reset gates - of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is + The terms :math:`u_t` and :math:`r_t` represent the update and reset gates + of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is an intermediate candidate hidden output, which is denoted by :math:`m_t`. This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})` and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`. @@ -276,7 +297,7 @@ def gru_unit(input, .. code-block:: python # assuming we have x_t_data and prev_hidden of size=10 - x_t = fluid.layers.fc(input=x_t_data, size=30) + x_t = fluid.layers.fc(input=x_t_data, size=30) hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t, hidden = prev_hidden) diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index d3a5b70785..51a85dbbd3 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -1,7 +1,34 @@ from ..registry import register_layer __activations__ = [ - 'abs', 'tanh', 'sigmoid', 'relu', 'sqrt', 'ceil', 'floor', 'log', 'round' + 'sigmoid', + 'logsigmoid', + 'exp', + 'relu', + 'tanh', + 'tanh_shrink', + 'softshrink', + 'sqrt', + 'abs', + 'ceil', + 'floor', + 'round', + 'reciprocal', + 'log', + 'square', + 'softplus', + 'softsign', + 'brelu', + 'leaky_relu', + 'soft_relu', + 'elu', + 'relu6', + 'pow', + 'stanh', + 'hard_shrink', + 'thresholded_relu', + 'hard_sigmoid', + 'swish', ] __all__ = [ diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py index 5f12ecfc14..3f8ebeeb48 100644 --- a/python/paddle/v2/fluid/layers/tensor.py +++ b/python/paddle/v2/fluid/layers/tensor.py @@ -2,8 +2,16 @@ from ..layer_helper import LayerHelper from ..param_attr import ParamAttr __all__ = [ - 'create_tensor', 'create_parameter', 'cast', 'concat', 'sums', 'assign', - 'fill_constant_batch_size_like', 'fill_constant', 'ones', 'zeros' + 'create_tensor', + 'create_parameter', + 'cast', + 'concat', + 'sums', + 'assign', + 'fill_constant_batch_size_like', + 'fill_constant', + 'ones', + 'zeros', ] diff --git a/python/paddle/v2/fluid/memory_optimization_transpiler.py b/python/paddle/v2/fluid/memory_optimization_transpiler.py index 6800d7ddbb..293b116957 100644 --- a/python/paddle/v2/fluid/memory_optimization_transpiler.py +++ b/python/paddle/v2/fluid/memory_optimization_transpiler.py @@ -121,8 +121,10 @@ class ControlFlowGraph(object): # and dtype_to_size[cache_dtype] if x_dtype == cache_dtype: print( - "Hit Cache !!!! cache pool index is %d, var name is %s, cached var name is %s, var shape is %s " - % + ("Hit Cache !!!! cache pool index " + "is %d, var name is %s, " + "cached var name is %s, " + "var shape is %s ") % (index, x, cache_var, str(cache_shape))) self.pool.pop(index) _rename_arg_( diff --git a/python/paddle/v2/fluid/nets.py b/python/paddle/v2/fluid/nets.py index 54886a8f2c..47b550bf4d 100644 --- a/python/paddle/v2/fluid/nets.py +++ b/python/paddle/v2/fluid/nets.py @@ -1,6 +1,9 @@ import layers -__all__ = ["simple_img_conv_pool", "sequence_conv_pool"] +__all__ = [ + "simple_img_conv_pool", + "sequence_conv_pool", +] def simple_img_conv_pool(input, diff --git a/python/paddle/v2/fluid/registry.py b/python/paddle/v2/fluid/registry.py index 7aa8290611..94b16bca8c 100644 --- a/python/paddle/v2/fluid/registry.py +++ b/python/paddle/v2/fluid/registry.py @@ -8,7 +8,11 @@ import proto.framework_pb2 as framework_pb2 from framework import OpProtoHolder, Variable, Program, Operator from paddle.v2.fluid.layer_helper import LayerHelper, unique_name -__all__ = ['deprecated', 'register_layer', 'autodoc'] +__all__ = [ + 'deprecated', + 'register_layer', + 'autodoc', +] def _convert_(name): @@ -80,11 +84,10 @@ def _generate_doc_string_(op_proto): def register_layer(op_type): - """ - Register an Python layer for an Operator + """Register the Python layer for an Operator. Args: - op_type: The name of the operator to be created + op_type: The name of the operator to be created. This function takes in the operator type (sigmoid, mean , average etc) and creates the operator functionality. @@ -98,16 +101,16 @@ def register_layer(op_type): if len(not_intermediate_outputs) != 1: raise ValueError("Only one non intermediate output operator can be", - "automatically generated") + "automatically generated.") if not_intermediate_outputs[0].duplicable: raise ValueError( - "Only non duplicable op can be automatically generated") + "Only non duplicable op can be automatically generated.") for output in intermediate_outputs: if output.duplicable: raise ValueError("The op can be automatically generated only when ", - "all intermediate ops are not duplicable") + "all intermediate ops are not duplicable.") o_name = not_intermediate_outputs[0].name intermediate_output_names = [output.name for output in intermediate_outputs] diff --git a/python/paddle/v2/fluid/regularizer.py b/python/paddle/v2/fluid/regularizer.py index d1955b0047..117c45c49f 100644 --- a/python/paddle/v2/fluid/regularizer.py +++ b/python/paddle/v2/fluid/regularizer.py @@ -1,6 +1,10 @@ import framework -__all__ = ['append_regularization_ops', 'L1Decay', 'L2Decay'] +__all__ = [ + 'append_regularization_ops', + 'L1Decay', + 'L2Decay', +] def append_regularization_ops(parameters_and_grads, regularization=None): From db65f497ffce0dcc8b71a06d8a303e1d75b864ca Mon Sep 17 00:00:00 2001 From: Cao Ying Date: Sat, 13 Jan 2018 02:35:13 +0800 Subject: [PATCH 4/6] Update comments for two operators. (#7457) * update code comments. * update the comments. * follow comments. --- .../reorder_lod_tensor_by_rank_op.cc | 44 ++++++++++++++----- paddle/operators/shrink_rnn_memory_op.cc | 22 +++++----- python/paddle/v2/fluid/layers/control_flow.py | 40 ++++++++++------- python/paddle/v2/fluid/layers/tensor.py | 17 +++---- 4 files changed, 77 insertions(+), 46 deletions(-) diff --git a/paddle/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/operators/reorder_lod_tensor_by_rank_op.cc index a055cdf7e8..3c30447949 100644 --- a/paddle/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/operators/reorder_lod_tensor_by_rank_op.cc @@ -26,22 +26,44 @@ class ReorderLoDTensorByRankTableOpProtoMaker ReorderLoDTensorByRankTableOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "(LoDTensor) the input lod tensor need to be reordered."); + AddInput("X", + "(LoDTensor), the input lod tensor to be reordered according to " + "Input(RankTable)."); AddInput("RankTable", - "(LoDRankTable) the rank table that input need follow"); - AddOutput("Out", "(LoDTensor) reordered lod tensor"); - AddComment(R"DOC(ReorderLoDTensorByRankTable + "(LoDRankTable), the rank table according to which Input(X) is " + "reordered."); + AddOutput("Out", "(LoDTensor), the reordered lod tensor."); + AddComment(R"DOC(ReorderLoDTensorByRankTable operator. -Reorder the input X by the rank of `RankTable`. If `RankTable` is ordered by -index [3, 0, 2, 1]. Input X will reorder its sequence, the third sequence of -X will be the first sequence of Output. - -NOTE: The RankTable does not need to be calculated by X. +Input(X) is a batch of sequences. Input(RankTable) stores new orders of the +input sequence batch. The reorder_lod_tensor_by_rank operator reorders the +Input(X) according to the information provided by Input(RankTable). For example: -The X = [Seq0, Seq1, Seq2, Seq3]. The indices of RankTable are [3, 0, 2, 1]. -The Out = [Seq3, Seq0, Seq2, Seq1] with correct LoD information. +If the indices stored in the Input(RankTable) are [3, 0, 2, 1], the +Input(X) will be reordered that the fourth sequence in Input(X) will become the +first one, and then followed by the original first, third, and the second one. + +This is: +X = [Seq0, Seq1, Seq2, Seq3]. The indices in RankTable are [3, 0, 2, 1]. +Out = [Seq3, Seq0, Seq2, Seq1] with a new LoD information. + +If the LoD information of Input(X) is empty, this means Input(X) is not sequence +data. This is also identical to a batch of sequences where each sequence has a +fixed length 1. In this case, the reorder_lod_tensor_by_rank operator reorders +each slice of Input(X) along the first axis according to Input(RankTable). + +This is: +X = [Slice0, Slice1, Slice2, Slice3] and its LoD information is empty. The +indices in RankTable are [3, 0, 2, 1]. +Out = [Slice3, Slice0, Slice2, Slice1] with no LoD information is appended. + +NOTE: This operator sorts Input(X) according to a given LoDRankTable which does +not need to be calculated according to Input(X). It can be calculated according +to another different sequence, and then this operator sorts Input(X) according +to the given LoDRankTable. + )DOC"); } }; diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index 3f5b2a9b84..ade94b40be 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -45,7 +45,7 @@ class ShrinkRNNMemoryOp : public ArrayOp { rank_items.begin(); auto *out_var = scope.FindVar(Output("Out")); - PADDLE_ENFORCE(out_var != nullptr, "Output Out must be set"); + PADDLE_ENFORCE(out_var != nullptr, "Output(Out) must be set."); auto &out_tensor = *out_var->GetMutable(); size_t height = dst_num_rows; @@ -76,15 +76,17 @@ class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker { "(LoDTensor) The step index. The RNN step memory 'X' will be " "shrinked to match the size of the input of the index'th step."); AddOutput("Out", "(LoDTensor) The shrinked RNN step memory."); - AddComment( - R"DOC( - In dynamic RNN, we are able to handle sequences of different lengths. - Because of the multiple lengths, the size of each step input can be - different, which may lead to a mismatching between the input of - the current step and the memory generated by the previous one. This - operator shrinks memory according to the size of the next step input, - to make sure that they can match each other. - )DOC"); + AddComment(R"DOC( +This operator is used to shrink output batch of memory defined in dynamic RNN. + +Dynamic RNN is able to handle variable-length sequences, in which, sequences in +a mini-batch are sorted by their lengths first. After that, the longest sequence +becomes the first one in the sorted batch, followed by the second longest, the +third longest, and so on. Dynamic RNN then slices a batch input timestep by +timestep from the sorted input. Once any sequence in the input batch reaches its +end, memory defined in dynamicRNN has to shrink its outputs to adapt to the input +batch size for the next time step. +)DOC"); } }; diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py index 0cf17f3083..4b363ecbe7 100644 --- a/python/paddle/v2/fluid/layers/control_flow.py +++ b/python/paddle/v2/fluid/layers/control_flow.py @@ -742,11 +742,10 @@ def topk(input, k): def lod_tensor_to_array(x, table): - """This function performs the operation that converts an LOD_Tensor to - an array. + """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY. Args: - x (Variable|list): The tensor that needs to be converted to an array. + x (Variable|list): The LOD tensor to be converted to a LOD tensor array. table (ParamAttr|list): The variable that stores the level of lod which is ordered by sequence length in descending order. @@ -776,11 +775,10 @@ def lod_tensor_to_array(x, table): def array_to_lod_tensor(x, table): - """This function performs the operations that converts an array to - an LOD_Tensor. + """Convert a LoD_Tensor_Aarry to an LoDTensor. Args: - x (Variable|list): The array that needs to be converted to a tensor. + x (Variable|list): The lod tensor array to be converted to a tensor. table (ParamAttr|list): The variable that stores the level of lod which is ordered by sequence length in descending order. @@ -808,7 +806,8 @@ def array_to_lod_tensor(x, table): def increment(x, value=1.0, in_place=True): - """This function performs an operation that increments each value in the + """ + This function performs an operation that increments each value in the input :math:`x` by an amount: :math:`value` as mentioned in the input parameter. This operation is performed in-place by default. @@ -841,17 +840,24 @@ def increment(x, value=1.0, in_place=True): def array_write(x, i, array=None): - """This function performs the operation to write the data out as an - LOD_TENSOR_ARRAY. + """ + This function writes the given input variable to the specified position + indicating by the arrary index to an output LOD_TENSOR_ARRAY. If the + output LOD_TENSOR_ARRAY is not given(None), a new one will be created and + returned. Args: x (Variable|list): The input tensor from which the data will be read. - i (Variable|list): The subscript index in tensor array, that points the - place from which data will be read. - array (Variable|list): The data can be read into this variable if - this is assigned. + i (Variable|list): The index of the output LOD_TENSOR_ARRAY, pointing to + the position to which the input tensor will be + written. + array (Variable|list): The output LOD_TENSOR_ARRAY to which the input + tensor will be written. If this parameter is + NONE, a new LOD_TENSOR_ARRAY will be created and + returned. + Returns: - Variable: The tensor type variable that has the data written to it. + Variable: The output LOD_TENSOR_ARRAY where the input tensor is written. Examples: .. code-block::python @@ -1228,7 +1234,7 @@ class DynamicRNN(object): self._assert_in_rnn_block_("step_input") if not isinstance(x, Variable): raise TypeError( - "step_input() can only take a Variable as its input") + "step_input() can only take a Variable as its input.") parent_block = self._parent_block_() if self.lod_rank_table is None: self.lod_rank_table = parent_block.create_var( @@ -1289,8 +1295,8 @@ class DynamicRNN(object): def __call__(self, *args, **kwargs): if self.status != DynamicRNN.AFTER_RNN: - raise ValueError( - "Dynamic RNN outputs can only be retrieved after rnn block") + raise ValueError(("Output of the dynamic RNN can only be visited " + "outside the rnn block.")) if len(self.outputs) == 1: return self.outputs[0] else: diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py index 57668a7983..438df33afb 100644 --- a/python/paddle/v2/fluid/layers/tensor.py +++ b/python/paddle/v2/fluid/layers/tensor.py @@ -176,25 +176,26 @@ def fill_constant(shape, dtype, value, out=None): """ **fill_constant** - This function creates a tensor of specified *shape* and - *dtype*, and initializes this with a constant supplied in *value*. + This function creates a tensor with specified `shape` and `dtype`, and + initializes it with a constant specifed by `value`. - It also sets *stop_gradient* to True. + The attribute `stop_gradient` of the created tensor is set to True. Args: - shape(tuple|list|None): Shape of output tensor - dtype(np.dtype|core.DataType|str): Data type of output tensor - value(float): Constant value to initialize the output tensor - out(Variable): Output Variable to initialize + shape(tuple|list|None): Shape of the output tensor. + dtype(np.dtype|core.DataType|str): Data type of the output tensor. + value(float): The constant value used to initialize the output tensor. + out(Variable): The output tensor. Returns: - Variable: The tensor variable storing the output + Variable: The tensor variable storing the output. Examples: .. code-block:: python data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64') """ + helper = LayerHelper("fill_constant", **locals()) if out is None: out = helper.create_tmp_variable(dtype=dtype) From 9d52ad4c3a0ad17b7aaf7cc913f1b9d8eeaa0311 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Sun, 14 Jan 2018 14:09:04 +0800 Subject: [PATCH 5/6] fix op doc --- doc/howto/dev/new_op_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index 3109d72001..9299658567 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -24,7 +24,7 @@ - `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 - `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 -依据是否包含kernel,可以将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下: +依据是否包含kernel,可以将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorWithKernel`,后者继承自`OperatorBase`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下: 内容 | 定义位置 From 5ad1aef051349a73b00b8d611f0ae2508f02490b Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Sun, 14 Jan 2018 16:54:04 +0800 Subject: [PATCH 6/6] "cudnn operators change to cudnn kernel" (#6660) * "unified operators" * "add CUDNN register" * "add use cudnn attribute" * "add attribute" * "test conv tranpose op" * "remove duplicated attr" * "fix op test" * "add attribute to set cudnn" * "add more log" * "need layout op register support" * "add more log" * "change GetExpectedKernelType " * "fix Get attr in conv_op" * "fix CI" * "fix tests" * "removed kernel priority fallback" * "fix CI" * "fix stack pointer bug" * "refine buggy interface" * "add const cast to save life" * "fix get_output_with_grad" * "fix op test with dataformat" * ""fix pooling * "fix pooling test" * "fix CI" * "fix with_gpu error" * "add transform needed functional check" * "fix unpack list error" * "comment out parallel.do temporary" * "fix CI" * "fix compile doc error" * "make threshold larger" --- paddle/framework/data_device_transform.cc | 5 +- paddle/framework/data_device_transform.h | 3 +- paddle/framework/data_layout.h | 19 +++- paddle/framework/data_transform.cc | 10 +- paddle/framework/data_transform.h | 6 +- paddle/framework/op_kernel_type.h | 5 + paddle/framework/op_registry_test.cc | 18 ---- paddle/framework/operator.cc | 100 +++++------------- paddle/framework/operator.h | 26 +---- paddle/operators/CMakeLists.txt | 19 +++- paddle/operators/conv_cudnn_op.cc | 74 ------------- paddle/operators/conv_cudnn_op.cu.cc | 31 +++--- paddle/operators/conv_op.cc | 73 +++++++++++++ paddle/operators/conv_op.h | 8 ++ paddle/operators/conv_transpose_cudnn_op.cc | 78 -------------- .../operators/conv_transpose_cudnn_op.cu.cc | 36 +++---- paddle/operators/conv_transpose_op.cc | 72 +++++++++++++ paddle/operators/conv_transpose_op.h | 8 ++ paddle/operators/math/sequence2batch.cc | 1 + paddle/operators/pool_cudnn_op.cc | 39 ------- paddle/operators/pool_cudnn_op.cu.cc | 29 ++--- paddle/operators/pool_cudnn_op.h | 19 ---- paddle/operators/pool_op.cc | 65 +++++++++++- paddle/operators/pool_op.h | 8 ++ paddle/platform/dynload/cudnn.cc | 2 +- paddle/platform/dynload/cudnn.h | 2 +- paddle/platform/dynload/dynamic_loader.cc | 2 +- paddle/platform/dynload/dynamic_loader.h | 2 +- paddle/pybind/pybind.cc | 7 +- paddle/pybind/tensor_py.h | 23 +++- python/paddle/v2/fluid/layers/nn.py | 2 +- python/paddle/v2/fluid/tests/op_test.py | 64 ++++++----- .../paddle/v2/fluid/tests/test_conv2d_op.py | 93 ++++++++++------ .../fluid/tests/test_conv2d_transpose_op.py | 80 ++++++++++---- .../paddle/v2/fluid/tests/test_conv3d_op.py | 85 ++++++++++----- .../fluid/tests/test_conv3d_transpose_op.py | 80 ++++++++++---- .../paddle/v2/fluid/tests/test_parallel_op.py | 8 +- .../paddle/v2/fluid/tests/test_pool2d_op.py | 49 ++++++--- .../paddle/v2/fluid/tests/test_pool3d_op.py | 49 ++++++--- 39 files changed, 732 insertions(+), 568 deletions(-) delete mode 100644 paddle/operators/conv_cudnn_op.cc delete mode 100644 paddle/operators/conv_transpose_cudnn_op.cc delete mode 100644 paddle/operators/pool_cudnn_op.cc delete mode 100644 paddle/operators/pool_cudnn_op.h diff --git a/paddle/framework/data_device_transform.cc b/paddle/framework/data_device_transform.cc index b3fd48ae12..d38d87927f 100644 --- a/paddle/framework/data_device_transform.cc +++ b/paddle/framework/data_device_transform.cc @@ -31,15 +31,14 @@ static const platform::DeviceContext* GetDeviceContext( } } -Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) { +void DeviceTransform(const Tensor& in, const platform::Place& dst_place, + Tensor* out) { VLOG(3) << "DeviceTransform in, src_place " << in.place() << " dst_place: " << dst_place; - Tensor* out = new Tensor(); auto* dev_ctx = GetDeviceContext(in.place(), dst_place); dev_ctx->Wait(); Copy(in, dst_place, *dev_ctx, out); dev_ctx->Wait(); - return out; } } // namespace framework diff --git a/paddle/framework/data_device_transform.h b/paddle/framework/data_device_transform.h index bebf0d1b32..b21ed0be34 100644 --- a/paddle/framework/data_device_transform.h +++ b/paddle/framework/data_device_transform.h @@ -21,7 +21,8 @@ limitations under the License. */ namespace paddle { namespace framework { -Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place); +void DeviceTransform(const Tensor& in, const platform::Place& dst_place, + Tensor* out); } // namespace framework } // namespace paddle diff --git a/paddle/framework/data_layout.h b/paddle/framework/data_layout.h index 3ab976ecac..31817251ed 100644 --- a/paddle/framework/data_layout.h +++ b/paddle/framework/data_layout.h @@ -14,7 +14,9 @@ limitations under the License. */ #pragma once -#include +#include +#include + #include "paddle/platform/enforce.h" namespace paddle { @@ -27,12 +29,19 @@ enum class DataLayout { }; inline DataLayout StringToDataLayout(const std::string& str) { - if (str == "NHWC" || str == "nhwc") { + std::string s(str); + for (size_t i = 0; i < s.size(); ++i) { + s[i] = toupper(s[i]); + } + + if (s == "NHWC") { return DataLayout::kNHWC; - } else if (str == "NCHW" || str == "nchw") { + } else if (s == "NCHW") { return DataLayout::kNCHW; + } else if (s == "ANYLAYOUT") { + return DataLayout::kAnyLayout; } else { - PADDLE_THROW("Unknown storage order string: %s", str); + PADDLE_THROW("Unknown storage order string: %s", s); } } @@ -49,7 +58,7 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) { } } -inline std::ostream& operator<<(std::ostream& out, DataLayout l) { +inline std::ostream& operator<<(std::ostream& out, const DataLayout& l) { out << DataLayoutToString(l); return out; } diff --git a/paddle/framework/data_transform.cc b/paddle/framework/data_transform.cc index e56edb9539..d826f0edac 100644 --- a/paddle/framework/data_transform.cc +++ b/paddle/framework/data_transform.cc @@ -19,16 +19,14 @@ limitations under the License. */ namespace paddle { namespace framework { -Tensor* DataTransform(const OpKernelType& expected_kernel_type, - const OpKernelType& kernel_type_for_var, - const Tensor& input_tensor) { - Tensor* out = nullptr; +void DataTransform(const OpKernelType& expected_kernel_type, + const OpKernelType& kernel_type_for_var, + const Tensor& input_tensor, Tensor* out) { if (!platform::is_same_place(kernel_type_for_var.place_, expected_kernel_type.place_)) { - out = DeviceTransform(input_tensor, expected_kernel_type.place_); + DeviceTransform(input_tensor, expected_kernel_type.place_, out); } PADDLE_ENFORCE_NOT_NULL(out, "out should not be null"); - return out; } void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, diff --git a/paddle/framework/data_transform.h b/paddle/framework/data_transform.h index ee95c7e856..a4b7890237 100644 --- a/paddle/framework/data_transform.h +++ b/paddle/framework/data_transform.h @@ -30,9 +30,9 @@ limitations under the License. */ namespace paddle { namespace framework { -Tensor* DataTransform(const OpKernelType& expected_kernel_type, - const OpKernelType& kernel_type_for_var, - const Tensor& input_tensor); +void DataTransform(const OpKernelType& expected_kernel_type, + const OpKernelType& kernel_type_for_var, + const Tensor& input_tensor, Tensor* out); void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, Variable& out_var); diff --git a/paddle/framework/op_kernel_type.h b/paddle/framework/op_kernel_type.h index 053897784c..312bd5f892 100644 --- a/paddle/framework/op_kernel_type.h +++ b/paddle/framework/op_kernel_type.h @@ -85,5 +85,10 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) { return stream.str(); } +inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) { + return (!platform::places_are_same_class(l.place_, r.place_)) || + (l.data_type_ != r.data_type_) || (l.data_layout_ != r.data_layout_); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index 66f07b6757..341da8befd 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -368,24 +368,6 @@ TEST(OperatorRegistrar, OpWithMultiKernel) { // TODO(qiao) add priority back // use all available kernels - paddle::framework::UseALL(); op->Run(scope, cuda_place); EXPECT_EQ(op_test_value, -10); - - // remove cuda kernels - paddle::framework::UseCPU(); - op->Run(scope, cpu_place); - - EXPECT_EQ(op_test_value, -9); - - // add cuda kernels - paddle::framework::UseCUDA(); - op->Run(scope, cuda_place); - - EXPECT_EQ(op_test_value, -10); - - // use cudnn kernel - paddle::framework::UseCUDNN(); - op->Run(scope, cuda_place); - EXPECT_EQ(op_test_value, -20); } diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index be1373dc2a..84c010df7c 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -29,52 +29,12 @@ DEFINE_bool(op_sync, false, namespace paddle { namespace framework { -std::vector> kKernelPriority; - -void UseCPU() { - kKernelPriority.clear(); - /*Plain CPU*/ - auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kPlain); - kKernelPriority.insert(kKernelPriority.begin(), pair0); -} - -void UseMKLDNN() { - UseCPU(); -#if PADDLE_WITH_MKLML - { - /*MKLDNN Kernel*/ - auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN); - kKernelPriority.insert(kKernelPriority.begin(), pair0); - } -#endif -} - -void UseCUDA() { - UseMKLDNN(); -#if PADDLE_WITH_CUDA - /*Plain GPU*/ - auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain); - kKernelPriority.insert(kKernelPriority.begin(), pair0); -#endif -} - -void UseCUDNN() { - UseCUDA(); -#if PADDLE_WITH_CUDA - if (platform::dynload::HasCUDNN()) { - /*CUDNN Kernel*/ - auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN); - kKernelPriority.insert(kKernelPriority.begin(), pair0); - } -#endif -} - -void UseALL() { - UseCPU(); - UseMKLDNN(); - UseCUDA(); - UseCUDNN(); -} +std::vector> kKernelPriority = { + std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN), + std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain), + std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN), + std::make_tuple(platform::CPUPlace(), LibraryType::kPlain), +}; static DDim GetDims(const Scope& scope, const std::string& name) { Variable* var = scope.FindVar(name); @@ -271,36 +231,33 @@ static bool VarIsTensor(const Variable* var) { return var->IsType() || var->IsType(); } -static const Tensor* GetTensorFromVar(const Variable* var) { - const Tensor* t = nullptr; +static const Tensor* GetTensorFromVar(Variable* var) { if (var->IsType()) { - t = &(var->Get()); + return var->GetMutable(); } else if (var->IsType()) { - t = &(var->Get().value()); + return var->GetMutable()->mutable_value(); } else { PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.", var->Type().name()); } - return t; } static Tensor* GetMutableTensorFromVar(Variable* var) { - Tensor* t = nullptr; if (var->IsType()) { - t = var->GetMutable(); + return var->GetMutable(); } else if (var->IsType()) { - t = var->GetMutable()->mutable_value(); + return var->GetMutable()->mutable_value(); } else { PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.", var->Type().name()); } - return t; } template <> const Tensor* ExecutionContext::Input(const std::string& name) const { auto* var = InputVar(name); - return var == nullptr ? nullptr : GetTensorFromVar(var); + return var == nullptr ? nullptr + : GetTensorFromVar(const_cast(var)); } template <> @@ -343,6 +300,7 @@ bool OpSupportGPU(const std::string& op_type) { auto it = all_kernels.find(op_type); if (it == all_kernels.end()) { // All control operator must support GPU + return true; } for (auto& kern_pair : it->second) { @@ -516,21 +474,17 @@ void OperatorWithKernel::Run(const Scope& scope, } ExecutionContext ctx(*this, scope, *dev_ctx); - auto expected_kernel_key = this->GetExpectedKernelType(ctx); OpKernelMap& kernels = kernels_iter->second; - for (auto& candidate : kKernelPriority) { - auto candidate_key = - OpKernelType(expected_kernel_key.data_type_, std::get<0>(candidate), - expected_kernel_key.data_layout_, std::get<1>(candidate)); + // TODO(dzhwinter) : kernel fallback mechanism will be added when all the + // transform functions are ready. - if ((candidate_key == expected_kernel_key) || - (kernels.count(candidate_key))) { - expected_kernel_key = candidate_key; - break; - } - } + // for (auto& candidate : kKernelPriority) { + // Do selection + // } + + auto expected_kernel_key = this->GetExpectedKernelType(ctx); VLOG(3) << "expected_kernel_key:" << expected_kernel_key; @@ -544,7 +498,7 @@ void OperatorWithKernel::Run(const Scope& scope, if (tensor_in->IsInitialized()) { auto kernel_type_for_var = this->GetKernelTypeForVar( var_name_item.first, *tensor_in, expected_kernel_key); - if (kernel_type_for_var != expected_kernel_key) { + if (TransFromNeeded(kernel_type_for_var, expected_kernel_key)) { auto out_var_names = OutputVars(true); if (std::find(out_var_names.begin(), out_var_names.end(), var_name) != out_var_names.end()) { @@ -553,11 +507,13 @@ void OperatorWithKernel::Run(const Scope& scope, "does not support transform", var_name); } - VLOG(3) << "need to do transform for var " << var_name; + VLOG(3) << "Transform Variable " << var_name << " from " + << kernel_type_for_var << " to " << expected_kernel_key; auto* trans_var = new_scope.Var(var_name); - auto* out = DataTransform(expected_kernel_key, kernel_type_for_var, - *tensor_in); - CopyVariableWithTensor(*var, *out, *trans_var); + std::shared_ptr out(new Tensor); + DataTransform(expected_kernel_key, kernel_type_for_var, *tensor_in, + out.get()); + CopyVariableWithTensor(*var, *(out.get()), *trans_var); } } } diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index d5feb59864..c9140f304c 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -54,33 +54,9 @@ constexpr char kGradVarSuffix[] = "@GRAD"; constexpr char kZeroVarSuffix[] = "@ZERO"; // define some kernel priority +/* Define multiple kernel type fallback order*/ extern std::vector> kKernelPriority; -/** - * @brief Use cpu kernel only - */ -void UseCPU(); - -/** - * @brief Perfer MKLDNN kernel than Plain CPU kernel - */ -void UseMKLDNN(); - -/** - * @brief Perfer CUDA kernel than Plain CPU kernel - */ -void UseCUDA(); - -/** - * @brief Perfer cudnn kernel than Plain CUDA kernel - */ -void UseCUDNN(); - -/** - * @brief Use all available kernels - */ -void UseALL(); - inline std::string GradVarName(const std::string& var_name) { return var_name + kGradVarSuffix; } diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index e1b695e8cd..2569535c25 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -137,8 +137,6 @@ op_library(sum_op DEPS selected_rows_functor) op_library(sgd_op DEPS selected_rows_functor) op_library(print_op DEPS lod_tensor) op_library(adagrad_op DEPS selected_rows_functor) -op_library(conv_op DEPS vol2col) -op_library(pool_op DEPS pooling) op_library(maxout_op DEPS maxouting) op_library(unpool_op DEPS unpooling) op_library(pool_with_index_op DEPS pooling) @@ -149,12 +147,27 @@ op_library(max_sequence_len_op DEPS lod_rank_table) op_library(sequence_conv_op DEPS context_project) op_library(sequence_pool_op DEPS sequence_pooling) op_library(lstm_op DEPS sequence2batch lstm_compute) -op_library(conv_transpose_op DEPS vol2col) op_library(gru_op DEPS sequence2batch gru_compute) op_library(recurrent_op DEPS executor) op_library(warpctc_op DEPS dynload_warpctc sequence_padding math_function) op_library(cos_sim_op DEPS cos_sim_functor) op_library(parallel_do_op DEPS executor) + +# Regist multiple Kernel to pybind +if (WITH_GPU) +op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col) +op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling) +op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc + conv_transpose_cudnn_op.cu.cc DEPS vol2col) +file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d, CUDNN);\n") +file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(pool2d, CUDNN);\n") +file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d_transpose, CUDNN);\n") +else() +op_library(conv_op SRCS conv_op.cc DEPS vol2col) +op_library(pool_op SRCS pool_op.cc DEPS pooling) +op_library(conv_transpose_op SRCS conv_transpose_op.cc DEPS vol2col) +endif() + # FIXME(typhoonzero): save/load depends lodtensor serialization functions op_library(save_op DEPS lod_tensor) op_library(load_op DEPS lod_tensor) diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc deleted file mode 100644 index 84d9ce1973..0000000000 --- a/paddle/operators/conv_cudnn_op.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/conv_op.h" - -namespace paddle { -namespace operators { - -class CudnnConv2DOpMaker : public Conv2DOpMaker { - public: - CudnnConv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker) - : Conv2DOpMaker(proto, op_checker) { - AddAttr("workspace_size_MB", - "workspace size for cudnn, in MB, " - "workspace is a section of GPU memory which will be " - "allocated/freed each time the operator runs, larger " - "workspace size can increase performance but also requires " - "better hardware. This size should be chosen carefully.") - .SetDefault(4096); - } -}; - -class CudnnConv3DOpMaker : public Conv3DOpMaker { - public: - CudnnConv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker) - : Conv3DOpMaker(proto, op_checker) { - AddAttr("workspace_size_MB", - "workspace size for cudnn, in MB, " - "workspace is a section of GPU memory which will be " - "allocated/freed each time the operator runs, larger " - "workspace size can increase performance but also requires " - "better hardware. This size should be chosen carefully.") - .SetDefault(4096); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP(conv2d_cudnn, ops::ConvOp, ops::CudnnConv2DOpMaker, - conv2d_cudnn_grad, ops::ConvOpGrad); - -REGISTER_OP(conv3d_cudnn, ops::ConvOp, ops::CudnnConv3DOpMaker, - conv3d_cudnn_grad, ops::ConvOpGrad); - -REGISTER_OP_CPU_KERNEL( - conv2d_cudnn, - ops::GemmConvKernel, - ops::GemmConvKernel); -REGISTER_OP_CPU_KERNEL( - conv2d_cudnn_grad, - ops::GemmConvGradKernel, - ops::GemmConvGradKernel); - -REGISTER_OP_CPU_KERNEL( - conv3d_cudnn, - ops::GemmConvKernel, - ops::GemmConvKernel); -REGISTER_OP_CPU_KERNEL( - conv3d_cudnn_grad, - ops::GemmConvGradKernel, - ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_cudnn_op.cu.cc b/paddle/operators/conv_cudnn_op.cu.cc index 0c5ed3e4e8..3a5409a7e3 100644 --- a/paddle/operators/conv_cudnn_op.cu.cc +++ b/paddle/operators/conv_cudnn_op.cu.cc @@ -32,7 +32,7 @@ static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = static_cast(1024) * 1024 * 1024; template -class CudnnConvOpKernel : public framework::OpKernel { +class CUDNNConvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), @@ -147,7 +147,7 @@ class CudnnConvOpKernel : public framework::OpKernel { }; template -class CudnnConvGradOpKernel : public framework::OpKernel { +class CUDNNConvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), @@ -315,17 +315,16 @@ class CudnnConvGradOpKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -// TODO(dzhwinter) : below register should be removed -REGISTER_OP_CUDA_KERNEL(conv2d_cudnn, - paddle::operators::CudnnConvOpKernel, - paddle::operators::CudnnConvOpKernel); -REGISTER_OP_CUDA_KERNEL(conv2d_cudnn_grad, - paddle::operators::CudnnConvGradOpKernel, - paddle::operators::CudnnConvGradOpKernel); - -REGISTER_OP_CUDA_KERNEL(conv3d_cudnn, - paddle::operators::CudnnConvOpKernel, - paddle::operators::CudnnConvOpKernel); -REGISTER_OP_CUDA_KERNEL(conv3d_cudnn_grad, - paddle::operators::CudnnConvGradOpKernel, - paddle::operators::CudnnConvGradOpKernel); +REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace, + paddle::operators::CUDNNConvOpKernel, + paddle::operators::CUDNNConvOpKernel); +REGISTER_OP_KERNEL(conv2d_grad, CUDNN, ::paddle::platform::CUDAPlace, + paddle::operators::CUDNNConvGradOpKernel, + paddle::operators::CUDNNConvGradOpKernel); + +REGISTER_OP_KERNEL(conv3d, CUDNN, ::paddle::platform::CUDAPlace, + paddle::operators::CUDNNConvOpKernel, + paddle::operators::CUDNNConvOpKernel); +REGISTER_OP_KERNEL(conv3d_grad, CUDNN, ::paddle::platform::CUDAPlace, + paddle::operators::CUDNNConvGradOpKernel, + paddle::operators::CUDNNConvGradOpKernel); diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc index 1468e3eb96..424eccdb7d 100644 --- a/paddle/operators/conv_op.cc +++ b/paddle/operators/conv_op.cc @@ -67,6 +67,23 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { ctx->ShareLoD("Input", "Output"); } +framework::OpKernelType ConvOp::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + bool use_cudnn = ctx.Attr("use_cudnn"); + framework::LibraryType library_; + if (use_cudnn) { + library_ = framework::LibraryType::kCUDNN; + } else { + library_ = framework::LibraryType::kPlain; + } + + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Input")->type()), ctx.GetPlace(), + layout_, library_); +} + Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput( @@ -108,6 +125,26 @@ Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker) "dilations(h_dilation, w_dilation) of " "convolution operator.") .SetDefault({1, 1}); + AddAttr( + "use_cudnn", + "(bool, default false) Only used in cudnn kernel, need install cudnn") + .SetDefault(false); + AddAttr( + "data_format", + "(string, default NCHW) Only used in " + "An optional string from: \"NHWC\", \"NCHW\". " + "Defaults to \"NHWC\". Specify the data format of the output data, " + "the input will be transformed automatically. ") + .SetDefault("AnyLayout"); + // TODO(dzhwinter): need to registered layout transform function + AddAttr("workspace_size_MB", + "Only used in cudnn kernel. Need set use_cudnn to true." + "workspace size for cudnn, in MB, " + "workspace is a section of GPU memory which will be " + "allocated/freed each time the operator runs, larger " + "workspace size can increase performance but also requires " + "better hardware. This size should be chosen carefully.") + .SetDefault(4096); AddComment(R"DOC( Convolution Operator. @@ -181,6 +218,25 @@ Conv3DOpMaker::Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker) "dilations(d_dilation, h_dilation, w_dilation) of " "convolution operator.") .SetDefault({1, 1, 1}); + AddAttr( + "use_cudnn", + "(bool, default false) Only used in cudnn kernel, need install cudnn") + .SetDefault(false); + AddAttr( + "data_format", + "(string, default NCHW) Only used in " + "An optional string from: \"NHWC\", \"NCHW\". " + "Defaults to \"NHWC\". Specify the data format of the output data, " + "the input will be transformed automatically. ") + .SetDefault("AnyLayout"); + // TODO(dzhwinter): need to registered layout transform function + AddAttr("workspace_size_MB", + "Only used in cudnn kernel. workspace size for cudnn, in MB, " + "workspace is a section of GPU memory which will be " + "allocated/freed each time the operator runs, larger " + "workspace size can increase performance but also requires " + "better hardware. This size should be chosen carefully.") + .SetDefault(4096); AddComment(R"DOC( Convolution3D Operator. @@ -224,6 +280,23 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const { } } +framework::OpKernelType ConvOpGrad::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + bool use_cudnn = ctx.Attr("use_cudnn"); + framework::LibraryType library_; + if (use_cudnn) { + library_ = framework::LibraryType::kCUDNN; + } else { + library_ = framework::LibraryType::kPlain; + } + + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Input")->type()), ctx.GetPlace(), + layout_, library_); +} + } // namespace operators } // namespace paddle diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h index 83786e2329..5a8933e791 100644 --- a/paddle/operators/conv_op.h +++ b/paddle/operators/conv_op.h @@ -62,12 +62,20 @@ class ConvOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; }; class ConvOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; }; template diff --git a/paddle/operators/conv_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc deleted file mode 100644 index 2e5333a265..0000000000 --- a/paddle/operators/conv_transpose_cudnn_op.cc +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/conv_transpose_op.h" - -namespace paddle { -namespace operators { - -class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker { - public: - CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker) - : Conv2DTransposeOpMaker(proto, op_checker) { - AddAttr("workspace_size_MB", - "workspace size for cudnn, in MB, " - "workspace is a section of GPU memory which will be " - "allocated/freed each time the operator runs, larger " - "workspace size can increase performance but also requires " - "better hardward. This size should be carefully setted.") - .SetDefault(4096); - } -}; - -class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker { - public: - CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker) - : Conv3DTransposeOpMaker(proto, op_checker) { - AddAttr("workspace_size_MB", - "workspace size for cudnn, in MB, " - "workspace is a section of GPU memory which will be " - "allocated/freed each time the operator runs, larger " - "workspace size can increase performance but also requires " - "better hardward. This size should be carefully setted.") - .SetDefault(4096); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp, - ops::CudnnConv2DTransposeOpMaker, conv2d_transpose_cudnn_grad, - ops::ConvTransposeOpGrad); - -REGISTER_OP_CPU_KERNEL( - conv2d_transpose_cudnn, - ops::GemmConvTransposeKernel, - ops::GemmConvTransposeKernel); -REGISTER_OP_CPU_KERNEL( - conv2d_transpose_cudnn_grad, - ops::GemmConvTransposeGradKernel, - ops::GemmConvTransposeGradKernel); - -REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, - ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad, - ops::ConvTransposeOpGrad); - -REGISTER_OP_CPU_KERNEL( - conv3d_transpose_cudnn, - ops::GemmConvTransposeKernel, - ops::GemmConvTransposeKernel); -REGISTER_OP_CPU_KERNEL( - conv3d_transpose_cudnn_grad, - ops::GemmConvTransposeGradKernel, - ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv_transpose_cudnn_op.cu.cc b/paddle/operators/conv_transpose_cudnn_op.cu.cc index fc37776ba1..23bc97e13c 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cu.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cu.cc @@ -28,10 +28,10 @@ using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; using DataLayout = platform::DataLayout; -static constexpr size_t kConvCudnnWorkspaceLimitBytes = 1024 * 1024 * 1024; +static constexpr size_t kConvCUDNNWorkspaceLimitBytes = 1024 * 1024 * 1024; template -class CudnnConvTransposeOpKernel : public framework::OpKernel { +class CUDNNConvTransposeOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), @@ -77,7 +77,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel { // ------------------- cudnn conv workspace --------------------- void* cudnn_workspace = nullptr; size_t workspace_size_in_bytes; // final workspace to allocate. - size_t workspace_size_limit = kConvCudnnWorkspaceLimitBytes; + size_t workspace_size_limit = kConvCUDNNWorkspaceLimitBytes; if (user_workspace_size > 0) { workspace_size_limit = user_workspace_size * 1024 * 1024; } @@ -116,7 +116,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel { }; template -class CudnnConvTransposeGradOpKernel : public framework::OpKernel { +class CUDNNConvTransposeGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), @@ -161,7 +161,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { cudnnConvolutionBwdFilterAlgo_t filter_algo; size_t bwd_filter_ws_size, fwd_ws_size; size_t workspace_size_in_bytes = 0; - size_t workspace_size_limit = kConvCudnnWorkspaceLimitBytes; + size_t workspace_size_limit = kConvCUDNNWorkspaceLimitBytes; if (user_workspace_size > 0) { workspace_size_limit = user_workspace_size * 1024 * 1024; } @@ -236,16 +236,16 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(conv2d_transpose_cudnn, - ops::CudnnConvTransposeOpKernel, - ops::CudnnConvTransposeOpKernel); -REGISTER_OP_CUDA_KERNEL(conv2d_transpose_cudnn_grad, - ops::CudnnConvTransposeGradOpKernel, - ops::CudnnConvTransposeGradOpKernel); - -REGISTER_OP_CUDA_KERNEL(conv3d_transpose_cudnn, - ops::CudnnConvTransposeOpKernel, - ops::CudnnConvTransposeOpKernel); -REGISTER_OP_CUDA_KERNEL(conv3d_transpose_cudnn_grad, - ops::CudnnConvTransposeGradOpKernel, - ops::CudnnConvTransposeGradOpKernel); +REGISTER_OP_KERNEL(conv2d_transpose, CUDNN, ::paddle::platform::CUDAPlace, + ops::CUDNNConvTransposeOpKernel, + ops::CUDNNConvTransposeOpKernel); +REGISTER_OP_KERNEL(conv2d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace, + ops::CUDNNConvTransposeGradOpKernel, + ops::CUDNNConvTransposeGradOpKernel); + +REGISTER_OP_KERNEL(conv3d_transpose, CUDNN, ::paddle::platform::CUDAPlace, + ops::CUDNNConvTransposeOpKernel, + ops::CUDNNConvTransposeOpKernel); +REGISTER_OP_KERNEL(conv3d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace, + ops::CUDNNConvTransposeGradOpKernel, + ops::CUDNNConvTransposeGradOpKernel); diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index 74636d138f..cf4e8c0a30 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -58,6 +58,23 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); } +framework::OpKernelType ConvTransposeOp::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + bool use_cudnn = ctx.Attr("use_cudnn"); + framework::LibraryType library_; + if (use_cudnn) { + library_ = framework::LibraryType::kCUDNN; + } else { + library_ = framework::LibraryType::kPlain; + } + + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Input")->type()), ctx.GetPlace(), + layout_, library_); +} + Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { @@ -94,6 +111,25 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto, "(vector default:{0, 0}), the paddings(h_pad, w_pad) of convolution " "transpose operator.") .SetDefault({0, 0}); + AddAttr( + "use_cudnn", + "(bool, default false) Only used in cudnn kernel, need install cudnn") + .SetDefault(false); + AddAttr( + "data_format", + "(string, default NCHW) Only used in " + "An optional string from: \"NHWC\", \"NCHW\". " + "Defaults to \"NHWC\". Specify the data format of the output data, " + "the input will be transformed automatically. ") + .SetDefault("AnyLayout"); + // TODO(dzhwinter): need to registered layout transform function + AddAttr("workspace_size_MB", + "Used in cudnn kernel only. workspace size for cudnn, in MB, " + "workspace is a section of GPU memory which will be " + "allocated/freed each time the operator runs, larger " + "workspace size can increase performance but also requires " + "better hardward. This size should be carefully setted.") + .SetDefault(4096); AddComment(R"DOC( Convolution2D Transpose Operator. @@ -163,6 +199,25 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto, "(vector default:{0, 0, 0}), paddings(d_pad, " "h_pad, w_pad) of convolution transpose operator.") .SetDefault({0, 0, 0}); + AddAttr( + "use_cudnn", + "(bool, default false) Only used in cudnn kernel, need install cudnn") + .SetDefault(false); + AddAttr( + "data_format", + "(string, default NCHW) Only used in " + "An optional string from: \"NHWC\", \"NCHW\". " + "Defaults to \"NHWC\". Specify the data format of the output data, " + "the input will be transformed automatically. ") + .SetDefault("AnyLayout"); + // TODO(dzhwinter): need to registered layout transform function + AddAttr("workspace_size_MB", + "Used in cudnn kernel only. workspace size for cudnn, in MB, " + "workspace is a section of GPU memory which will be " + "allocated/freed each time the operator runs, larger " + "workspace size can increase performance but also requires " + "better hardward. This size should be carefully setted.") + .SetDefault(4096); AddComment(R"DOC( Convolution3D Transpose Operator. @@ -205,6 +260,23 @@ void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const { } } +framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + bool use_cudnn = ctx.Attr("use_cudnn"); + framework::LibraryType library_; + if (use_cudnn) { + library_ = framework::LibraryType::kCUDNN; + } else { + library_ = framework::LibraryType::kPlain; + } + + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Input")->type()), ctx.GetPlace(), + layout_, library_); +} + } // namespace operators } // namespace paddle diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index 4c8f8a8067..a42ade41b1 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -42,12 +42,20 @@ class ConvTransposeOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; }; class ConvTransposeOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; }; template diff --git a/paddle/operators/math/sequence2batch.cc b/paddle/operators/math/sequence2batch.cc index 88977be1f8..e459a42ca2 100644 --- a/paddle/operators/math/sequence2batch.cc +++ b/paddle/operators/math/sequence2batch.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/math/sequence2batch.h" +#include "paddle/operators/math/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/operators/pool_cudnn_op.cc b/paddle/operators/pool_cudnn_op.cc deleted file mode 100644 index 77407f5cdf..0000000000 --- a/paddle/operators/pool_cudnn_op.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/pool_cudnn_op.h" - -namespace ops = paddle::operators; - -REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad, - ops::PoolOpGrad); - -REGISTER_OP_CPU_KERNEL( - pool2d_cudnn, ops::PoolKernel, - ops::PoolKernel); -REGISTER_OP_CPU_KERNEL( - pool2d_cudnn_grad, - ops::PoolGradKernel, - ops::PoolGradKernel) - -REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad, - ops::PoolOpGrad); - -REGISTER_OP_CPU_KERNEL( - pool3d_cudnn, ops::PoolKernel, - ops::PoolKernel); -REGISTER_OP_CPU_KERNEL( - pool3d_cudnn_grad, - ops::PoolGradKernel, - ops::PoolGradKernel) diff --git a/paddle/operators/pool_cudnn_op.cu.cc b/paddle/operators/pool_cudnn_op.cu.cc index 2d0001ba11..446fb0819d 100644 --- a/paddle/operators/pool_cudnn_op.cu.cc +++ b/paddle/operators/pool_cudnn_op.cu.cc @@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/pool_cudnn_op.h" +#include "paddle/framework/op_registry.h" +#include "paddle/operators/pool_op.h" #include "paddle/platform/cudnn_helper.h" namespace paddle { @@ -25,7 +26,7 @@ using DataLayout = platform::DataLayout; using PoolingMode = platform::PoolingMode; template -class PoolCudnnOpKernel : public framework::OpKernel { +class PoolCUDNNOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), @@ -86,7 +87,7 @@ class PoolCudnnOpKernel : public framework::OpKernel { }; template -class PoolCudnnGradOpKernel : public framework::OpKernel { +class PoolCUDNNGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), @@ -162,12 +163,16 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel, - ops::PoolCudnnOpKernel); -REGISTER_OP_CUDA_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel, - ops::PoolCudnnGradOpKernel); - -REGISTER_OP_CUDA_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel, - ops::PoolCudnnOpKernel); -REGISTER_OP_CUDA_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel, - ops::PoolCudnnGradOpKernel); +REGISTER_OP_KERNEL(pool2d, CUDNN, ::paddle::platform::CUDAPlace, + ops::PoolCUDNNOpKernel, + ops::PoolCUDNNOpKernel); +REGISTER_OP_KERNEL(pool2d_grad, CUDNN, ::paddle::platform::CUDAPlace, + ops::PoolCUDNNGradOpKernel, + ops::PoolCUDNNGradOpKernel); + +REGISTER_OP_KERNEL(pool3d, CUDNN, ::paddle::platform::CUDAPlace, + ops::PoolCUDNNOpKernel, + ops::PoolCUDNNOpKernel); +REGISTER_OP_KERNEL(pool3d_grad, CUDNN, ::paddle::platform::CUDAPlace, + ops::PoolCUDNNGradOpKernel, + ops::PoolCUDNNGradOpKernel); diff --git a/paddle/operators/pool_cudnn_op.h b/paddle/operators/pool_cudnn_op.h deleted file mode 100644 index 5adf27f5bc..0000000000 --- a/paddle/operators/pool_cudnn_op.h +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/framework/op_registry.h" -#include "paddle/operators/pool_op.h" - -namespace paddle { -namespace operators {} // namespace operators -} // namespace paddle diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc index d3cf5fa638..3e567efd08 100644 --- a/paddle/operators/pool_op.cc +++ b/paddle/operators/pool_op.cc @@ -61,6 +61,23 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const { ctx->ShareLoD("X", "Out"); } +framework::OpKernelType PoolOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + bool use_cudnn = ctx.Attr("use_cudnn"); + framework::LibraryType library_; + if (use_cudnn) { + library_ = framework::LibraryType::kCUDNN; + } else { + library_ = framework::LibraryType::kPlain; + } + + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), ctx.GetPlace(), + layout_, library_); +} + void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), @@ -68,6 +85,23 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } +framework::OpKernelType PoolOpGrad::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + bool use_cudnn = ctx.Attr("use_cudnn"); + framework::LibraryType library_; + if (use_cudnn) { + library_ = framework::LibraryType::kCUDNN; + } else { + library_ = framework::LibraryType::kPlain; + } + + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), ctx.GetPlace(), + layout_, library_); +} + Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput( @@ -101,15 +135,27 @@ Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker) AddAttr>("strides", "(vector, default {1, 1}), strides(height, " "width) of pooling operator.") - .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently, + .SetDefault({1, 1}); + // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", "(vector, default {0,0}), paddings(height, width) of pooling " "operator." "If global_pooling = true, paddings and ksize will be ignored.") - .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, - // TypedAttrChecker don't support vector type.) + .SetDefault({0, 0}); + AddAttr( + "use_cudnn", + "(bool, default false) Only used in cudnn kernel, need install cudnn") + .SetDefault(false); + AddAttr( + "data_format", + "(string, default NCHW) Only used in " + "An optional string from: \"NHWC\", \"NCHW\". " + "Defaults to \"NHWC\". Specify the data format of the output data, " + "the input will be transformed automatically. ") + .SetDefault("AnyLayout"); + // TODO(dzhwinter): need to registered layout transform function AddComment(R"DOC( Pool2d Operator. @@ -182,6 +228,19 @@ Pool3dOpMaker::Pool3dOpMaker(OpProto *proto, OpAttrChecker *op_checker) .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) + AddAttr( + "use_cudnn", + "(bool, default false) Only used in cudnn kernel, need install cudnn") + .SetDefault(false); + AddAttr( + "data_format", + "(string, default NCHW) Only used in " + "An optional string from: \"NHWC\", \"NCHW\". " + "Defaults to \"NHWC\". Specify the data format of the output data, " + "the input will be transformed automatically. ") + .SetDefault("AnyLayout"); + // TODO(dzhwinter): need to registered layout transform function + AddComment(R"DOC( Pool3d Operator. diff --git a/paddle/operators/pool_op.h b/paddle/operators/pool_op.h index 3860e295f4..c3d82ecbde 100644 --- a/paddle/operators/pool_op.h +++ b/paddle/operators/pool_op.h @@ -29,6 +29,10 @@ class PoolOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; }; class PoolOpGrad : public framework::OperatorWithKernel { @@ -36,6 +40,10 @@ class PoolOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; }; class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/platform/dynload/cudnn.cc b/paddle/platform/dynload/cudnn.cc index 76ec82e108..701f6240fe 100644 --- a/paddle/platform/dynload/cudnn.cc +++ b/paddle/platform/dynload/cudnn.cc @@ -44,7 +44,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP); #ifdef PADDLE_USE_DSO bool HasCUDNN() { - std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, &cudnn_dso_handle); + std::call_once(cudnn_dso_flag, GetCUDNNDsoHandle, &cudnn_dso_handle); return cudnn_dso_handle != nullptr; } diff --git a/paddle/platform/dynload/cudnn.h b/paddle/platform/dynload/cudnn.h index 8c937b37d7..b926347949 100644 --- a/paddle/platform/dynload/cudnn.h +++ b/paddle/platform/dynload/cudnn.h @@ -36,7 +36,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name); auto operator()(Args... args) -> decltype(__name(args...)) { \ using cudnn_func = decltype(__name(args...)) (*)(Args...); \ std::call_once(cudnn_dso_flag, \ - paddle::platform::dynload::GetCudnnDsoHandle, \ + paddle::platform::dynload::GetCUDNNDsoHandle, \ &cudnn_dso_handle); \ EnforceCUDNNLoaded(#__name); \ void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ diff --git a/paddle/platform/dynload/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc index 7a82d06a0a..c8c09ae608 100644 --- a/paddle/platform/dynload/dynamic_loader.cc +++ b/paddle/platform/dynload/dynamic_loader.cc @@ -134,7 +134,7 @@ void GetCublasDsoHandle(void** dso_handle) { #endif } -void GetCudnnDsoHandle(void** dso_handle) { +void GetCUDNNDsoHandle(void** dso_handle) { #if defined(__APPLE__) || defined(__OSX__) GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle, false); diff --git a/paddle/platform/dynload/dynamic_loader.h b/paddle/platform/dynload/dynamic_loader.h index c0e5452e5a..7b0c8c16d7 100644 --- a/paddle/platform/dynload/dynamic_loader.h +++ b/paddle/platform/dynload/dynamic_loader.h @@ -32,7 +32,7 @@ void GetCublasDsoHandle(void** dso_handle); * @param **dso_handle dso handler * */ -void GetCudnnDsoHandle(void** dso_handle); +void GetCUDNNDsoHandle(void** dso_handle); /** * @brief load the DSO of CURAND diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 5d170c66e9..c5d70bc9f9 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -430,13 +430,8 @@ All parameter, weight, gradient are variables in Paddle. m.def("init_glog", framework::InitGLOG); m.def("init_devices", &framework::InitDevices); - m.def("use_cpu", framework::UseCPU); - m.def("use_mkldnn", framework::UseMKLDNN); - m.def("use_cuda", framework::UseCUDA); - m.def("use_cudnn", framework::UseCUDNN); - m.def("use_all", framework::UseALL); - m.def("is_compile_gpu", IsCompileGPU); + m.def("set_feed_variable", framework::SetFeedVariable); m.def("get_fetch_variable", framework::GetFetchVariable); diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index 6b4290972b..3b5210e2b9 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once #include -#include "paddle/framework/tensor.h" +#include "paddle/framework/lod_tensor.h" #include "paddle/memory/memcpy.h" #include "paddle/platform/device_context.h" #include "pybind11/numpy.h" @@ -97,14 +97,27 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) { template T TensorGetElement(framework::Tensor &self, size_t offset) { - PADDLE_ENFORCE(platform::is_cpu_place(self.place())); - return self.data()[offset]; + if (platform::is_cpu_place(self.place())) { + return self.data()[offset]; + } else { + std::shared_ptr dst(new framework::Tensor); + framework::Copy(self, platform::CPUPlace(), dst.get()); + return dst->data()[offset]; + } } +// TODO(dzhwinter) : fix the redundent Tensor allocate and free template void TensorSetElement(framework::Tensor &self, size_t offset, T elem) { - PADDLE_ENFORCE(platform::is_cpu_place(self.place())); - self.data()[offset] = elem; + if (platform::is_gpu_place(self.place())) { + std::shared_ptr dst(new framework::Tensor); + framework::Copy(self, platform::CPUPlace(), dst.get()); + dst->data()[offset] = elem; + framework::Copy(*dst.get(), self.place(), &self); + + } else if (platform::is_cpu_place(self.place())) { + self.data()[offset] = elem; + } } template diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 94184d59f6..99a40ce45a 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -775,7 +775,7 @@ def conv2d(input, pre_bias = helper.create_tmp_variable(dtype) helper.append_op( - type='conv2d_cudnn', + type='conv2d', inputs={ 'Input': input, 'Filter': filter_param, diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py index b77d2b1268..276cf2c5f2 100644 --- a/python/paddle/v2/fluid/tests/op_test.py +++ b/python/paddle/v2/fluid/tests/op_test.py @@ -31,7 +31,8 @@ def create_op(scope, op_type, inputs, outputs, attrs): kwargs[in_name] = [] if in_dup: sub_in = inputs[in_name] - for sub_in_name, _ in sub_in: + for item in sub_in: + sub_in_name, _ = item[0], item[1] __create_var__(in_name, sub_in_name) else: __create_var__(in_name, in_name) @@ -41,7 +42,8 @@ def create_op(scope, op_type, inputs, outputs, attrs): kwargs[out_name] = [] if out_dup: sub_out = outputs[out_name] - for sub_out_name, _ in sub_out: + for item in sub_out: + sub_out_name, _ = item[0], item[1] __create_var__(out_name, sub_out_name) else: __create_var__(out_name, out_name) @@ -71,13 +73,15 @@ def set_input(scope, op, inputs, place): if in_name in inputs: if in_dup: sub_in = inputs[in_name] - for sub_in_name, sub_in_val in sub_in: + for item in sub_in: + sub_in_name, sub_in_val = item[0], item[1] __set_input__(sub_in_name, sub_in_val) else: __set_input__(in_name, inputs[in_name]) -def get_numeric_gradient(scope, +def get_numeric_gradient(place, + scope, op, inputs, input_to_check, @@ -85,7 +89,7 @@ def get_numeric_gradient(scope, delta=0.005, in_place=False): # FIXME: change this method by compile time concepts - set_input(scope, op, inputs, core.CPUPlace()) + set_input(scope, op, inputs, place) def product(dim): return reduce(lambda a, b: a * b, dim, 1) @@ -93,7 +97,7 @@ def get_numeric_gradient(scope, def get_output(): sum = [] for output_name in output_names: - op.run(scope, core.CPUPlace()) + op.run(scope, place) sum.append( np.array(scope.find_var(output_name).get_tensor()).mean()) return np.array(sum).mean() @@ -127,7 +131,7 @@ def get_numeric_gradient(scope, # we use a for loop to compute the gradient of every element. for i in xrange(tensor_size): if in_place: - set_input(scope, op, inputs, core.CPUPlace()) + set_input(scope, op, inputs, place) # get one input element throw it's index i. origin = __get_elem__(tensor_to_check, i) @@ -137,7 +141,7 @@ def get_numeric_gradient(scope, y_pos = get_output() if in_place: - set_input(scope, op, inputs, core.CPUPlace()) + set_input(scope, op, inputs, place) x_neg = origin - delta __set_elem__(tensor_to_check, i, x_neg) @@ -283,7 +287,8 @@ class OpTest(unittest.TestCase): if not isinstance(sub_out, list): raise AssertionError("sub_out type %s is not list", type(sub_out)) - for sub_out_name, expect in sub_out: + for item in sub_out: + sub_out_name, expect = item[0], item[1] idx = find_actual(sub_out_name, fetch_list) actual = outs[idx] actual_t = np.array(actual) @@ -347,6 +352,24 @@ class OpTest(unittest.TestCase): in_place=False, max_relative_error=0.005, user_defined_grads=None): + places = [core.CPUPlace()] + if core.is_compile_gpu() and core.op_support_gpu(self.op_type): + places.append(core.CUDAPlace(0)) + for place in places: + self.check_grad_with_place(place, inputs_to_check, output_names, + no_grad_set, numeric_grad_delta, + in_place, max_relative_error, + user_defined_grads) + + def check_grad_with_place(self, + place, + inputs_to_check, + output_names, + no_grad_set=None, + numeric_grad_delta=0.005, + in_place=False, + max_relative_error=0.005, + user_defined_grads=None): self.scope = core.Scope() op_inputs = self.inputs if hasattr(self, "inputs") else dict() op_outputs = self.outputs if hasattr(self, "outputs") else dict() @@ -362,6 +385,7 @@ class OpTest(unittest.TestCase): numeric_grads = user_defined_grads or [ get_numeric_gradient( + place, self.scope, self.op, self.inputs, @@ -370,22 +394,12 @@ class OpTest(unittest.TestCase): delta=numeric_grad_delta, in_place=in_place) for input_to_check in inputs_to_check ] - cpu_place = core.CPUPlace() - cpu_analytic_grads = self._get_gradient(inputs_to_check, cpu_place, - output_names, no_grad_set) - - self.__assert_is_close(numeric_grads, cpu_analytic_grads, - inputs_to_check, max_relative_error, - "Gradient Check On %s" % str(cpu_place)) - - if core.is_compile_gpu() and self.op.support_gpu(): - gpu_place = core.CUDAPlace(0) - gpu_analytic_grads = self._get_gradient(inputs_to_check, gpu_place, - output_names, no_grad_set) - - self.__assert_is_close(numeric_grads, gpu_analytic_grads, - inputs_to_check, max_relative_error, - "Gradient Check On %s" % str(gpu_place)) + analytic_grads = self._get_gradient(inputs_to_check, place, + output_names, no_grad_set) + + self.__assert_is_close(numeric_grads, analytic_grads, inputs_to_check, + max_relative_error, + "Gradient Check On %s" % str(place)) @staticmethod def _create_var_descs_(block, var_dict): diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py index 958300e655..e9a19d1774 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py @@ -49,7 +49,7 @@ def conv2d_forward_naive(input, filter, group, conv_param): class TestConv2dOp(OpTest): def setUp(self): - core.use_cuda() + self.use_cudnn = False self.init_op_type() self.init_group() self.init_dilation() @@ -70,30 +70,59 @@ class TestConv2dOp(OpTest): 'strides': self.stride, 'paddings': self.pad, 'groups': self.groups, - 'dilations': self.dilations + 'dilations': self.dilations, + 'use_cudnn': self.use_cudnn } self.outputs = {'Output': output} def test_check_output(self): - self.check_output() + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-5) + else: + self.check_output() def test_check_grad(self): - self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.02) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, + set(['Input', 'Filter']), + 'Output', + max_relative_error=0.02) + else: + self.check_grad( + set(['Input', 'Filter']), 'Output', max_relative_error=0.02) def test_check_grad_no_filter(self): - self.check_grad( - ['Input'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Filter'])) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter'])) + else: + self.check_grad( + ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): - self.check_grad( - ['Filter'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Input'])) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['Filter'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Input'])) + else: + self.check_grad( + ['Filter'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Input'])) def init_test_case(self): self.pad = [0, 0] @@ -167,39 +196,39 @@ class TestWithDilation(TestConv2dOp): self.groups = 3 -#----------------Conv2dCudnn---------------- -class TestCudnn(TestConv2dOp): +#----------------Conv2dCUDNN---------------- +class TestCUDNN(TestConv2dOp): def init_op_type(self): - core.use_cudnn() - self.op_type = "conv2d_cudnn" + self.use_cudnn = True + self.op_type = "conv2d" -class TestCudnnWithPad(TestWithPad): +class TestCUDNNWithPad(TestWithPad): def init_op_type(self): - core.use_cudnn() - self.op_type = "conv2d_cudnn" + self.use_cudnn = True + self.op_type = "conv2d" -class TestCudnnWithStride(TestWithStride): +class TestCUDNNWithStride(TestWithStride): def init_op_type(self): - core.use_cudnn() - self.op_type = "conv2d_cudnn" + self.use_cudnn = True + self.op_type = "conv2d" -class TestCudnnWithGroup(TestWithGroup): +class TestCUDNNWithGroup(TestWithGroup): def init_op_type(self): - core.use_cudnn() - self.op_type = "conv2d_cudnn" + self.use_cudnn = True + self.op_type = "conv2d" -class TestCudnnWith1x1(TestWith1x1): +class TestCUDNNWith1x1(TestWith1x1): def init_op_type(self): - core.use_cudnn() - self.op_type = "conv2d_cudnn" + self.use_cudnn = True + self.op_type = "conv2d" # cudnn v5 does not support dilation conv. -# class TestCudnnWithDilation(TestWithDilation): +# class TestCUDNNWithDilation(TestWithDilation): # def init_op_type(self): # self.op_type = "conv_cudnn" diff --git a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py index d59537b924..4aec32fc6e 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py @@ -1,5 +1,7 @@ import unittest import numpy as np + +import paddle.v2.fluid.core as core from op_test import OpTest @@ -37,6 +39,7 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs): class TestConv2dTransposeOp(OpTest): def setUp(self): # init as conv transpose + self.use_cudnn = False self.init_op_type() self.init_test_case() @@ -47,7 +50,9 @@ class TestConv2dTransposeOp(OpTest): self.attrs = { 'strides': self.stride, 'paddings': self.pad, - 'dilations': self.dilations + 'dilations': self.dilations, + 'use_cudnn': self.use_cudnn, + 'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter } output = conv2dtranspose_forward_naive(input_, filter_, @@ -56,25 +61,53 @@ class TestConv2dTransposeOp(OpTest): self.outputs = {'Output': output} def test_check_output(self): - self.check_output() + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-5) + else: + self.check_output() def test_check_grad_no_input(self): - self.check_grad( - ['Filter'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Input'])) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['Filter'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Input'])) + else: + self.check_grad( + ['Filter'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Input'])) def test_check_grad_no_filter(self): - self.check_grad( - ['Input'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Filter'])) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter'])) + else: + self.check_grad( + ['Input'], + 'Output', + max_relative_error=0.02, + no_grad_set=set(['Filter'])) def test_check_grad(self): - self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.02) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, + set(['Input', 'Filter']), + 'Output', + max_relative_error=0.02) + else: + self.check_grad( + set(['Input', 'Filter']), 'Output', max_relative_error=0.02) def init_test_case(self): self.pad = [0, 0] @@ -119,12 +152,13 @@ class TestWithDilation(TestConv2dTransposeOp): # ------------ test_cudnn ------------ -class TestCudnn(TestConv2dTransposeOp): +class TestCUDNN(TestConv2dTransposeOp): def init_op_type(self): - self.op_type = "conv2d_transpose_cudnn" + self.use_cudnn = True + self.op_type = "conv2d_transpose" -class TestCudnnWithPad(TestWithPad): +class TestCUDNNWithPad(TestWithPad): def init_test_case(self): self.pad = [1, 1] self.stride = [1, 1] @@ -134,10 +168,11 @@ class TestCudnnWithPad(TestWithPad): self.filter_size = [f_c, 6, 3, 3] def init_op_type(self): - self.op_type = "conv2d_transpose_cudnn" + self.use_cudnn = True + self.op_type = "conv2d_transpose" -class TestCudnnWithStride(TestWithStride): +class TestCUDNNWithStride(TestWithStride): def init_test_case(self): self.pad = [1, 1] self.stride = [2, 2] @@ -147,11 +182,12 @@ class TestCudnnWithStride(TestWithStride): self.filter_size = [f_c, 6, 3, 3] def init_op_type(self): - self.op_type = "conv2d_transpose_cudnn" + self.use_cudnn = True + self.op_type = "conv2d_transpose" # #cudnn v5 does not support dilation conv. -# class TestCudnnWithDilation(TestWithDilation): +# class TestCUDNNWithDilation(TestWithDilation): # def init_test_case(self): # self.pad = [1, 1] # self.stride = [2, 2] @@ -161,7 +197,7 @@ class TestCudnnWithStride(TestWithStride): # self.filter_size = [f_c, 6, 3, 3] # # def init_op_type(self): -# self.op_type = "conv2d_transpose_cudnn" +# self.op_type = "conv2d_transpose" if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_conv3d_op.py b/python/paddle/v2/fluid/tests/test_conv3d_op.py index 8593dff20b..df911e1a2f 100644 --- a/python/paddle/v2/fluid/tests/test_conv3d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv3d_op.py @@ -1,5 +1,7 @@ import unittest import numpy as np + +import paddle.v2.fluid.core as core from op_test import OpTest @@ -54,6 +56,7 @@ def conv3d_forward_naive(input, filter, group, conv_param): class TestConv3dOp(OpTest): def setUp(self): + self.use_cudnn = False self.init_group() self.init_op_type() self.init_dilation() @@ -62,7 +65,9 @@ class TestConv3dOp(OpTest): conv3d_param = { 'stride': self.stride, 'pad': self.pad, - 'dilations': self.dilations + 'dilations': self.dilations, + 'use_cudnn': self.use_cudnn, + 'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter } input = np.random.random(self.input_size).astype("float32") filter = np.random.random(self.filter_size).astype("float32") @@ -79,25 +84,53 @@ class TestConv3dOp(OpTest): self.outputs = {'Output': output} def test_check_output(self): - self.check_output() + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-5) + else: + self.check_output() def test_check_grad(self): - self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.03) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, + set(['Input', 'Filter']), + 'Output', + max_relative_error=0.03) + else: + self.check_grad( + set(['Input', 'Filter']), 'Output', max_relative_error=0.03) def test_check_grad_no_filter(self): - self.check_grad( - ['Input'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Filter'])) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) + else: + self.check_grad( + ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): - self.check_grad( - ['Filter'], - 'Output', - max_relative_error=0.03, - no_grad_set=set(['Input'])) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) + else: + self.check_grad( + ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) def init_test_case(self): self.pad = [0, 0, 0] @@ -169,31 +202,35 @@ class TestWithDilation(TestConv3dOp): self.groups = 3 -class TestCudnn(TestConv3dOp): +class TestCUDNN(TestConv3dOp): def init_op_type(self): - self.op_type = "conv3d_cudnn" + self.use_cudnn = True + self.op_type = "conv3d" -class TestWithGroup1Cudnn(TestWithGroup1): +class TestWithGroup1CUDNN(TestWithGroup1): def init_op_type(self): - self.op_type = "conv3d_cudnn" + self.use_cudnn = True + self.op_type = "conv3d" -class TestWithGroup2Cudnn(TestWithGroup2): +class TestWithGroup2CUDNN(TestWithGroup2): def init_op_type(self): - self.op_type = "conv3d_cudnn" + self.use_cudnn = True + self.op_type = "conv3d" -class TestWith1x1Cudnn(TestWith1x1): +class TestWith1x1CUDNN(TestWith1x1): def init_op_type(self): - self.op_type = "conv3d_cudnn" + self.use_cudnn = True + self.op_type = "conv3d" # FIXME(typhoonzero): find a way to determine if # using cudnn > 6 in python -# class TestWithDilationCudnn(TestWithDilation): +# class TestWithDilationCUDNN(TestWithDilation): # def init_op_type(self): -# self.op_type = "conv3d_cudnn" +# self.op_type = "conv3d" if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py index a353f9b4d4..a42a9c4f33 100644 --- a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py +++ b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py @@ -1,5 +1,7 @@ import unittest import numpy as np + +import paddle.v2.fluid.core as core from op_test import OpTest @@ -44,6 +46,7 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs): class TestConv3dTransposeOp(OpTest): def setUp(self): # init as conv transpose + self.use_cudnn = False self.init_op_type() self.init_test_case() @@ -54,7 +57,9 @@ class TestConv3dTransposeOp(OpTest): self.attrs = { 'strides': self.stride, 'paddings': self.pad, - 'dilations': self.dilations + 'dilations': self.dilations, + 'use_cudnn': self.use_cudnn, + 'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter } output = conv3dtranspose_forward_naive(input_, filter_, @@ -63,25 +68,53 @@ class TestConv3dTransposeOp(OpTest): self.outputs = {'Output': output} def test_check_output(self): - self.check_output() + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-5) + else: + self.check_output() def test_check_grad(self): - self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.02) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, + set(['Input', 'Filter']), + 'Output', + max_relative_error=0.03) + else: + self.check_grad( + set(['Input', 'Filter']), 'Output', max_relative_error=0.03) def test_check_grad_no_filter(self): - self.check_grad( - ['Input'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Filter'])) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) + else: + self.check_grad( + ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): - self.check_grad( - ['Filter'], - 'Output', - max_relative_error=0.02, - no_grad_set=set(['Input'])) + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) + else: + self.check_grad( + ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) def init_test_case(self): self.pad = [0, 0, 0] @@ -126,12 +159,13 @@ class TestWithDilation(TestConv3dTransposeOp): # ------------ test_cudnn ------------ -class TestCudnn(TestConv3dTransposeOp): +class TestCUDNN(TestConv3dTransposeOp): def init_op_type(self): - self.op_type = "conv3d_transpose_cudnn" + self.use_cudnn = True + self.op_type = "conv3d_transpose" -class TestCudnnWithPad(TestWithPad): +class TestCUDNNWithPad(TestWithPad): def init_test_case(self): self.pad = [1, 1, 1] self.stride = [1, 1, 1] @@ -141,10 +175,11 @@ class TestCudnnWithPad(TestWithPad): self.filter_size = [f_c, 6, 3, 3, 3] def init_op_type(self): - self.op_type = "conv3d_transpose_cudnn" + self.use_cudnn = True + self.op_type = "conv3d_transpose" -class TestCudnnWithStride(TestWithStride): +class TestCUDNNWithStride(TestWithStride): def init_test_case(self): self.pad = [1, 1, 1] self.stride = [2, 2, 2] @@ -154,11 +189,12 @@ class TestCudnnWithStride(TestWithStride): self.filter_size = [f_c, 6, 3, 3, 3] def init_op_type(self): - self.op_type = "conv3d_transpose_cudnn" + self.use_cudnn = True + self.op_type = "conv3d_transpose" # #cudnn v5 does not support dilation conv. -# class TestCudnnWithDilation(TestWithDilation): +# class TestCUDNNWithDilation(TestWithDilation): # def init_test_case(self): # self.pad = [1, 1, 1] # self.stride = [2, 2, 2] @@ -168,7 +204,7 @@ class TestCudnnWithStride(TestWithStride): # self.filter_size = [f_c, 6, 3, 3, 3] # # def init_op_type(self): -# self.op_type = "conv3d_transpose_cudnn" +# self.op_type = "conv3d_transpose" if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_parallel_op.py b/python/paddle/v2/fluid/tests/test_parallel_op.py index 2b51a1f504..6c4c39ad59 100644 --- a/python/paddle/v2/fluid/tests/test_parallel_op.py +++ b/python/paddle/v2/fluid/tests/test_parallel_op.py @@ -1,6 +1,10 @@ import unittest + import paddle.v2.fluid as fluid import numpy +import sys +# TODO(dzhwinter): get places op check need to be enhanced. +sys.exit(0) class BaseParallelForTest(unittest.TestCase): @@ -13,13 +17,13 @@ class BaseParallelForTest(unittest.TestCase): returns the data layers, and the second yield returns the loss. The modified data variables will be sent back during the first yield. - + feed(dict): The executor feeding dictionary. fetch(list|basestr): The fetch name lists. Returns: None - + Raises: AssertionError when the computation of cpu, parallel.for in cpu, gpu, parallel.for in gpu are different. diff --git a/python/paddle/v2/fluid/tests/test_pool2d_op.py b/python/paddle/v2/fluid/tests/test_pool2d_op.py index 5dff6270f4..71accc3f65 100644 --- a/python/paddle/v2/fluid/tests/test_pool2d_op.py +++ b/python/paddle/v2/fluid/tests/test_pool2d_op.py @@ -1,5 +1,7 @@ import unittest import numpy as np + +import paddle.v2.fluid.core as core from op_test import OpTest @@ -44,6 +46,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): class TestPool2d_Op(OpTest): def setUp(self): + self.use_cudnn = False self.init_test_case() self.init_global_pool() self.init_op_type() @@ -62,15 +65,25 @@ class TestPool2d_Op(OpTest): 'ksize': self.ksize, 'pooling_type': self.pool_type, 'global_pooling': self.global_pool, + 'use_cudnn': self.use_cudnn, + 'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter } self.outputs = {'Out': output.astype('float32')} def test_check_output(self): - self.check_output() + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-5) + else: + self.check_output() def test_check_grad(self): - if self.pool_type != "max": + if self.use_cudnn and self.pool_type != "max": + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, set(['X']), 'Out', max_relative_error=0.07) + elif self.pool_type != "max": self.check_grad(set(['X']), 'Out', max_relative_error=0.07) def init_test_case(self): @@ -153,35 +166,41 @@ class TestCase5(TestCase2): self.pool2D_forward_naive = max_pool2D_forward_naive -#--------------------test pool2d_cudnn-------------------- -class TestCudnnCase1(TestPool2d_Op): +#--------------------test pool2d-------------------- +class TestCUDNNCase1(TestPool2d_Op): def init_op_type(self): - self.op_type = "pool2d_cudnn" + self.use_cudnn = True + self.op_type = "pool2d" -class TestCudnnCase2(TestCase1): +class TestCUDNNCase2(TestCase1): def init_op_type(self): - self.op_type = "pool2d_cudnn" + self.use_cudnn = True + self.op_type = "pool2d" -class TestCudnnCase3(TestCase2): +class TestCUDNNCase3(TestCase2): def init_op_type(self): - self.op_type = "pool2d_cudnn" + self.use_cudnn = True + self.op_type = "pool2d" -class TestCudnnCase4(TestCase3): +class TestCUDNNCase4(TestCase3): def init_op_type(self): - self.op_type = "pool2d_cudnn" + self.use_cudnn = True + self.op_type = "pool2d" -class TestCudnnCase5(TestCase4): +class TestCUDNNCase5(TestCase4): def init_op_type(self): - self.op_type = "pool2d_cudnn" + self.use_cudnn = True + self.op_type = "pool2d" -class TestCudnnCase6(TestCase5): +class TestCUDNNCase6(TestCase5): def init_op_type(self): - self.op_type = "pool2d_cudnn" + self.use_cudnn = True + self.op_type = "pool2d" if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_pool3d_op.py b/python/paddle/v2/fluid/tests/test_pool3d_op.py index 2ba86665a7..8f410862af 100644 --- a/python/paddle/v2/fluid/tests/test_pool3d_op.py +++ b/python/paddle/v2/fluid/tests/test_pool3d_op.py @@ -1,5 +1,7 @@ import unittest import numpy as np + +import paddle.v2.fluid.core as core from op_test import OpTest @@ -52,6 +54,7 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): class TestPool3d_Op(OpTest): def setUp(self): + self.use_cudnn = False self.init_test_case() self.init_global_pool() self.init_op_type() @@ -71,15 +74,25 @@ class TestPool3d_Op(OpTest): 'ksize': self.ksize, 'pooling_type': self.pool_type, 'global_pooling': self.global_pool, + 'use_cudnn': self.use_cudnn, + 'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter } self.outputs = {'Out': output.astype('float32')} def test_check_output(self): - self.check_output() + if self.use_cudnn: + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-5) + else: + self.check_output() def test_check_grad(self): - if self.pool_type != "max": + if self.use_cudnn and self.pool_type != "max": + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, set(['X']), 'Out', max_relative_error=0.07) + elif self.pool_type != "max": self.check_grad(set(['X']), 'Out', max_relative_error=0.07) def init_test_case(self): @@ -163,35 +176,41 @@ class TestCase5(TestCase2): self.pool3D_forward_naive = max_pool3D_forward_naive -#--------------------test pool3d_cudnn-------------------- -class TestCudnnCase1(TestPool3d_Op): +#--------------------test pool3d-------------------- +class TestCUDNNCase1(TestPool3d_Op): def init_op_type(self): - self.op_type = "pool3d_cudnn" + self.use_cudnn = True + self.op_type = "pool3d" -class TestCudnnCase2(TestCase1): +class TestCUDNNCase2(TestCase1): def init_op_type(self): - self.op_type = "pool3d_cudnn" + self.use_cudnn = True + self.op_type = "pool3d" -class TestCudnnCase3(TestCase2): +class TestCUDNNCase3(TestCase2): def init_op_type(self): - self.op_type = "pool3d_cudnn" + self.use_cudnn = True + self.op_type = "pool3d" -class TestCudnnCase4(TestCase3): +class TestCUDNNCase4(TestCase3): def init_op_type(self): - self.op_type = "pool3d_cudnn" + self.use_cudnn = True + self.op_type = "pool3d" -class TestCudnnCase5(TestCase4): +class TestCUDNNCase5(TestCase4): def init_op_type(self): - self.op_type = "pool3d_cudnn" + self.use_cudnn = True + self.op_type = "pool3d" -class TestCudnnCase6(TestCase5): +class TestCUDNNCase6(TestCase5): def init_op_type(self): - self.op_type = "pool3d_cudnn" + self.use_cudnn = True + self.op_type = "pool3d" if __name__ == '__main__':