From 8c2a834ef3791170b4b9e0d29ef763866e58ad4b Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 15 Jun 2018 18:46:49 +0800 Subject: [PATCH 01/46] add doc for inference_transpiler --- .../fluid/transpiler/inference_transpiler.py | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py index 202aa76084..0629f2916b 100644 --- a/python/paddle/fluid/transpiler/inference_transpiler.py +++ b/python/paddle/fluid/transpiler/inference_transpiler.py @@ -19,16 +19,30 @@ from ..executor import global_scope class InferenceTranspiler: + ''' + Convert the fluid program to an optimized inference program. + + There are several optimizations; only batch normalization fusion is supported now. + + Examples: + + .. code-block:: python + + # As InferenceTranspiler will modify the original program, + # please clone it before use. + inference_transpiler_program = program.clone() + t = fluid.InferenceTranspiler() + t.transpile(inference_transpiler_program, place) + ''' + def transpile(self, program, place, scope=None): ''' - Transpile the program. Support only fuse batch normalization now. - - :param program: program to transpile - :type program: Program - :param place: inference place - :type place: Place - :param scope: inference scope - :type scope: Scope or None + Run the transpiler. + + Args: + program (Program): program to transpile + place (Place): inference place + scope (Scope|None): inference Scope ''' if not isinstance(program, Program): raise TypeError("program should be as Program type") @@ -49,36 +63,43 @@ class InferenceTranspiler: can be integrated with them. Doing so will give us a forward acceleration, especially in environments like mobile or embedded. 
- For input X: - - Conv process: X = input * W + bias - - Batch norm process: X' = (X - mean) / std - - Scale Process: Y = a * X' + b + For input :math:`X`: + + - Conv process: :math:`X = input * W + bias` + - Batch norm process: :math:`X' = (X - mean) / std` + - Scale Process: :math:`Y = a * X' + b` After fuse into one operation: - Y = (input * W + bias - mean) / std * a + b - = input * a * W / std + ((bias - mean) / std * a + b) + .. math:: + + Y &= (input * W + bias - mean) / std * a + b \\\\ + &= input * a * W / std + ((bias - mean) / std * a + b) The operator transformation is: + - before: + - conv->batch_norm->any_other_op (bias == 0) - conv->elementwise_add->batch_norm->any_other_op (bias != 0) + - after: + - conv->elementwise_add->any_other_op The transpile stages are: + 1. insert elementwise_add op when bias == 0. 2. fuse the batch_norm's parameters to conv and elementwise_add operators. 3. remove batch_norm ops which are not used in any other ops. 4. adjust the input of any_other_op to be the output of elementwise_add operator. 5. remove unused variables. 
- :param program: program to transpile - :type program: Program - :param place: inference place - :type place: Place - :param scope: inference scope - :type scope: Scope + Args: + program (Program): program to transpile + place (Place): inference place + scope (Scope): inference Scope + ''' self.scope = scope self.place = place From a8c2ff316f21d3defd211386c2034a241debed96 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sat, 16 Jun 2018 12:58:36 +0800 Subject: [PATCH 02/46] refine the initial cpu memory flag for mkldnn --- paddle/fluid/platform/cpu_info.cc | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index 40dc7c9a0b..c708337f8f 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -28,9 +28,13 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1, "Default use 100% of CPU memory for PaddlePaddle," "reserve the rest for page tables, etc"); -DEFINE_uint64( - initial_cpu_memory_in_mb, 500, - "Default initial 500MB of CPU memory for PaddlePaddle, in MD unit."); +DEFINE_uint64(initial_cpu_memory_in_mb, +#ifdef PADDLE_WITH_MKLDNN + 1000, +#else + 500, +#endif + "Initial CPU memory for PaddlePaddle, in MD unit."); DEFINE_double( fraction_of_cuda_pinned_memory_to_use, 0.5, @@ -59,10 +63,7 @@ inline size_t CpuTotalPhysicalMemory() { size_t CpuMaxAllocSize() { // For distributed systems, it requires configuring and limiting // the fraction of memory to use. - return std::min( - static_cast(FLAGS_fraction_of_cpu_memory_to_use * - CpuTotalPhysicalMemory()), - static_cast(FLAGS_initial_cpu_memory_in_mb * 1 << 20)); + return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory(); } size_t CpuMinChunkSize() { @@ -71,8 +72,11 @@ size_t CpuMinChunkSize() { } size_t CpuMaxChunkSize() { - // Allow to allocate the maximum chunk size is roughly 3% of CPU memory. 
- return CpuMaxAllocSize() / 32; + // Allow to allocate the maximum chunk size is roughly 3% of CPU memory, + // or the initial_cpu_memory_in_mb. + return std::min( + static_cast(CpuMaxAllocSize() / 32), + static_cast(FLAGS_initial_cpu_memory_in_mb * 1 << 20)); } size_t CUDAPinnedMaxAllocSize() { From 9c128fe656e16c0be9167b97a9118dfe65649c96 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sat, 16 Jun 2018 16:22:15 +0800 Subject: [PATCH 03/46] concat support data as input --- paddle/fluid/operators/concat_op.h | 41 +++++++++++++++++---------- paddle/fluid/operators/math/concat.cc | 25 +++++++++------- paddle/fluid/operators/math/concat.h | 3 +- 3 files changed, 43 insertions(+), 26 deletions(-) diff --git a/paddle/fluid/operators/concat_op.h b/paddle/fluid/operators/concat_op.h index 1b1b8bf5ed..a496301526 100644 --- a/paddle/fluid/operators/concat_op.h +++ b/paddle/fluid/operators/concat_op.h @@ -60,34 +60,45 @@ template class ConcatGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { - auto* in = ctx.Input(framework::GradVarName("Out")); + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto ins = ctx.MultiInput("X"); + auto out_var_names = ctx.Outputs(framework::GradVarName("X")); auto outs = ctx.MultiOutput(framework::GradVarName("X")); int64_t axis = static_cast(ctx.Attr("axis")); + // get output tensor that the name is not kEmptyVarName + std::vector outputs; + for (size_t j = 0; j < outs.size(); ++j) { + if (out_var_names[j] != framework::kEmptyVarName) { + outs[j]->mutable_data(ctx.GetPlace()); + outputs.push_back(outs[j]); + } else { + outputs.push_back(nullptr); + } + } + // Sometimes direct copies will be faster, this maybe need deeply analysis. 
if (axis == 0 && outs.size() < 10) { size_t input_offset = 0; - auto in_stride = framework::stride_numel(in->dims()); + const auto in_stride = framework::stride_numel(out_grad->dims()); - for (auto& out : outs) { - out->mutable_data(ctx.GetPlace()); - auto out_stride = framework::stride_numel(out->dims()); - StridedNumelCopyWithAxis(ctx.device_context(), axis, out->data(), - out_stride, in->data() + input_offset, - in_stride, out_stride[axis]); + for (size_t i = 0; i < outs.size(); ++i) { + auto out_stride = framework::stride_numel(ins[i]->dims()); + auto* out = outputs[i]; + if (out != nullptr) { + StridedNumelCopyWithAxis( + ctx.device_context(), axis, out->data(), out_stride, + out_grad->data() + input_offset, in_stride, out_stride[axis]); + } input_offset += out_stride[axis]; } } else { - std::vector outputs(outs.size()); - for (size_t j = 0; j < outs.size(); ++j) { - outs[j]->mutable_data(ctx.GetPlace()); - outputs[j] = *outs[j]; - } - auto& dev_ctx = ctx.template device_context(); paddle::operators::math::ConcatGradFunctor concat_grad_functor; - concat_grad_functor(dev_ctx, *in, static_cast(axis), &outputs); + concat_grad_functor(dev_ctx, *out_grad, ins, static_cast(axis), + &outputs); } } }; diff --git a/paddle/fluid/operators/math/concat.cc b/paddle/fluid/operators/math/concat.cc index cc69212466..c10cff9c9b 100644 --- a/paddle/fluid/operators/math/concat.cc +++ b/paddle/fluid/operators/math/concat.cc @@ -70,35 +70,40 @@ template class ConcatGradFunctor { public: void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, const int axis, - std::vector* outputs) { + const framework::Tensor& input, + const std::vector& ref_inputs, + const int axis, std::vector* outputs) { // TODO(zcd): Add input data validity checking - int num = outputs->size(); + size_t num = outputs->size(); int input_rows = 1; - auto dim_0 = outputs->at(0).dims(); + auto dim_0 = ref_inputs[0]->dims(); for (int i = 0; i < axis; ++i) { input_rows *= 
dim_0[i]; } + int input_cols = 0; std::vector output_cols(outputs->size()); - for (int i = 0; i < num; ++i) { - int t_cols = outputs->at(i).numel() / input_rows; + for (size_t i = 0; i < num; ++i) { + int t_cols = ref_inputs[i]->numel() / input_rows; input_cols += t_cols; output_cols[i] = t_cols; } auto cpu_place = boost::get(context.GetPlace()); // computation - for (int k = 0; k < input_rows; ++k) { + for (size_t k = 0; k < input_rows; ++k) { const T* src_ptr = input.data() + k * input_cols; int col_idx = 0; for (int j = 0; j < num; ++j) { int col_len = output_cols[j]; - T* dst_ptr = outputs->at(j).data() + k * col_len; - memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx, - sizeof(T) * col_len); + auto* out_tensor = (*outputs)[j]; + if (out_tensor != nullptr) { + T* dst_ptr = out_tensor->data() + k * col_len; + memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx, + sizeof(T) * col_len); + } col_idx += col_len; } } diff --git a/paddle/fluid/operators/math/concat.h b/paddle/fluid/operators/math/concat.h index 041ce8bf8a..9e080f2e8b 100644 --- a/paddle/fluid/operators/math/concat.h +++ b/paddle/fluid/operators/math/concat.h @@ -57,7 +57,8 @@ template class ConcatGradFunctor { public: void operator()(const DeviceContext& context, const framework::Tensor& input, - const int axis, std::vector* outputs); + const std::vector& ref_inputs, + const int axis, std::vector* outputs); }; } // namespace math From a0c5fd83b26a2603e46011d9e6a1e6b1e850e323 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sat, 16 Jun 2018 13:11:55 +0800 Subject: [PATCH 04/46] enable setting initial memory from env --- paddle/testing/paddle_gtest_main.cc | 5 +++-- python/paddle/fluid/__init__.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index 7772dc97f5..555be3d00e 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -30,8 +30,9 @@ int 
main(int argc, char** argv) { new_argv.push_back( strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); #else - new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn")); - new_argv.push_back(strdup("--undefok=use_mkldnn")); + new_argv.push_back(strdup( + "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb")); + new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb")); #endif int new_argc = static_cast(new_argv.size()); char** new_argv_address = new_argv.data(); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index bd985ad733..5af5bc9c47 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -117,7 +117,7 @@ def __bootstrap__(): read_env_flags = [ 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', - 'eager_delete_scope', 'use_mkldnn' + 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb' ] if core.is_compiled_with_cuda(): read_env_flags += [ From ad1ad738d89bb6b347ee0c53ef0245acb86f158d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 17 Jun 2018 10:48:34 +0800 Subject: [PATCH 05/46] add gpu support for concat --- paddle/fluid/operators/math/concat.cc | 2 +- paddle/fluid/operators/math/concat.cu | 41 ++++++++++++++++----------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/paddle/fluid/operators/math/concat.cc b/paddle/fluid/operators/math/concat.cc index c10cff9c9b..14964fc62a 100644 --- a/paddle/fluid/operators/math/concat.cc +++ b/paddle/fluid/operators/math/concat.cc @@ -98,7 +98,7 @@ class ConcatGradFunctor { int col_idx = 0; for (int j = 0; j < num; ++j) { int col_len = output_cols[j]; - auto* out_tensor = (*outputs)[j]; + auto* out_tensor = outputs->at(j); if (out_tensor != nullptr) { T* dst_ptr = out_tensor->data() + k * col_len; memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx, diff --git a/paddle/fluid/operators/math/concat.cu b/paddle/fluid/operators/math/concat.cu 
index 4285d38dcd..f66baa6573 100644 --- a/paddle/fluid/operators/math/concat.cu +++ b/paddle/fluid/operators/math/concat.cu @@ -102,10 +102,12 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row, int local_col = tid_x - curr_offset; int segment_width = curr_col_offset - curr_offset; T* output_ptr = outputs_data[curr_segment]; - int tid_y = blockIdx.y * blockDim.y + threadIdx.y; - for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) - output_ptr[tid_y * segment_width + local_col] = - input_data[tid_y * in_col + tid_x]; + if (output_ptr != nullptr) { + int tid_y = blockIdx.y * blockDim.y + threadIdx.y; + for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) + output_ptr[tid_y * segment_width + local_col] = + input_data[tid_y * in_col + tid_x]; + } } } @@ -118,10 +120,12 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row, int split = tid_x / fixed_out_col; int in_offset = tid_x - split * fixed_out_col; T* output_ptr = outputs_data[split]; - int tid_y = blockIdx.y * blockDim.y + threadIdx.y; - for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) - output_ptr[tid_y * fixed_out_col + in_offset] = - input_data[tid_y * in_col + tid_x]; + if (output_ptr != nullptr) { + int tid_y = blockIdx.y * blockDim.y + threadIdx.y; + for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) + output_ptr[tid_y * fixed_out_col + in_offset] = + input_data[tid_y * in_col + tid_x]; + } } } @@ -203,17 +207,18 @@ template class ConcatGradFunctor { public: void operator()(const platform::CUDADeviceContext& context, - const framework::Tensor& input, const int axis, - std::vector* outputs) { + const framework::Tensor& input, + const std::vector& ref_inputs, + const int axis, std::vector* outputs) { // TODO(zcd): Add input data validity checking int o_num = outputs->size(); int out_row = 1; - auto dim_0 = outputs->at(0).dims(); + auto dim_0 = ref_inputs[0]->dims(); for (int i = 0; i < axis; ++i) { out_row *= dim_0[i]; } - int out_col = 
outputs->at(0).numel() / out_row; + int out0_col = ref_inputs[0]->numel() / out_row; int in_col = 0, in_row = out_row; bool sameShape = true; @@ -223,13 +228,17 @@ class ConcatGradFunctor { outputs_cols[0] = 0; for (int i = 0; i < o_num; ++i) { - int t_col = outputs->at(i).numel() / out_row; + int t_col = outputs->at(i)->numel() / out_row; if (sameShape) { - if (t_col != out_col) sameShape = false; + if (t_col != out0_col) sameShape = false; } in_col += t_col; outputs_cols[i + 1] = in_col; - outputs_ptr[i] = outputs->at(i).data(); + if (outputs->at(i) != nullptr) { + outputs_ptr[i] = outputs->at(i)->data(); + } else { + outputs_ptr[i] = nullptr; + } } T** dev_out_gpu_data = @@ -255,7 +264,7 @@ class ConcatGradFunctor { if (sameShape) { KernelConcatGrad<<>>( - input.data(), in_row, in_col, out_col, dev_out_gpu_data); + input.data(), in_row, in_col, out0_col, dev_out_gpu_data); } else { const int* dev_outs_col_data = outputs_cols.CUDAData(context.GetPlace()); KernelConcatGrad<<>>( From d2b791a0cc9ac509433403d50cf4e93a1683ff7c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 17 Jun 2018 19:19:41 +0800 Subject: [PATCH 06/46] add SGD and momentum optimizer doc --- python/paddle/fluid/optimizer.py | 66 +++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 54fe935627..214e0a7645 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -28,8 +28,8 @@ from contextlib import contextmanager __all__ = [ 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', - 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer', - 'Adadelta', 'ModelAverage', 'Optimizer' + 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'AdadeltaOptimizer', + 'RMSPropOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer' ] @@ -192,15 +192,15 @@ class Optimizer(object): 
"""Add optimization operators to update gradients to variables. Args: - loss: the target that this optimization is for. - parameters_and_grads: a list of (variable, gradient) pair to update. + loss(Variable): the target that this optimization is for. + parameters_and_grads(list(tuple(Variable, Variable))): + a list of (variable, gradient) pair to update. Returns: return_op_list: a list of operators that will complete one step of optimization. This will include parameter update ops, global step update ops and any other custom ops required by subclasses to manage their internal state. - :param startup_program: """ # This is a default implementation of create_optimization_pass that # can be shared by most optimizers. This implementation assumes that @@ -268,7 +268,22 @@ class Optimizer(object): class SGDOptimizer(Optimizer): - """ Simple SGD optimizer without any state. + """ + Optimizer of the stochastic gradient descent algorithm. + + .. math:: + + param\_out = param - learning\_rate * grad + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + + Examples: + .. code-block:: python + + sgd_optimizer = SGDOptimizer(learning_rate=0.2) + sgd_optimizer.minimize(cost) """ def __init__(self, learning_rate, **kwargs): @@ -294,7 +309,37 @@ class SGDOptimizer(Optimizer): class MomentumOptimizer(Optimizer): - """Simple Momentum optimizer with velocity state + """ + + Simple Momentum optimizer with velocity state + + This optimizer has a flag for Nestrov Momentum. + + The update equations are as follows: + + .. math:: + + & velocity = mu * velocity + gradient + + & if (use\_nesterov): + + & param = param - gradient * learning\_rate + mu * velocity * learning\_rate + + & else: + + & param = param - learning\_rate * velocity + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. 
\ + Can be a float value or a Variable with one float value as data element. + momentum (float): momentum factor + use_nesterov (bool): enables Nesterov momentum + + Examples: + .. code-block:: python + + optimizer = MomentumOptimizer(learning_rate=0.2, momentum=0.1) + optimizer.minimize(cost) """ _velocity_acc_str = "velocity" @@ -614,6 +659,7 @@ class DecayedAdagradOptimizer(Optimizer): class AdadeltaOptimizer(Optimizer): """ **Adadelta Optimizer** + Simple Adadelta optimizer with average squared grad state and average squared update state. The details of adadelta please refer to this @@ -703,7 +749,7 @@ class RMSPropOptimizer(Optimizer): .. math:: - r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\ + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\ w & = w - \\frac{\\eta} {\\sqrt{r(w,t) + \\epsilon}} \\nabla Q_{i}(w) @@ -844,7 +890,9 @@ class ModelAverage(Optimizer): max_average_window: The maximum size of average window. Examples: - ... + + .. code-block:: python + optimizer = fluid.optimizer.Momentum() _, params_grads = optimizer.minimize(cost) model_average = fluid.optimizer.ModelAverage(params_grads, 0.15, From 5e8646ab30f5899fc5cab2b57c6b66b718ad004b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 17 Jun 2018 19:58:01 +0800 Subject: [PATCH 07/46] add doc for AdagradOptimizer --- python/paddle/fluid/optimizer.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 214e0a7645..f40c4cb927 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -282,7 +282,7 @@ class SGDOptimizer(Optimizer): Examples: .. code-block:: python - sgd_optimizer = SGDOptimizer(learning_rate=0.2) + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.2) sgd_optimizer.minimize(cost) """ @@ -338,7 +338,7 @@ class MomentumOptimizer(Optimizer): Examples: .. 
code-block:: python - optimizer = MomentumOptimizer(learning_rate=0.2, momentum=0.1) + optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1) optimizer.minimize(cost) """ _velocity_acc_str = "velocity" @@ -383,7 +383,32 @@ class MomentumOptimizer(Optimizer): class AdagradOptimizer(Optimizer): - """Simple Adagrad optimizer with moment state + """ + **Adaptive Gradient Algorithm (Adagrad)** + + The update is done as follows: + + .. math:: + + moment\_out &= moment + grad * grad + + param\_out &= param - \\frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon} + + The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + does not have the epsilon attribute. It is added here in our implementation + as also proposed here: http://cs231n.github.io/neural-networks-3/#ada + for numerical stability to avoid the division by zero error. + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + epsilon (float): a small float value for numerical stability. + + Examples: + .. code-block:: python + + optimizer = fluid.optimizer.Adagrad(learning_rate=0.2) + optimizer.minimize(cost) """ _moment_acc_str = "moment" From 156617d34b2e7207054769cdf4dd21015b2a187d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 17 Jun 2018 20:12:59 +0800 Subject: [PATCH 08/46] polish doc of RMSPropOptimizer --- python/paddle/fluid/optimizer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index f40c4cb927..46828af1b3 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -774,26 +774,26 @@ class RMSPropOptimizer(Optimizer): .. 
math:: - r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\ + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 w & = w - \\frac{\\eta} {\\sqrt{r(w,t) + \\epsilon}} \\nabla Q_{i}(w) The first equation calculates moving average of the squared gradient for - each weight. Then dividing the gradient by :math: `sqrt{v(w,t)}`. + each weight. Then the gradient is divided by :math:`\\sqrt{r(w,t)}`. In some cases, adding a momentum term :math: `\\beta` is beneficial. In our implementation, Nesterov momentum is used: .. math:: - r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\ + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 v(w, t) & = \\beta v(w, t-1) + \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w) w & = w - v(w, t) - where, :math: `\\rho` is a hyperparameter and typical values are 0.9, 0.95 + where, :math:`\\rho` is a hyperparameter and typical values are 0.9, 0.95 and so on. :math: `beta` is the momentum term. :math: `\\epsilon` is a smoothing term to avoid division by zero, usually set somewhere in range from 1e-4 to 1e-8. @@ -801,10 +801,10 @@ class RMSPropOptimizer(Optimizer): Args: learning_rate(float): global leraning rate. - rho(float): rho is :math: `\\rho` in equation, set 0.95 by default. - epsilon(float): :math: `\\epsilon` in equation is smoothing term to + rho(float): rho is :math:`\\rho` in equation, set 0.95 by default. + epsilon(float): :math:`\\epsilon` in equation is smoothing term to avoid division by zero, set 1e-6 by default. - momentum(float): :math: `\\beta` in equation is the momentum term, + momentum(float): :math:`\\beta` in equation is the momentum term, set 0.0 by default. 
Raises: From 2053b6b756aea64c142d5c7daf81aa8644bd6b95 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 17 Jun 2018 21:20:23 +0800 Subject: [PATCH 09/46] add doc fo AdamOptimizer --- python/paddle/fluid/optimizer.py | 35 +++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 46828af1b3..c8758b2ea9 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -449,7 +449,40 @@ class AdagradOptimizer(Optimizer): class AdamOptimizer(Optimizer): - """Implements the Adam Optimizer + """ + This implements the Adam optimizer from Section 2 of the Adam + paper : https://arxiv.org/abs/1412.6980. + Adam is a first-order gradient-based optimization method based on + adaptive estimates of lower-order moments. + + Adam updates: + + .. math:: + + t & = t + 1 + + moment\_1\_out & = {\\beta}_1 * moment\_1 + (1 - {\\beta}_1) * grad + + moment\_2\_out & = {\\beta}_2 * moment\_2 + (1 - {\\beta}_2) * grad * grad + + learning\_rate & = learning\_rate * \\ + \\frac{\sqrt{1 - {\\beta}_2^t}}{1 - {\\beta}_1^t} + + param\_out & = param - learning\_rate * \\frac{moment\_1}{\sqrt{moment\_2} + \epsilon} + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + beta1 (float): The exponential decay rate for the 1st moment estimates. + beta2 (float): The exponential decay rate for the 2nd moment estimates. + epsilon (float): a small float value for numerical stability. + + Examples: + .. 
code-block:: python + + optimizer = fluid.optimizer.Adam(learning_rate=0.2) + optimizer.minimize(cost) + """ _moment1_acc_str = "moment1" _moment2_acc_str = "moment2" From 1bee5129c9523278edbebdac725cb78d3dfd11f8 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 17 Jun 2018 21:43:48 +0800 Subject: [PATCH 10/46] add doc for AdamaxOptimizer --- python/paddle/fluid/optimizer.py | 37 +++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index c8758b2ea9..12cb206fac 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -587,7 +587,42 @@ class AdamOptimizer(Optimizer): class AdamaxOptimizer(Optimizer): - """Implements the Adamax Optimizer + """ + We implement the Adamax optimizer from Section 7 of the Adam + paper: https://arxiv.org/abs/1412.6980. Adamax is a variant of the + Adam algorithm based on the infinity norm. + + Adamax updates: + + .. math:: + + t & = t + 1 + + moment\_out & = {\\beta}_1 * moment + (1 - {\\beta}_1) * grad + + inf\_norm\_out & = max({\\beta}_2 * inf\_norm + \epsilon, |grad|) + + learning\_rate & = \\frac{learning\_rate}{1 - {\\beta}_1^t} + + param\_out & = param - learning\_rate * \\frac{moment\_out}{inf\_norm\_out} + + + The original paper does not have an epsilon attribute. + However, it is added here for numerical stability to prevent the + division by 0 error. + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + beta1 (float): The exponential decay rate for the 1st moment estimates. + beta2 (float): The exponential decay rate for the 2nd moment estimates. + epsilon (float): a small float value for numerical stability. + + Examples: + .. 
code-block:: python + + optimizer = fluid.optimizer.Adamax(learning_rate=0.2) + optimizer.minimize(cost) """ _moment_acc_str = "moment" _inf_norm_acc_str = "inf_norm" From 69d568bd3c8c509e844c401f6c5bbc9a77869e41 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 17 Jun 2018 22:07:11 +0800 Subject: [PATCH 11/46] add doc for DecayedAdagradOptimizer --- python/paddle/fluid/optimizer.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 12cb206fac..8c402cf9d5 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -706,7 +706,34 @@ class AdamaxOptimizer(Optimizer): class DecayedAdagradOptimizer(Optimizer): - """Simple Decayed Adagrad optimizer with moment state + """ + **Decayed Adagrad Optimizer** + + The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + + The update is done as follows: + + .. math:: + + moment\_out & = decay * moment + (1 - decay) * grad * grad + + param\_out & = param - \\frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon} + + The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + does not have an epsilon attribute. It is added here for numerical + stability to avoid the division by zero error. + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + decay (float): decay rate. + epsilon (float): a small float value for numerical stability. + + Examples: + .. 
code-block:: python + + optimizer = fluid.optimizer.DecayedAdagrad(learning_rate=0.2) + optimizer.minimize(cost) """ _moment_acc_str = "moment" From 86092a9704bfe992dd5c4c78ec5551633d8957c7 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 08:14:33 +0800 Subject: [PATCH 12/46] add doc for XavierInitializer --- python/paddle/fluid/initializer.py | 61 ++++++++++++++++++------------ 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index c36ad324e7..6b8b0aab3e 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -21,7 +21,8 @@ from core import VarDesc __all__ = [ 'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer', 'UniformInitializer', - 'NormalInitializer', 'XavierInitializer', 'BilinearInitializer' + 'NormalInitializer', 'XavierInitializer', 'BilinearInitializer', + 'MSRAInitializer' ] _force_init_on_cpu_ = False @@ -246,39 +247,49 @@ class NormalInitializer(Initializer): class XavierInitializer(Initializer): - """Implements the Xavier initializer - + """ This class implements the Xavier weight initializer from the paper - Understanding the difficulty of training deep feedforward neural - networks[1] by Xavier Glorot and Yoshua Bengio. + `Understanding the difficulty of training deep feedforward neural + networks <http://proceedings.mlr.press/v9/glorot10a.html>`_ + by Xavier Glorot and Yoshua Bengio. This initializer is designed to keep the scale of the gradients approximately same in all the layers. In case of Uniform distribution, - the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)). + the range is [-x, x], where + + .. math:: + + x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}} + In case of Normal distribution, the mean is 0 and the standard deviation - is sqrt(2/ (fan_in + fan_out)). + is - References: - [1] Understanding the difficulty of training deep feedforward neural - networks. 
International conference on artificial intelligence and - statistics. - (http://proceedings.mlr.press/v9/glorot10a.html) - """ + .. math:: - def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0): - """Constructor for XavierInitializer + \sqrt{\\frac{2.0}{fan\_in + fan\_out}} - Args: - uniform: whether to use uniform or normal distribution - fan_in: fan_in for Xavier initialization. If None, it is - inferred from the variable. - fan_out: fan_out for Xavier initialization. If None, it is - inferred from the variable. - seed: random seed - Note: It is recommended to set fan_in and fan_out to None for - most cases. - """ + Args: + uniform (bool): whether to use uniform or normal distribution + fan_in (float): fan_in for Xavier initialization. If None, it is + inferred from the variable. + fan_out (float): fan_out for Xavier initialization. If None, it is + inferred from the variable. + seed (int): random seed + + Note: + It is recommended to set fan_in and fan_out to None for most cases. + + Examples: + .. 
code-block:: python + + fc = fluid.layers.fc( + input=queries, size=10, + param_attr=fluid.initializer.Xavier(uniform=False)) + + """ + + def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0): assert uniform is not None assert seed is not None super(XavierInitializer, self).__init__() From 323a048348bca6aefc749a1dcfbf241531291430 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 09:07:18 +0800 Subject: [PATCH 13/46] add doc for BilinearInitializer MSRAInitializer --- python/paddle/fluid/initializer.py | 105 ++++++++++++++++------------- 1 file changed, 60 insertions(+), 45 deletions(-) diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 6b8b0aab3e..df42449dcd 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -19,10 +19,10 @@ from framework import convert_np_dtype_to_dtype_ from core import VarDesc __all__ = [ - 'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu', - 'init_on_cpu', 'ConstantInitializer', 'UniformInitializer', - 'NormalInitializer', 'XavierInitializer', 'BilinearInitializer', - 'MSRAInitializer' + 'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA', + 'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer', + 'UniformInitializer', 'NormalInitializer', 'XavierInitializer', + 'BilinearInitializer', 'MSRAInitializer' ] _force_init_on_cpu_ = False @@ -353,30 +353,42 @@ class MSRAInitializer(Initializer): """Implements the MSRA initializer a.k.a. Kaiming Initializer This class implements the weight initialization from the paper - Delving Deep into Rectifiers: Surpassing Human-Level Performance on - ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren - and Jian Sun. This is a robust initialization method that particularly - considers the rectifier nonlinearities. In case of Uniform distribution, - the range is [-x, x], where x = sqrt(6 / fan_in). 
In case of Normal - distribution, the mean is 0 and the standard deviation - is sqrt(2/ fan_in). - - References: - [1] Delving Deep into Rectifiers: Surpassing Human-Level Performance - on ImageNet Classification - (https://arxiv.org/abs/1502.01852) + `Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification `_ + by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a + robust initialization method that particularly considers the rectifier + nonlinearities. In case of Uniform distribution, the range is [-x, x], where + + .. math:: + + x = \sqrt{\\frac{6.0}{fan\_in}} + + In case of Normal distribution, the mean is 0 and the standard deviation + is + + .. math:: + + \sqrt{\\frac{2.0}{fan\_in}} + + Args: + uniform (bool): whether to use uniform or normal distribution + fan_in (float): fan_in for MSRAInitializer. If None, it is\ + inferred from the variable. + seed (int): random seed + + Note: + It is recommended to set fan_in to None for most cases. + + Examples: + .. code-block:: python + + fc = fluid.layers.fc( + input=queries, size=10, + param_attr=fluid.initializer.MSRA(uniform=False)) """ def __init__(self, uniform=True, fan_in=None, seed=0): """Constructor for MSRAInitializer - - Args: - uniform: whether to use uniform or normal distribution - fan_in: fan_in for MSRAInitializer. If None, it is - inferred from the variable. - seed: random seed - - Note: It is recommended to set fan_in to None for most cases. """ assert uniform is not None assert seed is not None @@ -436,34 +448,37 @@ class MSRAInitializer(Initializer): class BilinearInitializer(Initializer): - """Implements the bilinear initializer. - + """ This initializer can be used in transposed convolution operator to act as upsampling. Users can upsample a feature map with shape of (B, C, H, W) by any integer factor. 
The usage is: - - >>> factor = 2 - >>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.), - >>> initializer=Bilinear()) - >>> conv_up = fluid.layers.conv2d_transpose( - >>> input, - >>> num_filters=C, - >>> output_size=None, - >>> filter_size=2 * factor - factor % 2, - >>> padding=ceil((factor - 1) / 2.), - >>> stride=factor, - >>> groups=C, - >>> param_attr=w_attr, - >>> bias_attr=False) - - - Where, `num_filters=C` and `groups=C` means this is channel-wise tranposed + + Examples: + + .. code-block:: python + + factor = 2 + w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.), + initializer=Bilinear()) + conv_up = fluid.layers.conv2d_transpose( + input, + num_filters=C, + output_size=None, + filter_size=2 * factor - factor % 2, + padding=ceil((factor - 1) / 2.), + stride=factor, + groups=C, + param_attr=w_attr, + bias_attr=False) + + Where, `num_filters=C` and `groups=C` means this is channel-wise transposed convolution. The filter shape will be (C, 1, K, K) where K is `filer_size`, This initializer will set a (K, K) interpolation kernel for every channel of the filter identically. The resulting shape of the output feature map will be (B, C, factor * H, factor * W). Note that the learning rate and the weight decay are set to 0 in order to keep coefficient values of bilinear - interpolation unchanged during training. + interpolation unchanged during training. + """ def __init__(self): @@ -480,7 +495,7 @@ class BilinearInitializer(Initializer): be added. Returns: - the initialization op + Operator: the initialization op Raises: ValueError: If type of `var` and `block` is not right. 
From 4907ed3e11c27cbc0be78515f13d6762f173565a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 09:40:43 +0800 Subject: [PATCH 14/46] add doc for Constant Uniform and Normal initializer --- python/paddle/fluid/initializer.py | 52 ++++++++++++++++++------------ 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index df42449dcd..caa687a040 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -105,14 +105,18 @@ class Initializer(object): class ConstantInitializer(Initializer): """Implements the constant initializer + + Args: + value (float): constant value to initialize the variable + + Examples: + .. code-block:: python + + fc = fluid.layers.fc(input=x, size=10, + param_attr=fluid.initializer.Constant(value=2.0)) """ def __init__(self, value=0.0, force_cpu=False): - """Constructor for ConstantInitializer - - Args: - value: constant value to initialize the variable - """ assert value is not None super(ConstantInitializer, self).__init__() self._value = value @@ -147,16 +151,20 @@ class ConstantInitializer(Initializer): class UniformInitializer(Initializer): """Implements the random uniform distribution initializer + + Args: + low (float): lower boundary of the uniform distribution + high (float): upper boundary of the uniform distribution + seed (int): random seed + + Examples: + .. 
code-block:: python + + fc = fluid.layers.fc(input=x, size=10, + param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5)) """ def __init__(self, low=-1.0, high=1.0, seed=0): - """Constructor for UniformInitializer - - Args: - low: lower boundary of the uniform distribution - high: upper boundary of the uniform distribution - seed: random seed - """ assert low is not None assert high is not None assert high >= low @@ -197,17 +205,21 @@ class UniformInitializer(Initializer): class NormalInitializer(Initializer): - """Implements the random Normal(Gaussian) distribution initializer + """Implements the Random Normal(Gaussian) distribution initializer + + Args: + loc (float): mean of the normal distribution + scale (float): standard deviation of the normal distribution + seed (int): random seed + + Examples: + .. code-block:: python + + fc = fluid.layers.fc(input=x, size=10, + param_attr=fluid.initializer.Normal(loc=0.0, scale=2.0)) """ def __init__(self, loc=0.0, scale=1.0, seed=0): - """Constructor for NormalInitializer - - Args: - loc: mean of the normal distribution - scale: standard deviation of the normal distribution - seed: random seed - """ assert loc is not None assert scale is not None assert seed is not None From e3578ab14c59fa4d8188e3fe95ba220f2b17faa3 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 09:49:02 +0800 Subject: [PATCH 15/46] add doc for init_on_cpu --- python/paddle/fluid/initializer.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index caa687a040..373e9c060d 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -29,17 +29,29 @@ _force_init_on_cpu_ = False def force_init_on_cpu(): + """ + The flag of whether force to init variables on CPU. + + Examples: + .. 
code-block:: python + + if force_init_on_cpu(): + pass + + """ return _force_init_on_cpu_ @contextlib.contextmanager def init_on_cpu(): """ - Switch program with `with` statement + Force the variable to be inited on CPU. Examples: - >>> with init_on_cpu(): - >>> step = layers.create_global_var() + .. code-block:: python + + with init_on_cpu(): + step = layers.create_global_var() """ global _force_init_on_cpu_ From 03f4beb8777bcd24b46a71b5229960c3d76a3f66 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 11:42:24 +0800 Subject: [PATCH 16/46] add doc for ErrorClipByValue GradientClipByValue and GradientClipByGlobalNorm --- python/paddle/fluid/clip.py | 96 ++++++++++++++++++++++++++++++++----- 1 file changed, 85 insertions(+), 11 deletions(-) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 66c3fc6b66..adfad3b402 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -24,8 +24,6 @@ __all__ = [ 'GradientClipByValue', 'GradientClipByNorm', 'GradientClipByGlobalNorm', - 'append_gradient_clip_ops', - 'error_clip_callback', ] @@ -38,6 +36,25 @@ class BaseErrorClipAttr(object): class ErrorClipByValue(BaseErrorClipAttr): + """ + Clips tensor values to the range [min, max]. + + Given a tensor t, this operation clips its value to min and max inplace. + + - Any values less than min are set to min. + - Any values greater than max are set to max. + + Args: + max (float): The maximum value to clip by. + min (float, optional): The minimum value to clip by. if not set by user, \ + will be set to -max by framework. + + Examples: + .. code-block:: python + + var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...) + """ + def __init__(self, max, min=None): max = float(max) if min is None: @@ -99,6 +116,31 @@ class NullGradientClipAttr(BaseGradientClipAttr): class GradientClipByValue(BaseGradientClipAttr): + """ + Clips gradient values to the range [min, max]. 
+ + Given a tensor t, this operation clips its value to min and max inplace. + + - Any values less than min are set to min. + - Any values greater than max are set to max. + + Args: + max (float): The maximum value to clip by. + min (float, optional): The minimum value to clip by. if not set by user, \ + will be set to -max by framework. + + Examples: + .. code-block:: python + + w_param_attrs = ParamAttr(name=None, + initializer=UniformInitializer(low=-1.0, high=1.0, seed=0), + learning_rate=1.0, + regularizer=L1Decay(1.0), + trainable=True, + clip=GradientClipByValue(-1.0, 1.0)) + y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs) + """ + def __init__(self, max, min=None): max = float(max) if min is None: @@ -120,6 +162,37 @@ class GradientClipByValue(BaseGradientClipAttr): class GradientClipByNorm(BaseGradientClipAttr): + """ + Clips tensor values to a maximum L2-norm. + + This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`. + If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out` + will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than + :math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of + :math:`Out` equal to :math:`max\_norm`, as shown in the following formula: + + .. math:: + + Out = \\frac{max\_norm * X}{norm(X)}, + + where :math:`norm(X)` represents the L2 norm of :math:`X`. + + Args: + clip_norm (float): The maximum norm value + + Examples: + .. 
code-block:: python + + w_param_attrs = ParamAttr(name=None, + initializer=UniformInitializer(low=-1.0, high=1.0, seed=0), + learning_rate=1.0, + regularizer=L1Decay(1.0), + trainable=True, + clip=GradientClipByNorm(clip_norm=2.0)) + y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs) + + """ + def __init__(self, clip_norm): self.clip_norm = clip_norm @@ -183,15 +256,16 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): def set_gradient_clip(clip, param_list=None, program=None): """ - To specify parameters that require gradient clip. - Args: - clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr, - which describes the type and detailed attributes of required gradient clip. - param_list(list, None by default): Parameters that require gradient clip. - It can be a list of parameter or a list of parameter's name. - When it's None, all parameters in the program will be included. - program(Program, None by default): The program where parameters are. - Will be the default main program when assigned with None. + To specify parameters that require gradient clip. + + Args: + clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr, + which describes the type and detailed attributes of required gradient clip. + param_list(list(Variable)): Parameters that require gradient clip. + It can be a list of parameter or a list of parameter's name. + When it's None, all parameters in the program will be included. + program(Program): The program where parameters are. + Will be the default main program when assigned with None. 
""" if not isinstance(clip, BaseGradientClipAttr): raise TypeError( From 5b6a48e77e69d8a4dbec5d75f3ef03cb25386759 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 12:48:55 +0800 Subject: [PATCH 17/46] add doc for GradientClipByGlobalNorm --- python/paddle/fluid/clip.py | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index adfad3b402..18e2f3045e 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -208,6 +208,44 @@ class GradientClipByNorm(BaseGradientClipAttr): class GradientClipByGlobalNorm(BaseGradientClipAttr): + """ + Clips values of multiple tensors by the ratio of the sum of their norms. + + Given a list of tensors t_list, and a clipping ratio clip_norm, this + operation returns a list of clipped tensors list_clipped and the global + norm (global_norm) of all tensors in t_list. + + To perform the clipping, the values :math:`t\_list[i]` are set to: + + .. math:: + + t\_list[i] = t\_list[i] * \\frac{clip\_norm}{\max(global\_norm, clip\_norm)} + + where: + + .. math:: + + global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2} + + If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are, + otherwise they're all shrunk by the global ratio. + + Args: + clip_norm (float): The maximum norm value + group_name (str, optional): The group name for this clip. + + Examples: + .. 
code-block:: python + + p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip) + + with fluid.program_guard(main_program=prog_clip): + fluid.clip.set_gradient_clip( + fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0)) + p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip) + + """ + def __init__(self, clip_norm, group_name="default_group"): if not isinstance(group_name, basestring): raise TypeError("'group_name' must be a basestring.") From f69d2d9f75741ab5612d8f5d9df6a70f0b76f470 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 13:12:13 +0800 Subject: [PATCH 18/46] add doc for L1DecayRegularizer and L2DecayRegularizer --- python/paddle/fluid/regularizer.py | 46 ++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index c4d6829599..dac474d5ee 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -16,8 +16,8 @@ import framework from . import core __all__ = [ - 'append_regularization_ops', 'WeightDecayRegularizer', 'L1Decay', 'L2Decay', - 'L1DecayRegularizer', 'L2DecayRegularizer' + 'append_regularization_ops', 'L1Decay', 'L2Decay', 'L1DecayRegularizer', + 'L2DecayRegularizer' ] @@ -36,7 +36,8 @@ def append_regularization_ops(parameters_and_grads, regularization=None): set. It will be applied with regularizer. Returns: - list of (parameters, gradients) pair with the regularized gradient + list[(Variable, Variable)]: list of (parameters, gradients) \ + pair with the regularized gradient Raises: Exception: Unknown regularization type @@ -100,6 +101,24 @@ class WeightDecayRegularizer(object): class L2DecayRegularizer(WeightDecayRegularizer): """Implements the L2 Weight Decay Regularization + + Small values of L2 can help prevent over fitting the training data. + + .. math:: + + L2WeightDecay = reg\_coeff * parameter + + Args: + regularization_coeff(float): regularization coeff + + Examples: + .. 
code-block:: python + + optimizer = fluid.optimizer.Adagrad( + learning_rate=1e-4, + regularization=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.1)) + optimizer.minimize(avg_cost) """ def __init__(self, regularization_coeff=0.0): @@ -154,6 +173,27 @@ class L2DecayRegularizer(WeightDecayRegularizer): class L1DecayRegularizer(WeightDecayRegularizer): """Implements the L1 Weight Decay Regularization + + L1 regularization encourages sparsity. + + .. math:: + + L1WeightDecay = reg\_coeff * sign(parameter) + + Args: + regularization_coeff(float): regularization coeff + + Examples: + .. code-block:: python + + program = fluid.framework.Program() + block = program.global_block() + mul_x = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + regularizer=fluid.regularizer.L1DecayRegularizer(0.5)) """ def __init__(self, regularization_coeff=0.0): From 4363d2e4bd32330514651f9a49e8985d7cadb1de Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 13:23:51 +0800 Subject: [PATCH 19/46] add doc for Inferencer --- python/paddle/fluid/inferencer.py | 46 +++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/inferencer.py b/python/paddle/fluid/inferencer.py index 6baac00905..a81e39695b 100644 --- a/python/paddle/fluid/inferencer.py +++ b/python/paddle/fluid/inferencer.py @@ -27,13 +27,30 @@ __all__ = ['Inferencer', ] class Inferencer(object): + """ + Inferencer High Level API. + + Args: + infer_func (Python func): Infer function that will return predict Variable + param_path (str): The path where the inference model is saved by fluid.io.save_params + place (Place): place to do the inference + parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU. + + Examples: + .. 
code-block:: python + + def inference_program(): + x = fluid.layers.data(name='x', shape=[13], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + return y_predict + + place = fluid.CPUPlace() + inferencer = fluid.Inferencer( + infer_func=inference_program, param_path="/tmp/model", place=place) + + """ + def __init__(self, infer_func, param_path, place=None, parallel=False): - """ - :param infer_func: a function that will return predict Variable - :param param_path: the path where the inference model is saved by fluid.io.save_params - :param place: place to do the inference - :param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU. - """ self.param_path = param_path self.scope = core.Scope() self.parallel = parallel @@ -60,9 +77,20 @@ class Inferencer(object): def infer(self, inputs, return_numpy=True): """ - :param inputs: a map of {"input_name": input_var} that will be feed into the inference program - to get the predict value - :return: the predict value of the inference model + Do Inference for Inputs + + Args: + inputs (map): a map of {"input_name": input_var} that will be feed into the inference program + return_numpy (bool): transform return value into numpy or not + + Returns: + Tensor or Numpy: the predict value of the inference model for the inputs + + Examples: + .. 
code-block:: python + + tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") + results = inferencer.infer({'x': tensor_x}) """ if not isinstance(inputs, dict): raise ValueError( From 792d3b240605d50dfad53a95f1f3283dd3fa7871 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Mon, 11 Jun 2018 10:11:24 +0200 Subject: [PATCH 20/46] MKLDNN layout: Support for activation operator --- .../fluid/operators/activation_mkldnn_op.cc | 316 +++++++++++------- paddle/fluid/operators/activation_op.cc | 29 +- 2 files changed, 212 insertions(+), 133 deletions(-) diff --git a/paddle/fluid/operators/activation_mkldnn_op.cc b/paddle/fluid/operators/activation_mkldnn_op.cc index 46ed99bcf2..137bca5e2b 100644 --- a/paddle/fluid/operators/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/activation_mkldnn_op.cc @@ -12,16 +12,20 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "mkldnn.hpp" #include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/mkldnn_activation_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" namespace paddle { namespace operators { -using paddle::framework::Tensor; -using paddle::platform::MKLDNNDeviceContext; +using framework::DataLayout; +using framework::Tensor; +using mkldnn::memory; +using mkldnn::primitive; +using mkldnn::stream; +using platform::GetMKLDNNFormat; +using platform::MKLDNNDeviceContext; +using platform::to_void_cast; namespace { std::string gethash(const mkldnn::memory::dims &operand_dims, @@ -35,188 +39,260 @@ std::string gethash(const mkldnn::memory::dims &operand_dims, }; return dim2str(operand_dims) + std::to_string(algorithm); } +} // namespace + +template +class MKLDNNActivationKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *x = ctx.Input("X"); + PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN && + x->format() != 
memory::format::format_undef, + "Wrong layout/format set for Input x tensor"); + + Functor functor; + + auto attrs = functor.GetAttrs(); + for (auto &attr : attrs) { + *attr.second = ctx.Attr(attr.first); + } + functor(ctx); + } +}; -template -void eltwise_forward(const ExecContext &ctx, mkldnn::algorithm algorithm, - const T alpha = 0, const T beta = 0) { +template +class MKLDNNActivationGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *diff_y = ctx.Input(framework::GradVarName("Out")); + PADDLE_ENFORCE(diff_y->layout() == DataLayout::kMKLDNN && + diff_y->format() != memory::format::format_undef, + "Wrong layout/format set for Input OutGrad tensor"); + + Functor functor; + + auto attrs = functor.GetAttrs(); + for (auto &attr : attrs) { + *attr.second = ctx.Attr(attr.first); + } + functor(ctx); + } +}; + +template +void eltwise_forward(const framework::ExecutionContext &ctx, + mkldnn::algorithm algorithm, const T alpha = 0, + const T beta = 0) { PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); - auto &dev_ctx = ctx.template device_context(); const auto &mkldnn_engine = dev_ctx.GetEngine(); - // get buffers - const auto *src = ctx.template Input("X"); - const auto *src_data = src->template data(); + const auto *x = ctx.Input("X"); + auto *y = ctx.Output("Out"); - auto *dst = ctx.template Output("Out"); - T *dst_data = dst->template mutable_data(ctx.GetPlace()); + const T *x_data = x->data(); + T *y_data = y->mutable_data(ctx.GetPlace()); - // get memory dim - PADDLE_ENFORCE(src->dims().size() == 2 || src->dims().size() == 4, + PADDLE_ENFORCE(x->dims().size() == 2 || x->dims().size() == 4, "Input dim must be with 2 or 4"); - std::vector src_tz = framework::vectorize2int(src->dims()); + + std::vector src_tz = framework::vectorize2int(x->dims()); + + auto src_format = + src_tz.size() == 2 ? 
mkldnn::memory::format::nc : x->format(); const std::string key = gethash(src_tz, algorithm); const std::string key_src_data = key + ctx.op().Output("Out") + "@eltwise_fwd_src_data"; - const std::string key_src_mem = key + "@eltwise_fwd_src_mem"; - const std::string key_dst_mem = key + "@eltwise_fwd_dst_mem"; - const std::string key_fwd = key + "@eltwise_fwd"; + const std::string key_src_layout = + key + ctx.op().Output("Out") + "@eltwise_fwd_src_layout"; + const std::string key_with_layout = key + std::to_string(src_format); + const std::string key_src_mem = key_with_layout + "@eltwise_fwd_src_mem"; + const std::string key_dst_mem = key_with_layout + "@eltwise_fwd_dst_mem"; + const std::string key_fwd = key_with_layout + "@eltwise_fwd"; + const std::string key_fwd_pd = key_with_layout + "@eltwise_fwd_pd"; + + // save input data and layout to be referred in backward path + auto p_src_data = std::make_shared(x_data); + dev_ctx.SetBlob(key_src_data, p_src_data); + auto p_src_layout = std::make_shared(src_format); + dev_ctx.SetBlob(key_src_layout, p_src_layout); auto p_fwd = std::static_pointer_cast( dev_ctx.GetBlob(key_fwd)); - // save input data to be referred in backward path - auto p_src_data = std::make_shared(src_data); - dev_ctx.SetBlob(key_src_data, p_src_data); + std::shared_ptr dst_memory; if (p_fwd == nullptr) { - // create memory description - auto data_md = src_tz.size() == 2 - ? 
platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nc) - : platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); - - // create memory primitives - auto p_src_mem = std::make_shared(mkldnn::memory( - {data_md, mkldnn_engine}, platform::to_void_cast(src_data))); - dev_ctx.SetBlob(key_src_mem, p_src_mem); - - auto p_dst_mem = std::make_shared(mkldnn::memory( - {data_md, mkldnn_engine}, platform::to_void_cast(dst_data))); - dev_ctx.SetBlob(key_dst_mem, p_dst_mem); - - auto fwd_desc = mkldnn::eltwise_forward::desc( - mkldnn::prop_kind::forward_training, algorithm, data_md, alpha, beta); - auto p_fwd_pd = std::make_shared( - fwd_desc, mkldnn_engine); - const std::string key_fwd_pd = key + "eltwise_fwd_pd"; - dev_ctx.SetBlob(key_fwd_pd, p_fwd_pd); - p_fwd = std::make_shared( - *p_fwd_pd, *(p_src_mem.get()), *(p_dst_mem.get())); + // create mkldnn memory for input X + auto src_md = platform::MKLDNNMemDesc( + src_tz, platform::MKLDNNGetDataType(), src_format); + auto src_memory = std::shared_ptr( + new memory({src_md, mkldnn_engine}, to_void_cast(x_data))); + // save src_memory to be referred in backward path + dev_ctx.SetBlob(key_src_mem, src_memory); + + // create primitive descriptor for activation forward and save it + auto forward_desc = mkldnn::eltwise_forward::desc( + mkldnn::prop_kind::forward_training, algorithm, + src_memory->get_primitive_desc().desc(), alpha, beta); + auto forward_pd = std::make_shared( + forward_desc, mkldnn_engine); + + // save prim desc into global device context to be referred in backward path + dev_ctx.SetBlob(key_fwd_pd, forward_pd); + + // create mkldnn memory for output y + dst_memory = + std::make_shared(forward_pd->dst_primitive_desc(), y_data); + + dev_ctx.SetBlob(key_dst_mem, dst_memory); + + // create activation primitive + p_fwd = std::make_shared(*forward_pd, *src_memory, + *dst_memory); dev_ctx.SetBlob(key_fwd, p_fwd); } else { // primitives already exist - auto 
p_src_mem = + auto src_memory = std::static_pointer_cast(dev_ctx.GetBlob(key_src_mem)); - PADDLE_ENFORCE(p_src_mem != nullptr, - "Fail to find eltwise p_src_mem in device context."); - auto p_dst_mem = + PADDLE_ENFORCE(src_memory != nullptr, + "Fail to find eltwise src_memory in device context."); + dst_memory = std::static_pointer_cast(dev_ctx.GetBlob(key_dst_mem)); - PADDLE_ENFORCE(p_dst_mem != nullptr, - "Fail to find eltwise p_src_mem in device context."); + PADDLE_ENFORCE(dst_memory != nullptr, + "Fail to find eltwise dst_memory in device context."); - p_src_mem->set_data_handle(platform::to_void_reinterpret_cast(src_data)); - p_dst_mem->set_data_handle(dst_data); + src_memory->set_data_handle(platform::to_void_cast(x_data)); + dst_memory->set_data_handle(y_data); } // push primitive to stream and wait until it's executed - std::vector pipeline = {*(p_fwd.get())}; - mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); + std::vector pipeline; + pipeline.push_back(*p_fwd); + stream(stream::kind::eager).submit(pipeline).wait(); + + y->set_layout(DataLayout::kMKLDNN); + y->set_format(GetMKLDNNFormat(*dst_memory)); } -template -void eltwise_grad(const ExecContext &ctx, mkldnn::algorithm algorithm, - const T alpha = 0, const T beta = 0) { +template +void eltwise_grad(const framework::ExecutionContext &ctx, + mkldnn::algorithm algorithm, const T alpha = 0, + const T beta = 0) { auto &dev_ctx = ctx.template device_context(); const auto &mkldnn_engine = dev_ctx.GetEngine(); - // get buffers - const auto *out = ctx.template Input("Out"); - - auto *dout = ctx.template Input(framework::GradVarName("Out")); - const auto *diff_dst = dout->template data(); + const auto *diff_y = ctx.Input(framework::GradVarName("Out")); + auto *diff_x = ctx.Output(framework::GradVarName("X")); - auto *dx = - ctx.template Output(framework::GradVarName("X")); - const T *diff_src = dx->template mutable_data(ctx.GetPlace()); + const T *diff_y_data = diff_y->data(); + T 
*diff_x_data = diff_x->mutable_data(ctx.GetPlace()); - // get memory dim - std::vector src_tz = framework::vectorize2int(out->dims()); + std::vector diff_dst_tz = framework::vectorize2int(diff_y->dims()); - const std::string key = gethash(src_tz, algorithm); - const std::string key_diff_src_mem = key + "@eltwise_diff_src_mem"; - const std::string key_diff_dst_mem = key + "@eltwise_diff_dst_mem"; - const std::string key_grad = key + "@eltwise_grad"; + auto diff_y_format = + diff_dst_tz.size() == 2 ? mkldnn::memory::format::nc : diff_y->format(); + const std::string key = gethash(diff_dst_tz, algorithm); const std::string key_src_data = key + ctx.op().Input("Out") + "@eltwise_fwd_src_data"; + const std::string key_src_layout = + key + ctx.op().Input("Out") + "@eltwise_fwd_src_layout"; + const auto p_src_layout = + std::static_pointer_cast(dev_ctx.GetBlob(key_src_layout)); + const std::string key_src_mem = + key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem"; + const std::string key_fwd_pd = + key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd"; + const std::string key_with_layouts = + key + std::to_string(*p_src_layout) + "-" + std::to_string(diff_y_format); + const std::string key_diff_src_mem = + key_with_layouts + "@eltwise_diff_src_mem"; + const std::string key_diff_dst_mem = + key_with_layouts + "@eltwise_diff_dst_mem"; + const std::string key_grad = key_with_layouts + "@eltwise_grad"; + const auto p_src_data = std::static_pointer_cast(dev_ctx.GetBlob(key_src_data)); - const std::string key_src_mem = key + "@eltwise_fwd_src_mem"; - auto p_src_mem = + auto src_memory = std::static_pointer_cast(dev_ctx.GetBlob(key_src_mem)); - p_src_mem->set_data_handle(*p_src_data.get()); + PADDLE_ENFORCE(src_memory != nullptr, + "Fail to find src_memory in device context"); + src_memory->set_data_handle(*p_src_data.get()); + + std::shared_ptr diff_src_memory; - auto p_grad = std::static_pointer_cast( + auto p_grad = std::static_pointer_cast( 
dev_ctx.GetBlob(key_grad)); if (p_grad == nullptr) { - // create memory description - auto data_md = src_tz.size() == 2 - ? platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nc) - : platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); - - // create memory primitives - std::shared_ptr p_diff_src_mem = - std::make_shared(mkldnn::memory( - {data_md, mkldnn_engine}, platform::to_void_cast(diff_src))); - dev_ctx.SetBlob(key_diff_src_mem, p_diff_src_mem); - std::shared_ptr p_diff_dst_mem = - std::make_shared(mkldnn::memory( - {data_md, mkldnn_engine}, platform::to_void_cast(diff_dst))); - dev_ctx.SetBlob(key_diff_dst_mem, p_diff_dst_mem); - - auto bwd_desc = mkldnn::eltwise_backward::desc(algorithm, data_md, data_md, - alpha, beta); - - const std::string key_fwd_pd = key + "eltwise_fwd_pd"; - auto *p_fwd_pd = static_cast( - dev_ctx.GetBlob(key_fwd_pd).get()); - - auto eltwise_bwd_prim_desc = mkldnn::eltwise_backward::primitive_desc( - bwd_desc, mkldnn_engine, *p_fwd_pd); - + // create mkldnn memory for input diff_y + auto diff_dst_md = platform::MKLDNNMemDesc( + diff_dst_tz, platform::MKLDNNGetDataType(), diff_y_format); + auto diff_dst_memory = std::shared_ptr( + new memory({diff_dst_md, mkldnn_engine}, to_void_cast(diff_y_data))); + dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory); + + // retrieve eltwise primitive desc from device context + auto forward_pd = + std::static_pointer_cast( + dev_ctx.GetBlob(key_fwd_pd)); + PADDLE_ENFORCE(forward_pd != nullptr, + "Fail to find eltwise_fwd_pd in device context"); + + // create primitive descriptor for activation backward + auto backward_desc = mkldnn::eltwise_backward::desc( + algorithm, diff_dst_memory->get_primitive_desc().desc(), + src_memory->get_primitive_desc().desc(), alpha, beta); + auto backward_pd = mkldnn::eltwise_backward::primitive_desc( + backward_desc, mkldnn_engine, *forward_pd); + + // create mkldnn memory for output diff_src + diff_src_memory = 
std::make_shared( + backward_pd.diff_src_primitive_desc(), diff_x_data); + dev_ctx.SetBlob(key_diff_src_mem, diff_src_memory); + + // create activation backward primitive p_grad = std::make_shared( - eltwise_bwd_prim_desc, *static_cast(p_src_mem.get()), - *(static_cast(p_diff_dst_mem.get())), - *(static_cast(p_diff_src_mem.get()))); + backward_pd, *src_memory, *diff_dst_memory, *diff_src_memory); + dev_ctx.SetBlob(key_grad, p_grad); } else { // primitives already exist - auto p_diff_src_mem = std::static_pointer_cast( + diff_src_memory = std::static_pointer_cast( dev_ctx.GetBlob(key_diff_src_mem)); - auto p_diff_dst_mem = std::static_pointer_cast( + auto diff_dst_memory = std::static_pointer_cast( dev_ctx.GetBlob(key_diff_dst_mem)); - p_diff_src_mem->set_data_handle( - platform::to_void_reinterpret_cast(diff_src)); - p_diff_dst_mem->set_data_handle( - platform::to_void_reinterpret_cast(diff_dst)); + diff_src_memory->set_data_handle( + platform::to_void_reinterpret_cast(diff_x_data)); + diff_dst_memory->set_data_handle( + platform::to_void_reinterpret_cast(diff_y_data)); } // push primitive to stream and wait until it's executed - std::vector pipeline = {*(p_grad.get())}; - mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); + std::vector pipeline; + pipeline.push_back(*p_grad); + stream(stream::kind::eager).submit(pipeline).wait(); + + diff_x->set_layout(DataLayout::kMKLDNN); + diff_x->set_format(GetMKLDNNFormat(*diff_src_memory)); } -} // anonymous namespace template struct MKLDNNActivationFunc : public BaseActivationFunctor { - template - void operator()(const ExecContext &ctx) const { + void operator()(const framework::ExecutionContext &ctx) const { eltwise_forward(ctx, algorithm); } }; template struct MKLDNNActivationGradFunc : public BaseActivationFunctor { - template - void operator()(const ExecContext &ctx) const { + void operator()(const framework::ExecutionContext &ctx) const { eltwise_grad(ctx, algorithm); } }; diff --git 
a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index a06ca7952f..b6b498a616 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -19,18 +19,20 @@ limitations under the License. */ namespace paddle { namespace operators { -#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ - class OP_NAME##OpMaker \ - : public ::paddle::framework::OpProtoAndCheckerMaker { \ - public: \ - void Make() override { \ - AddInput("X", "Input of " #OP_NAME " operator"); \ - AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \ - AddAttr("use_mkldnn", \ - "(default false) Only used in mkldnn kernel") \ - .SetDefault(false); \ - AddComment(OP_COMMENT); \ - } \ +using paddle::framework::Tensor; + +#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ + class OP_NAME##OpMaker \ + : public ::paddle::framework::OpProtoAndCheckerMaker { \ + public: \ + void Make() override { \ + AddInput("X", "Input of " #OP_NAME " operator"); \ + AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \ + AddAttr("use_mkldnn", \ + "(bool, default false) Only used in mkldnn kernel") \ + .SetDefault(false); \ + AddComment(#OP_COMMENT); \ + } \ } #define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \ @@ -58,7 +60,6 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, const framework::OperatorWithKernel& oper, const std::string& name) { framework::LibraryType library{framework::LibraryType::kPlain}; - framework::DataLayout layout = framework::DataLayout::kAnyLayout; #ifdef PADDLE_WITH_MKLDNN auto it = oper.Attrs().find("use_mkldnn"); @@ -82,6 +83,7 @@ class ActivationOp : public framework::OperatorWithKernel { ctx->ShareLoD("X", /*->*/ "Out"); } + protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return GetKernelType(ctx, *this, "X"); @@ -96,6 +98,7 @@ class ActivationOpGrad : public 
framework::OperatorWithKernel { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Out")); } + protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return GetKernelType(ctx, *this, "Out"); From ca341db2588f7a9687edb800faf7946a71a61efd Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 23:12:11 +0800 Subject: [PATCH 21/46] add FtrlOptimizer and it's doc --- python/paddle/fluid/optimizer.py | 116 +++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 54fe935627..3e4f16e1c3 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -26,10 +26,10 @@ from clip import append_gradient_clip_ops, error_clip_callback from contextlib import contextmanager __all__ = [ - 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', + 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer', - 'Adadelta', 'ModelAverage', 'Optimizer' + 'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer' ] @@ -628,7 +628,7 @@ class AdadeltaOptimizer(Optimizer): E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2 Args: - learning_rate(float): global leraning rate + learning_rate(float): global learning rate rho(float): rho in equation epsilon(float): epsilon in equation @@ -729,7 +729,7 @@ class RMSPropOptimizer(Optimizer): Args: - learning_rate(float): global leraning rate. + learning_rate(float): global learning rate. rho(float): rho is :math: `\\rho` in equation, set 0.95 by default. epsilon(float): :math: `\\epsilon` in equation is smoothing term to avoid division by zero, set 1e-6 by default. 
@@ -810,6 +810,113 @@ class RMSPropOptimizer(Optimizer): return rmsprop_op +class FtrlOptimizer(Optimizer): + """ + FTRL (Follow The Regularized Leader) Optimizer. + + The paper that proposed Follow The Regularized Leader (FTRL): + (https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf) + + .. math:: + + &new\_accum = squared\_accum + grad^2 + + &if (lr\_power == -0.5): + + &\quad linear\_accum += grad - \\frac{\\sqrt{new\_accum} - \\sqrt{squared\_accum}}{learning\_rate} * param + + &else: + + &\quad linear\_accum += grad - \\frac{new\_accum^{-lr\_power} - squared\_accum^{-lr\_power}}{learning\_rate} * param + + + &x = l1 * sign(linear\_accum) - linear\_accum + + &if (lr\_power == -0.5): + + &\quad y = \\frac{\\sqrt{new\_accum}}{learning\_rate} + (2 * l2) + + &\quad pre\_shrink = \\frac{x}{y} + + &\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0) + + &else: + + &\quad y = \\frac{new\_accum^{-lr\_power}}{learning\_rate} + (2 * l2) + + &\quad pre\_shrink = \\frac{x}{y} + + &\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0) + + &squared\_accum += grad^2 + + Args: + learning_rate (float|Variable): global learning rate. + l1 (float): l1 in equation (L1 regularization strength), set 0.0 by default. + l2 (float): l2 in equation (L2 regularization strength), set 0.0 by default. + lr_power (float): lr_power in equation, set -0.5 by default. + + Raises: + ValueError: If learning_rate is None. + + Examples: + .. 
code-block:: python + + optimizer = fluid.optimizer.Ftrl(0.0001) + _, params_grads = optimizer.minimize(cost) + """ + + _squared_acc_str = "squared" + _linear_acc_str = "linear" + + def __init__(self, learning_rate, l1=0.0, l2=0.0, lr_power=-0.5, **kwargs): + super(FtrlOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) + if learning_rate is None: + raise ValueError("learning_rate is not set.") + + self.type = "ftrl" + self._l1 = l1 + self._l2 = l2 + self._lr_power = lr_power + + def _create_accumulators(self, block, parameters): + if not isinstance(block, framework.Block): + raise TypeError("block is not instance of framework.Block.") + + for p in parameters: + self._add_accumulator(self._squared_acc_str, p) + self._add_accumulator(self._linear_acc_str, p) + + def _append_optimize_op(self, block, param_and_grad): + if not isinstance(block, framework.Block): + raise TypeError("block is not instance of framework.Block.") + + squared_acc = self._get_accumulator(self._squared_acc_str, + param_and_grad[0]) + linear_acc = self._get_accumulator(self._linear_acc_str, + param_and_grad[0]) + ftrl_op = block.append_op( + type=self.type, + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "SquaredAccumulator": squared_acc, + "LinearAccumulator": linear_acc, + "LearningRate": self._create_param_lr(param_and_grad), + }, + outputs={ + "ParamOut": param_and_grad[0], + "SquaredAccumOut": squared_acc, + "LinearAccumOut": linear_acc + }, + attrs={"l1": self._l1, + "l2": self._l2, + "lr_power": self._lr_power}) + + return ftrl_op + + # We short the class name, since users will use the optimizer with the package # name. 
The sample code: # @@ -826,6 +933,7 @@ Adamax = AdamaxOptimizer DecayedAdagrad = DecayedAdagradOptimizer Adadelta = AdadeltaOptimizer RMSProp = RMSPropOptimizer +Ftrl = FtrlOptimizer class ModelAverage(Optimizer): From 6caea459418480de8aeb5cdf4fc54e8e16abe6e4 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 18 Jun 2018 23:30:08 +0800 Subject: [PATCH 22/46] add TestFtrlOptimizer --- .../fluid/tests/unittests/test_optimizer.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index e775db1d10..7286c7c450 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -434,5 +434,71 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): self.assertAlmostEqual(init_ops[1].attr('value'), 0.0) +class TestFtrlOptimizer(unittest.TestCase): + class MockFtrl(optimizer.FtrlOptimizer): + def get_accumulators(self): + return self._accumulators + + def get_squared_str(self): + return self._squared_acc_str + + def get_linear_str(self): + return self._linear_acc_str + + def test_ftrl_optimizer(self): + init_program = framework.Program() + program = framework.Program() + block = program.global_block() + mul_x = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + block.append_op( + type="mul", + inputs={"X": mul_x, + "Y": mul_y}, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + learning_rate = 0.01 + ftrl_optimizer = self.MockFtrl( + 
learning_rate=learning_rate, l1=0.0, l2=0.0, lr_power=-0.5) + params_grads = append_backward(mean_out) + self.assertEqual(len(params_grads), 1) + self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0) + opts = ftrl_optimizer.create_optimization_pass(params_grads, mul_out, + init_program) + self.assertEqual(len(opts), 3) + self.assertEqual([op.type for op in opts], + ["fill_constant", "elementwise_mul", "ftrl"]) + + # Check accumulators + accumulators = ftrl_optimizer.get_accumulators() + self.assertEqual(len(accumulators), 2) + self.assertTrue(ftrl_optimizer.get_squared_str() in accumulators) + self.assertTrue(ftrl_optimizer.get_linear_str() in accumulators) + squared_acc = accumulators[ftrl_optimizer.get_squared_str()] + linear_acc = accumulators[ftrl_optimizer.get_linear_str()] + self.assertEqual(len(squared_acc), 1) + self.assertEqual(len(linear_acc), 1) + self.assertTrue(mul_x.name in squared_acc) + self.assertTrue(mul_x.name in linear_acc) + + # Check init_program + init_ops = init_program.global_block().ops + self.assertEqual(len(init_ops), 3) + self.assertEqual(init_ops[0].type, "fill_constant") + self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate) + + if __name__ == '__main__': unittest.main() From 4dda54aa5af6cc7f53d4ee5e5212293d9a67023b Mon Sep 17 00:00:00 2001 From: gongweibao Date: Mon, 18 Jun 2018 19:59:43 -0500 Subject: [PATCH 23/46] Fix unlikely (#11537) --- paddle/fluid/inference/analysis/argument.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 7d7131ed7a..f7f4e03968 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -21,6 +21,8 @@ * big. 
*/ +#pragma once + #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h" @@ -43,7 +45,7 @@ struct Argument { #define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) #define ANALYSIS_ARGUMENT_CHECK_FIELD(field__) \ - if (!UNLIKELY(field__)) { \ + if (UNLIKELY(!(field__))) { \ LOG(ERROR) << "field " << #field__ << " should be set."; \ return false; \ } From d00a0436b1c562060b49aa2981be094d78bcbdf5 Mon Sep 17 00:00:00 2001 From: "Yang Yang(Tony)" Date: Mon, 18 Jun 2018 18:50:19 -0700 Subject: [PATCH 24/46] Remove tape (#11548) * Remove tape * remove tape in cmake * fix CI --- paddle/contrib/CMakeLists.txt | 1 - paddle/contrib/tape/CMakeLists.txt | 25 -- paddle/contrib/tape/README.md | 252 -------------------- paddle/contrib/tape/computation_graph.png | Bin 96637 -> 0 bytes paddle/contrib/tape/function.h | 131 ----------- paddle/contrib/tape/tape.cc | 265 ---------------------- paddle/contrib/tape/tape.h | 64 ------ paddle/contrib/tape/test_tape.cc | 61 ----- paddle/contrib/tape/variable.cc | 33 --- paddle/contrib/tape/variable.h | 85 ------- 10 files changed, 917 deletions(-) delete mode 100644 paddle/contrib/tape/CMakeLists.txt delete mode 100644 paddle/contrib/tape/README.md delete mode 100644 paddle/contrib/tape/computation_graph.png delete mode 100644 paddle/contrib/tape/function.h delete mode 100644 paddle/contrib/tape/tape.cc delete mode 100644 paddle/contrib/tape/tape.h delete mode 100644 paddle/contrib/tape/test_tape.cc delete mode 100644 paddle/contrib/tape/variable.cc delete mode 100644 paddle/contrib/tape/variable.h diff --git a/paddle/contrib/CMakeLists.txt b/paddle/contrib/CMakeLists.txt index 70e3a0583d..4b19256ef4 100644 --- a/paddle/contrib/CMakeLists.txt +++ b/paddle/contrib/CMakeLists.txt @@ -14,4 +14,3 @@ # add_subdirectory(inference) -add_subdirectory(tape) diff --git a/paddle/contrib/tape/CMakeLists.txt b/paddle/contrib/tape/CMakeLists.txt deleted file mode 100644 index 
5450359d85..0000000000 --- a/paddle/contrib/tape/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -if(APPLE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move") -endif(APPLE) - -cc_library(tape_variable SRCS variable.cc DEPS ${FLUID_CORE_MODULES} device_context framework_proto proto_desc operator) -cc_library(tape SRCS tape.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} tape_variable) - -cc_test(test_tape - SRCS test_tape.cc - DEPS tape tape_variable) diff --git a/paddle/contrib/tape/README.md b/paddle/contrib/tape/README.md deleted file mode 100644 index 16c22a45d5..0000000000 --- a/paddle/contrib/tape/README.md +++ /dev/null @@ -1,252 +0,0 @@ -# Dynamic Graph on Fluid - -PaddlePaddle Fluid is targeting the autodiff without tape, which, however, is very -challenging and we are still way from there. DyNet and PyTorch provide a good design -idea, the *tape*, that significantly eases the challenge. Also, DyNet provides -a C++ API that is as convenient as Python but with higher efficiency and could -conveniently integrate with industrial/production systems. This package, `tape`, -combines the good of - -1. tape from PyTorch and DyNet -2. C++ API and core from DyNet -3. 
rich set of operators from PaddlePaddle - -## Overview - -We can implement Dynet-like Tape(See this [survey](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/survey/dynamic_graph.md)) -by wrapping Paddle Fluid's `Operator` and `Variable`. - -The user API is straight forward since - -1. it is imperative. And it uses host language's control flow logic. -1. it avoids extra concepts such as `Scope` and `Executor`. - -All of these benefits come at the cost of just adding one line `reset_global_tape` -at every iteration. - -## Code Structure - -In short, the `Tape` contains a vector of `OpHandle`s. And an `OpHandle` contains its -`type`, the pointers to the `Variable`s, and necessary attributes. - -```c++ -class Variable { -public: - VriableHandle Grad(); // returns its gradient variable -private: - framework::VarDesc desc_; // compile time infershape, necessary for lazy execution - framework::Variable var_; // run time variable, holds data memory -}; - -using VariableHandle = shared_ptr; - -struct OpHandle { - string type_; - map> inputs_; - map> outputs_; - AttributeMap attrs_; -}; - -class Tape { -public: - void AddOp(OpHandle); // add op - void Forward(); // execute the tape_ - void Backward(); // execute the backward of the tape_ -private: - vector tape_; -}; -``` - -We uses `Function` to indicate layers. It takes care of parameter -initialization and `AddOp` to the Tape when it is called. 
- -```c++ -class Linear { - public: - Linear(int in_dim, int out_dim, const std::string &act) - : w_(new Variable("LinearWeight")), - b_(new Variable("LinearBias")), - act_(act) { - Tape init_tape; - - std::string initializer = "fill_constant"; - framework::AttributeMap attrs; - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{in_dim, out_dim}; - attrs["value"] = 1.0f; - init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs); - - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{out_dim}; - attrs["value"] = 1.0f; - init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs); - - init_tape.Forward(); - } - - VariableHandle operator()(VariableHandle input) { - VariableHandle pre_bias(new Variable("linear")); - get_global_tape().AddOp("mul", - {{"X", {input}}, {"Y", {w_}}}, - {{"Out", {pre_bias}}}, - {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}}); - VariableHandle pre_act(new Variable("linear")); - get_global_tape().AddOp("elementwise_add", - {{"X", {pre_bias}}, {"Y", {b_}}}, - {{"Out", {pre_act}}}, - {{"axis", 1}}); - VariableHandle post_act(new Variable("linear")); - get_global_tape().AddOp(act_, - {{"X", {pre_act}}}, - {{"Out", {post_act}}}, - {}); - return post_act; - } - - std::vector Params() { return {w_, b_}; } - - private: - VariableHandle w_; - VariableHandle b_; - std::string act_; -}; -``` - -## User API - -```c++ -// Model function -paddle::tape::Linear linear1(3, 3, "relu"); // init weight and bias -paddle::tape::Linear linear2(3, 3, "relu"); // init weight and bias -paddle::tape::Mean mean; - -// Optimizer -paddle::tape::SGD sgd(0.001); - -// Data Feeder -paddle::tape::Fill data_feeder(...); -VariableHandle input(new paddle::tape::Variable("input")); -VariableHandle label(new paddle::tape::Variable("label")); - -for (int i = 0; i < 2; ++i) { - reset_global_tape(); - - data_feeder(input, label); - - auto loss = 
softmax(linear2(linear1(input)), label); // compile time InferShape & InferVarType - LOG(INFO) << loss.value(); // Run forward up to loss - - // Run backward, store gradient of w at w->Grad() - get_global_tape.Backward(loss); - - // Update w - sgd(linear1.Params()); - sgd(linear2.Params()); -} -``` - -
- -digraph G { - - subgraph cluster_0 { - node [shape=record,style=filled]; - style=filled; - color=lightgrey; - linear1 [label="{type: mul | {input | {X: before_mul1 | Y: weight1}} | {output | Out: before_bias1}}"]; - elementwise_add1 [label="{type: elementwise_add | {input | {X: before_bias1 | Y: bias1}} | {output | Out: before_act1}}"]; - relu1 [label="{type: relu | {input | {X: before_act1 }} | {output | Out: after_act1}}"]; - - linear1 -> elementwise_add1->relu1; - label = "forward tape"; - } - - linear1:before_mul1->before_mul1 - linear1:weight1->weight1 - linear1:before_bias1->before_bias1 - - elementwise_add1:bias1->bias1 - elementwise_add1:before_bias1->before_bias1 - elementwise_add1:before_act1->before_act1 - - relu1:before_act1->before_act1 - relu1:after_act1->after_act1 - - subgraph cluster_1 { - node [shape=record,style=filled]; - style=filled; - color=lightgrey; - linear1_grad [label="{type: mul_grad | {input | {X: before_mul1 | Y: weight1| Out_grad: before_bias1_grad}} | {output |{X_grad: before_mul1_grad | Y_grad: weight1_grad}}}"]; - - elementwise_add1_grad [label="{type: elementwise_add_grad | {input | Out_grad: before_act1_grad} | {output |{X_grad: before_bias1_grad | Y_grad: bias1_grad}}}"]; - - relu1_grad [label="{type: relu_grad | {input | Out_grad: after_act1_grad} | {ouput | {X_grad: before_act1_grad }}}"]; - - linear1_grad -> elementwise_add1_grad ->relu1_grad [dir=back]; - label = "backward tape"; - } - - relu1_grad:after_act1_grad->after_act1_grad - relu1_grad:before_act1_grad->before_act1_grad - - elementwise_add1_grad:before_act1_grad->before_act1_grad - elementwise_add1_grad:before_bias1_grad->before_bias1_grad - elementwise_add1_grad:bias1_grad->bias1_grad - - linear1_grad:before_mul1->before_mul1 - linear1_grad:weight1->weight1 - linear1_grad:before_bias1_grad->before_bias1_grad - linear1_grad:before_mul1_grad->before_mul1_grad - linear1_grad:weight1_grad->weight1_grad - - - subgraph cluster_2 { - node [shape=record]; - label = 
"Linear1"; - weight1 - bias1 - } - - weight1 -> weight1_grad [ label="Grad()", style="dashed" ]; - bias1 -> bias1_grad [ label="Grad()", style="dashed"]; - - - -} -
- -![Image](https://github.com/tonyyang-svail/Paddle/blob/cpp_tap/paddle/contrib/tape/computation_graph.png) - -## Code Reuse - -We want to stay close to Paddle Fluid as much as possible. - -### Reuse All Operators - -As all Ops are registered at `OpInfoMap`, the effort of adding a new `Function` -is about 10 lines of code, similar to expose an operator to Python. - -### Reuse Compile Time InferShape and InferVarType - -Note that all the symbolic information is stored at `tape::Varaible::desc_`, instead -of `ProgramDesc.block.vars`, we create a temporary `BlockDesc` to do `InferShape` and -`InferVarType` every time we `AddOp` to the tape. - -### Reuse Operator::Run - -We use smart pointer, instead of `Scope`, to manage memory. So we create a temporary -`Scope` for every `Operator::Run()`. - -## Possible Feature - -### Release Memory on Backward - -We can release memory aggressively. During backward, we can delete the OpHandle once -we have finished its backward. Since all the variable is managed by smart pointer, the -memory is automatically released when its `ref_count` goes to 0. - -### Kernel Fusion - -As a symbolic representation of the Tape is constructed first before the actual -execution, it would be possible to perform graph optimization. One use case is kernel -fusion. 
diff --git a/paddle/contrib/tape/computation_graph.png b/paddle/contrib/tape/computation_graph.png deleted file mode 100644 index 6cf5ead735d5d18b204b079771e53d44483cf016..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 96637 zcmcG01yq&Y_a-VJk^%xsOM`Svie5?SZlt@rL6J);-5}lF4I&^?UgFXv-5}j_uHW~Y z`L8vzX3eZM<62(0Z=L<_v(Mhoe)c|uD9A}*p%bGcAt7N&NkWv6kRFhO|7U3T!4VdG z9BT06fzxX#6*M%onI-u}@b#JVTMcJrJ5y)3_l_n=X0~=VCM-@yjwU9yPUd#b`w!Yh zkdP>mq#$A{?x{O-e%{(Ht)d5ReqSDRAy+*7^k{%=%G!8d=sYT1Y-^6#MCJyj)1hMD zm|*2JPSRuDU>PbzyqHXNyfKkd@&02}2m1#YQgV|p@g8@pL3WHve&_qkr?Xsqjx7Vq zq#=~_M#onY^&EAs+whu<8V}*Ssd%S9FZb~se0GmK)}1WHB_tjJk&%98#=G1@{NnU5 zivaw1HASg~_#TV(|KRJG{GXQkc~1-+$4}@g+S*=lO|!}}pufd0ed5KfUT5*rxfU!a zi`?ei$vr*da8)oaDd!Z5*gH)z6k;;6ZoecXBn8D^Rw&?v0xwhz{6I4KVE(`Rsyq%W z`XY1ed?6r|E-7xJ8y3BZb(w1FHy=?S=<-$s?R6ur>auDp}_lfb}tAx&I6iz5M9 zEw~9d+Yzf`SEE$j=M1a(^~%_}IfWm-nwl8>Y=A(62%!~v)oz$&PC@i17lN29({BMA z3Q;c4z;Sz}7@tpzvsuzNOOkeFIgU$_k^E*3VXXM84?kpXRQ$A(>La=!{5)*3B-QsF z!jbsfXG!fPxy+vKb$79zmrydDm6um@@!&Unfg z+Wc?FWTV3O66!xpD5Yy`4&er@6zMbeeSe%sc6=Xz-+F=;}{Wb8RhUY;TU zk*phA%JenKH-^cgn4PSvqYkR%)$4Bvn89p?6mlk$Yx9bQfOK!7ctvDo$>}cb)e7 zS~gXt(V=rDk=^*4frIg63|ce2bn%jLmqwK}5Uo=2Awnr_f!k!#zOJewn=y)dRQD=F zuGT0>UCy{=XqRKi@p4)=KH(p3%03zy$HD@LPn!r8Rc!$-@c|D6%(cY_bC(*2`@h^V zd7Gs$X-7Sz32gK}i-)?MVXf3oQTG*n`BNWO#71tp6v7!`eA!^q_C|$R#w!0+sJoDq z6*JsoMRJo&?h`%v#V^u$ajQe9Z-1dKDtI%whlI!>Q0TVBH%hwPvoyta(GBDd#6_PYW1i@@pi_p9|7BM6AV>J||q6 zLw}SPG)B2m$k0C-4A!)PIHguZCI4<<>bsSmQ*2eGL+8Gg8u=%sP&-?mQ3W-t`Yot4 z9B#TBmJ~hi$}eMqr4`NfG6v)@SC=s9=NG`Z#Mt-9#yGv5e4v=P)BWh)|)#_6}yZLyvTyczDT+ncQ3 zUnQG~4xz_u4l1AZ>ivna9pWrglGn7vLefSN`aNbw>CH%sObX#}CL0=jeEjgbu&x+* z%6AFLFc_>aHKBhMo0Ok@ZoFf&A9;toQ23cmab6;afItzl2nT#X|!s{hKZIi4x7OCNQ9X%!@ZQR9XFO?bmQ^JC=S0q+X4B zTU}XMtZgPB9bK*vY%A4is8WO3wT^8vpp`!w0-=V(`ttRjel2e}v7o*GWU?0gzQ$ zG5z7(0MA7y^eOq5u+Va&SM7g1m!0m|zQi66vVQIzHB_k%G3Sp~DQ%NWd4lol@IJdEB&gHBn9B&ON04P!kx0cUd4ewk!xo6k|qo08Ld)MiOz_&Af$5e2V+tq1FCY1=rh-YAZse(5XflH;umwHBQan$ 
z9arz+mO`KC>bnYHi8_XoJJ$o85(&L<1*+E4?jN|D?_8Z8*qymd6SM5OV#sS*Nw_Pw z*ik-DUagqOP=G>npygaF=iiRts?tE3Q4XryETo-F3aUi$Ed-$S_MzXSIA;HXIsGCfulG zxkwGN9%_lkH=E*ShYie@bWuuB$B-5K596jQP$+VY;tL!T8mbWwZ))aiDV7UtHo-+rJGSAZumJ<1wKeur(AO$zYuK9qDw%-g9rZZuK* zv#gpD)!6vzkxPwRE{guSLRb@w?1{FEMMn12{Fct{VK;U#a~xEj! z&frXPx6gbAS>_|Vi`7IK4{?MQyyWHWuZ@iY@oWxa7z}uzxck*=`IHI7yf5eL$Jd{m z`yg@ws@FBi;nsF`k&=Vm-;}B!93e6Tb`D5!jB;=`xmU5wfM6@jzV?~&|L|vZ9YsP2 zrISuhY^^Zgu5>g!Jg0Lj=o9W52Zvvwbd`Xx@;^iI*!+pD2)_9r#+-lsh+WGe>NXy@ zkgiw=X1P9;X3KZk_+ATu>Eg1cP#44_VwBR&T?Tn32DauV?bQ#Q( zIz1&8GA!tNN06$88PEH z$=*o4qA9hxuJJIAsj>JIt#EG{_(BMvcQ)dBF8@mfR?^A_=Plm}Z43ur2DW>5xU>Qa_6X7G1~U){6Zt@(EU;9}?7`GfQgR~_H30@A%S z;Ly4q_(Fi4{Wg{W*8;l~%@I|iS+_g~?^)cKGgWEgj#@fY(u@hr?^G=I`Z|WZXcL&JU62qrBVFWG!u{ZkL*GcNBZUvHz zL9=PTkU7^FbHS5OyCaiqMq!r3F)t{m>F|vrBPDY~6H6Ewl&utgfalV?t zgrxS(>{_ct->baiCLCDNNk5JVj3ho5Qq+rx7EoBVKh%qdNW@3-IM>U z)k$Ivd|yR?xyy7;q9}ZES3pRrJE_H2-H3w`HS*&NM)>^f&Zh8G-nKLoFT0_oQzFz^ zEm-!-P$=~ac7D9WD2AEK^o2E59L1H92#hbMvR11%i8W=#KR5%1>73+!n~reqpe+99(Xf*Voj*gm^aW1tmaANKz1c(U&Dsw~vNCIT!YPtF1@-(Q%_}WNs0GDd zGQKZUTlcdl~gbLoz=DHPJxhkUlPm;LHbP7KKW4)H$N2Hv{} zfd`E1_!%G7E%ebA*=dZq+nX=jiG)>t@)MB`TbZf||l*ArNE1@>S~O#B1VQ`7j)q`3h*}^f zUG2BGmjn$KqySewZB_2ls8fOw;83F53^(U4)2v=Qf0Lb^O&rl<&OPLfKrqX%NxV)J z@kHcl_=dhwnwevK)P*vFjjJ`990Y}AgjJ3r@3?9A3cT!|%)5%@ag>7Fe|tly%}7=<+TU(SexIZO3< z%EsZO?4a&5DkW6vbhYZKDcGt5ulAs+)6v!|>E~2D6EjOS$veAkk*;RNt&NT2qg-_o zrXZpKuyv&b0nA5z{+w;ynQ`G4S>*d6?_bioI-h%1PNRWwxgy`zj3klTHnX302>ni( z-6dqPcvqG_`O1?4x8g^L0kg#SXEQCXt>U8_=x>*O`ivW7caJWp@S-yNEci%01rOtS z6H1p38dyonEh;WvKd;2{LOix3yf}7{h@Yi=8T!MZmYA`Wv^J+VB8XvV!`d#=J*F^q zM)!2=c&~YXZ>pL;rZ3jZ0Nhl-8i!X(7r=!EklH9rGPh`JZ7_w;Ze9q>s7r-KyDr=X zL9fN|gk_C$3v8EEdy7zzJ1Zx_%XIsjfYSZ@lHY27D)NxZ3=$83Lb)N3WuamOujQXP zgmDYCt7;gMkkexu^pGDmx&7>h0#sseru5umtRm)fxVL=_ns@^pSl$0pU z#HU42RcWup8j;f_%pqj&L0qWB^xG+UaJTh57F?TGE75g(@+Y7`; zKT{!22^XLexZWLF%HpPpDtt!^#2%oH6SU%4w-^7s(5N8$u=|WIDi%`F4qHxclaSg@ zZpr*I^DV+lAr953H3{GLejb7n{N<%jYtG)>^cH3^lDY`O0Fo*u%o5dsu6&`=-k6dm 
z4;^?g_$;C-IT}wSB6{@V)b)vtWriFzWrD&CM zZ^`Aoyn@TSM)w{aua8&qxxFj};eScEO{GwD(cZ$3SovmT~sOXT3?(epYusIm6q$K3~rt@+>HxP@G3R3*6M^nJN)bl0dCr%U|{K#0H zdAd2TV42?bu3sys%`C}yW>tW!gXSa-ugWt)a=2}SnSnm~&z zQ~0s}IFbbm2ow21ulQM>)t?Kk;FgeA`B}8VlO8!@8C)LA7W?jYOMs+7iMHroT4YR+ z#3ssxNITRV#>(#6T65syn&3-$WX@PpQvD`o5f9C;4wZ5DyYlm8ptd5BHF_pxR>hoS z&nsag8;t(zk3b~Uy|^|inmX>trE@~x&!<10#qWBVf`@qUnOSNk%)G~!>-6eqNN_*& zMOni}Rv2s3Mi!3Wc~?mo3X;ENusP4NGq0V*(W`a;+4&jM#4*A1HMQyu`8O->{x7sw-0#mP(@Ys$?9f)hVRL^y1viJIAF^4+?Rqd2$_FvpAhLfM^U25 z#4~GF8TY*<(*1r(2b_Txr4~mA#_I>U9%7bX$FVVFo!1395!{r8ihJx(lQd5+) z+FH$RS!aohHP(P-GT!;o7vjj(X9{`0HczINw4y`g8XhDrZaL>Ree>0i&W?t|@m=K( z<~pZj$|tr@E^}oP$ZrfY6($>Qn4BRQl6YYNxZn+&_v}r|>XYX!98sKAUCp zJq(K=;cL78({EaRAvrR#DR&+M>^Ajr);MNAPS5WLNylh$(vK=7Dr@-3BZ>Q5LP2=)121wiv@zwsP+zR4b3cG`5K$2-h4EKCd)KbA`v zWSD%{B09YpLGF;~)Dx4Pq(O2cci<`E_{TpLUmFur_pTIt!%9Q85vQefaR9{k)s4?I>;kdX2B0 zBV}+x*KH1qZ??jq^FG|xhCCa*Ezqt8xt05@j36K z!3jOD(*9LL-5wI@E&eesfwc{V?lovNyOzStb*fDV4}&B3Ym>7; z{bzg1EW>9l$*j(LiVE$U3mEq1WP(kzDo1De0a8b5uuo}`#B%~nqSDi|ye8+szE`B5 z)5aIL?B-jx%bNEjL@#0bw|^1<#VQ}aD(|zU97e>px3e?b=5s}JM+Us~I3}#f)1Fk~ zt&~U={hR0bA*=7?W{!nT%?OLj%k>)_P54RUO+1!+A|B$9>$G~F&?e|mg=S@`45jM> zlXduIr7FyyA|LzxYj5_rB~-dQ8a_sjU2l+Zf?rfv*tnK#RR%uxefGV83PsO5aYX(^ zeXgmin|JJInb2{<+TH@j_?uEo6@UIWA&V_sbwE${@4wJ2q3eULV)b(PrZf5H^^CiV zjA4;0Yk2NGq*u8efs`GEOCGnUGj@Kfaq54kz=G>RTqSs4eaPmuldRF_pslslRz3PVHg5u?#wmUho=O}6{BDf3LXD?<2LWH4i?5y$M@ia-_^{5=`xGThu4`MO&^t= zU~h?nx|U|_+HdM+Y_(h59pvRd_pbuAzt<%K;rHVoNbzUjpQ{hW_?lgJo0dM2kb579 zMW6~stC!VUO;oWiIeVXXQTPJuO844pWb?b|&s5mxd`Qvq=jRJeB7mqyu>r$6YJTx& zm96!7WNW-w^rBC`Y0lO==rNMP5JGEuCnqQMbFMQ72kf{$J4O^<*N45D>Hwqx_5$62 zZXiQc6fZ>RWK6k!!MC|Jq8wT6;WUok_$B`*~6+n-R z=UVp$3aY_m14+EM3iI1;Ps)uh(Lt#|tmeLfL3b!Yqt4#-(J&uz#75hFq(qs=rHRV9 zD&Hq8dA;!R8+?>~+3^l8gm!g9u8&p3K{fE#yfwhY`J4AuWsAo}fK|zhT&(c5UksAi zwJ55p!U=#$J)_i;4UCpOJ{Q_e2bIJ1BNulevVcj0HXTv4mz`_2JP!Gnyo|rMh9_&yT=uN9FC=i8SYCiGQT7^?o%r)Gbc?VSQXE?}Ol!fI{`lnzopPNFE{? 
zU^5KIXO2D><=3#Z#FM+a37TDGUoc+E*OAETV)+Sw#cs_-`cpK>I;XlT&M~OVc3Fxl z3Q+ajva+8F)f`9}JAFIj@FtEQb@`JkzhE^zs&ZPIDAN|)Vx?zZUoZyF0kW7YEXBwjyx$X)8Y z5T)}LA`m0Eh+FmEkJ&A{ejPtxFGpyVsYg*o9LgV`@`FU{`hDdtclvHV&&!*vjU>;l zv^SPkR{PESZNcQ8KbGvh&zcuZbg>cRJWH-+TEWXycnv?8j*iY3PZ*w|vIlha7&V>r z?qP$rg40cqq9AXm0S_g^vpY$J58rV}Gexm(k<~53x7}0b zXk2^oHS5#q8Zf*nMV~^k8K3NIuqW!=tGD3a*pftVOPu7?EBZm-Ec}Sk?;N!?_?ymi z{B7*~P@!_gCqrwyj!DkabcB(4aU+sh&D7bj8J{$kPY$B*5!BQ7216pB*PhDEr6d^cPRYtiMl`NmiA7ng6}D18y!Fqrmv|#v+rR%yJ1ZTIlXZRq+$iRE(FdmCfC+ zAU_S*9}nV{voKa!o=D8mQT=w#7a3J0D{H^K1U3EOu1cS)(_V5f%wZQ)q{IkQW4QF@ znOET3{HNFNB$4ca8N5Qwq4qhL63L?7*^?{X>glWVox zJjvqf48;2~bX_)<8N)ZD1_|9(+2ip$3_%^WCq0m@JK40Dma#z+4 zV15nC`QLxMN?J@avtK+xcd2_yclq&UX158ExiP;oXOp*Hv1{Nx`9Q9w`!LzVl~*nO zi5RZuS8ZNzUFL=a{brm+G5gaq+IN^=f{h~yNri1zTIH%x$>adCETVBLi<;^; z$>8PhUmXXBcEWx1B?Q*IF_e?nZSf;3s6wAUkz664=eZ^nCd!#c${Try&2YdKdya$% z!a}7Xp;=FI-)quR4XwCp%6UGgq)L;USXfwq!(Bnl61m!`F6qAqYD!%A7=a@d@216X zB}JA_JmunOk~WT$$6kKu81?5q5igR`2_3~NTwbGBWx?H)%MSCxQy<6hln!TLilRvf zl^$E)rlwK~BhC~P93r8+dgMeSZ`<+c*|BHjr&Zi6BQ;z=XV&m3uhbAvr@e^KO{i{4 z^(^10p)YaGA6+%)G@!M2w6wHryVp;56{qt25P1>8+@3lNDWtedDVfGLRacMX8r99`76H~@P4r(e?iwV~v z(kf$^UO@Q0k9>K%l!WEa1pXS@4?`o*_T4b)@DJTL;@cn+#xY(@{ng`Z)fqQCzkYq` zT0&q~zz0}ay*QDf2kjxTN$34qkB?PI6(ir>FjSQ|eyT*x8%^p_IEsFa5n+VSNlcYx zVfA|{C9Uq0p@7Hip6rD)r_9}=ITtQ$bFTYQfrk`%b0?~p{98?tDdf2jk)<=h#^Eci zv=k8#l9MpxzEdN3*L%`M6nlR&T-GKnQ^Vb*%bt@ZZO^4x)yQZWP;wPsXM5Y_?YAJo z_&aAYDIwt9pWXD1oV6Du((BgQUMSP<0Wx$h=aOVsKVe!YA6-rsr?!Amh1 z8@6@@W)C{<{17OO&#|t11wq;Tp=9%%43o?fUX(i!DN-glOx#?`$#cpf*xCFJ{F&7+x^CgfXi>Wh+}~ zbUDg%ca$8_Yx6!lpTTIkz1(hXZPmZ{slK(fMaQaF;nU-}XYbvW$1*G`S^+{I%dqvF z*o3ccOSVa8=IYp^{cjz^GcnVo5w!K~kcbIl99zG<{new}!%HioVZk*jmz*81m(46m zJKJHRl_vFz*xuXK82nTtES1SypX#m8eHrlNZDp{ej#zE9N@A2c?H5kf^{%#mSY0hF zB`~kKEc<;cik)s?d|BfrirdS^K9s=+^T(eT#wGK%8J3xe>1%6iGfVd<8~aUUSGmEE zu^G{${HA_L(~|{^)9Xd&17=c4C-5W3p~| z;kbY^E28>DxL9dT_(`<*sT?2YZv0Qi*mUA&gFM=b0ahiRWnbfL28=rPJTv>g?2yoJQmd8WcYE;(mAU*r 
zl1dif?+lvI%Is&mo6r)&o0^(>Dg3UBLB9aVDQ`sYjzm3gAj3nT++k|kInjtQrNPgb zAU?8`u3<{YTYbMkhlhs;NBHS+vS$9*J%YSpxGmNFicCG=@Op0g_QJz%HcM= zRPbnkOY|^A{(4Kj{o5=7(yO;gC@^%S$g{b9-!rfxng>m1+I$3?rtcv!^)hQbM)r3A zJo1n+XQ@{07Qp4Ex*%^L4Fo_|dwgt6==OY>;=JP_$hevSv=4T934rWJaI$!OV&d%N zr1f&ESpWJr5q+Zs8K@c-|2fv$oo^K+#ZE9ONaE(?d<{WJ9pY;A5bnA^uS=T}n4XS6 zz-FRXQzN-Dpyt}_d*g|K$12U7kQX01JC3~RVv;TuxQ4BH?@sI6)0V>aX6pABJ3mld z{t7ldZ>k=Wr}OsDk{cA?W%7(Vfp)`?`&o7?l+j(#90J&m{_M{pmp`aKS0ILN0_2WAuWRDHy91oN zgKUbM4TL!j?9ZYiDGWX?efo+-a3CDv+esGAd)7tH8Uf_`D>H%>@;67}S_8R}W{r8& z=;n7~ZlsQZS2c!6Fm%&*va*9QAdvu+&*|~;-=UcP9ZCw}H$VFxQ@1a)8}(u%L>%ye zs$m)&p(MV~xEL^lmuNb5Vez#91XE&l4=HgO$k97pTZkOrG~ zBk}6{C*{M;(|s>DDelgDKa1Y-Z)<@D1z-oLzk0CWp#>gvJizf@eW4#k6XF8;5u6CK z2oSRcRqOZ;($p5q@gk7u;L41EqzoArdXE|H{u>frYxj%Qg#GqAUw9A@PZo&xHG=Hv z<>Eupr`W;mC#7~boAQ2!z{)#FKLROCOZ`jh>gwX-2^0Fen;s+izeIzD27s!|juncv z&1A(=Zi1dxyRR^!DQS%uh*m*~HW0yrap^6>jBy_+mI%nC@%W>(zlj)<|E529;%RLl z`r+aSoZH*8cE4kogeK8_BzCudlui)1e({R(V9?IDT7$U?Ort^MKWb;{Ev=s~n%LfN zSjPSz7z01 ze-$u=1#~%}C58eL?u$+w-wLoocM!J_Af62deb=AB1_+egiER_aWHNsOFP%l8Qewb< zKALgse0928f&nJK6k&ulfuXhLFsjeJPW>$P6`*WxQ1=?xSqj@L+AFZUZ+^Zgx1Mc? 
za(Rw~RE-BzX>JrO?Fu-AIk0jU03)^Fxy|zTr9}|(j*q|Rb1g7NutzV~Q>jC#5VK-R zfmlP9mSAGycEY)a8o(<;6jh!CA#A1^^Ph+QTz3{g5o-VoPy;a?{r^~Wfj980XY*eB zfQ5MQPUjv{D&9ZqF$8T_O<_q(TRTGBi1M)k;x(-U&AU4^YQ7~?sG*6V_Z0L<{yM71w`@2-kvGq#*%=)W@bRx2u7mtJzx6ky@4iN!PI{(D~o(w zY7nmh*j0Lw!$$}q-XRoWgb@f;W7l?ea})jZgF3MR@Bx_rSYLHN@C0gQIv#*B15F!9 z280L`ve{wZo&`$#EPSdVl7U!duX5ki7#fsJpkHZqG8yA$w1a{7BoXR zx@p4D&=3|0@AXN!A7UzBy#;q5osQ_>Ia0?6+nPIV8NRdYjI)t@^vV6ub&T(1Tw#4~px z4SZxIe$*@ilG@r@-~*;?0pkUN3}EGn{)#U4xO5VbblpHoSA^>XBO8J3nA`4EEJ0w?b~dHVRtAX?|>Hy_|Nf(eYlt7DWNC8iAbR}#`_{lq z0jURQ^)E*fd_kRKf1y1CF~H7qt;zs%6sNvWf)|Jq2f!$)|2eLxt{x0diG%0>eD5_P z3W3+~7Y0LAloJeaAjnw1qm+de$Crq)*@06)1)7S4ZJ!Jd4*tg~03m7k0EWJn?sqfc z=d)AUdqBa9q|)dKM^Rw}G1MznF28qq3RoQoSTEtVD%l?%;@tlA*C?-47d`ayM`fL# z0PP^I9TGYkNU?3&jFKP+8u$&`41A6T`3@5w`fsX%Z3Uocf}e_bQ*!4AP9q6z$GHSA z)OHry@BSh=nYh1n;{6U%w~>s+joQB&q}O#CT!~xVxc182uzzi4QOW+0~QY|9!xoQ7-hsSHgS@z&Fm?`@k8dq^tj z(Bgvet@qUM`Boj=Wm5W!*xrc6xVVf-8tkdWus>Q% zYpa2MM7uSlX^tw4OGn5A)%{LB%Ln2dLJHh3=XQ6?=pICYoeC`-PAWf6dyQc3u$?0h zo4_YWP)4j_r|uRAum0zmEaiWCo%9=2BYUGgWb9)m5-Bi4m)=G39gr4AdtawLsroOZ z8q}u8Vej?e$>nusfMIrP6wT&=uG=&wD8%ZNGtf!YwB!|hDV*E4`E9o77tjczA{Lmy z)UzNq?eOjay*~+R5vL-)V-2?f+ey;Qor45qgZHOE zAUe-epr@vxscLVUooMl(&?lN|KPX$yY+j{-&pkulS=@+(R2GRKmbS)m=F1pRNd6KN zJ!PV9*k{}}$P*n+yc--HmrN1 zl@%}kd#){@@=6h4P(kTsx)B_4@@n04hXq8TrL)BB(Uld%wzD&D0i z0^5AF?d5qRE1;y@23cE);dAtao_R6f8psc{_1Op=7Y%JyIJQBU$`fd0QW`1}4^D9; z>42g1$|-V*xvf_e^KEfHUZr|d_#HVH7gm^;I@*8{(MRyu22I)IBZ?Tpg^a&?jGPUF z@`x1}Sn5SvWGYfdb84!xCq-)dET4F$=gjUoER@vOHe0i1^m3CD813(_7FO5sT2+Va z6K;mnbIg=#F_Q05kf^iBvER%6+X_>8vr69(cBZU!$gMfEgF2!~1|D^2|5`wZ zpSbkr*FDg*lRn@Pnnj5o3;SD+!SS`<9LX)gNIC(`i#z#(X`v3NO`ZZkJPw07fGSc| znMtJA*_;~zN(xDu4jJQ>-jJ=#-_|qhDVBx@3zs|hBhA}>RhUsW-GV(679rXtarNyD zUOAmL-iN)FjT#)$HgQX-qQO4=^Jr!(}ynIm-cJaYa_Rf0yXxNOv z_i(l6f>cph)Na4-roWZ%={hdV`?hv$h*sZ0-(g+@CDm7KF47DhOr@RPzehwEJMxBY zfBoazSYN9DvSFl{9?=Nr(RD-8)-Dq7PRCJE-{7~u*Gipw$TWJLov`nWbs|`AJO4x; zTWY$0ri`Gc6=NFr^PS_qm69uEl6#ih!~IM7JFNQ>-yf;#!YEJdG$=x?<*a3F{0j9n 
zOjlqpoL>VZh+qZ~B+SyIn4R;!RmX?eqzwA+$M=AD#7ADT(!V0!er;M0gDCC}^2q*)u$I*zVFh*-k7&Xk#r_8pcoUHo{RFHI>cfYs1fO9b z{RAhlK=xJJ2zZSl&?Kh9^fxQp3<6;oq|BDe%F0Ke42NzNh{V2*7?Ibt;I(=EeSgb; z1BrVu0oYNNLR8Qg(1VGsczHvtj`KoIM830oDq+>nPfjdDLlGxECR|1xUmYubs@I-$n{J zu{9~EjkwFYT`)9S^@1$726=(rrfeN54@5Fs)ZS9q*I2-Y!8orcW zE)Q7a!@p>@z)>cFl{O2pf9UVEsRmAsDJNXMif+xiiBn-i!v7AUMisLL9+_{iw-~pvzTro8n(}Qi@9Ns*`I#RIAY(m}<7g^mOA2qSEqo=xUW}*se0&#+ zD9wy_Slg;H)9}156RkaqEwf~{p(8}7JwF(CT9zQG{7*Z(01-|E7C;~%Sep8 z(gOP$BS}~JBcV_({#n;np)Wu13O6BI98;x2xKTsKgk#nM#duMRq1c$DTqY;vrFwS1 z98%>f`1mYUBKab3`Yd%NsiiBK>!T@t{+JdY)wm}RWAYr!%t>OdaNLxGm|0IhC7^+> zeum$beAuc$gBeeeHR^|@%@AQ5^EgPQHN{8-k#K=8#`LOsK#iKxg@;Z9dfyVnq;pS7 z#pb{^rU=x~e#ZcJO+^{%L)!%;&;+L{R!)0ztR2Hor6hmU+WX61MSxRHiX|$ZB%dh~ z3VIm{Wn{ok)fMZg8d@qPTB$75UQ9nC2RV>SQYQ2x=)B9Bg3Zs(P5z)ofh%r)Hg zq9#^@4uv8K3ZGaP0)^aj(|pzA6MEWw9-f+CdjewOBj(sQmX0V8?c5U&5@xtKj|jVl zx#dVytX#im@i6y%NKkCMu8Pm00)~9-Bbw2Xs5hymJwHqDL)EzQi;F@2zrUeAJu~z9 z+SlIRzURe#gOb8R3(yt@3TlXM9Y8OSOPJZ%z&&F^a;SLf8tBp9IkL)o7K=$>d8RY4 z^^K1G)=mEAg`%GSVw~XgDw;H|rY^l=lyh%xXJ{q#GIcT(>=^pz?*i~Hb71(9makhF zXwY%K%S7cz_bfGy>|agZ_ywv55sWViUL*KB;48y<#1e6d<=%jTpr!Ktq2CopT^t-% zj^Tp+Z&aSX!wl!Ht4;bZP5o@GWS6R;wS0VDp1Fq-iO4n&VytnmR3 z;)r!n`8X_}n3(VbG*bG7Clb@W=~9Edr^z!VI@1el$HH z$398FvaiOTMiyXS7@a2y{b{{ASK zwuPJ$LJ{feY4>c|wYH|F*D;oYwAqwuHl+_A_mS&o<%|=-WNq~;{2YBrB|rWs<(Dz) z$J*oO&81*Fnl%nKfm9!a`h#i($|QGTk};RdKh5~g>Rjt5$Hzh!D>1#Id^Yy>kHMC( z_s-5L*|4V(;_#r3%_~3#-yM(2FM9u=2rRh%FLT{j4)B9!G#>7Kl+*72+9R zRTj#uJA;3Vop<%pK*Kp_!R!~HEgIX#h;IqTqf~s3Z|}hgH`iZBc5(oH;s3|T0(Xl{X zp6aodguiO&Z)s4e0o_0_&69RWrmx%D-6p3^UtoQTZjnhvxvN2$i^n zj=ws;1Akg0mSo(6{q`XVH;enn2vkDx?wzNkg{S@gx`hpKN=;M>jQLYiDU!7A((MxH zc9GF^cs@?1nVLMQ(y~=CPjWt)O1`r69YCen9AlRuE$l5k{cbk?AkH%tJ4mv7@F|pz zW56)mVkaE)^bBXWY**L9&r@CN%J;UamMdF=dUGgyeZG;%BEtA?$4qo)_tlsFPdt?2 z52tVLRdZEh;%jfat3mBQxtkTtC{l9$A`7${w5+X*mwPZq0;NRqx5a}zUKt5gDrP+g zov+sRX5p@N zCIFn*L1w7kx?ept`?E~Jjxw-j7Lbw!@{h&7$aEE98u|ICg)~p~i?kaXTkm&(^gs@k| z7ij`6TcFzyNby6&1KRu{ojHas_9eXnP|{t&IBtUI$YNjO 
zUKbJsQVQS8RRj7YReNe-o0}RO4!e$yyd3 zB2mS}bJ*F%MM)Y(H92EHcOvtYp3~Q9;j$=}e-9`j0ezsC`**kdG4%4Tf{&3kA9weB z!!AH`P`ro4k|DWeEmU!?bQ6uPKAqDxcJ6s!9Qz}__kcW0?YsJ+(x0#SUZ`eB9(`*l zveGe5H5S=ayyHkG&B$r#wC7*Rtf^u^GkkV-*Z#Sq&F}Hg#bXT;Y29(>K+{HzA5!ng zX!^{IDWQ3Jnon7KCOQ0jAgN?b&C!jSFE4^KS7N-&OsNj?;50W~slx(%JmxCZLi&l{ zAH|g2BMW~!_c59xBwySlY{IH6{-=aXNd`G4ibW~ATEUOd$k9%B&K2%@oW3vJ8ahuW zHpR5Fl?n?{ZI2)x>6ye;vBXi9OhXb+zMVW0z4n)nD7^X5sQ^7uc=|LsYxzk)K;{#t z&6F)0C?iQs}xfKK=6;@Ez2!fC z#F@jKId$!|ziaKxG(o)rv>@Ce@!CUZjV5cM_s4D)1UwPdFg$hTB{y&q;HAs5BU8I=Yl}PIj7)|tesct2uO5Nx)S4~ zIQ)2T*$-#g`srh;;Z{I zjhN4iR~xknz|$L!dWSZm*l31t$92tJub8jN zNGB@GnV!SZtH@my%AqG>$TWcXTL8{*DVW&w#)kn7(j*xby^RCuWr!Vf_6!mnbbcF3 zFr;W>O8sq!2?KmWMhT?Z>TjMyvomtDb@g(e_Wo*)7w>u1qd=z8!a^+?SW}*>i+sD1 z*c(q|qQX}Se3>j)?3J@ekfcMY&|uaV0{Ay<)^~wb_?i@CuHv+~$+w{bdsWS2QQG*r zp)nj0;VMS$h8Z0FR!0!h4BJY291FX|k;#}Mb>BIs5TBGt>x;a&pMR4mq({toGOA$H z4j@M~64i757z2_`$Odc0Cx(iVomuI2A^EHtccz{uwo@S?`G#fm8kK@!pCs_L6C)d6l~?wgqA-IL+sLdOk#jy0cS!mP99t$bN5M7lO3+Bm zy6=+!E4(Zg{4+?<``(y?#6%}`-$n-)NbL?P3Pik{A^G~?DnDto=$$jYsm3CDb;%qI z_`RyVJ>qC5YMPR=?Atl^YcO@Lh-q+5Z=jdm=?cjc)e` zM2x)Q2BAF_#R?8FqJq`aP$kTCj#-HXtKk_hziB1+=D0*xeD3*U3!Xkzc}o7b+s9j68FVS#dy`V~iF_=XuQ}X*s z4If{92*YlC#4F4tzbr_S!?<8cLMy4ro*^Q2G-&ZoRlqoS9vVIUQhAy?VZgFAn=ZTw=T$MBHqjh=&d-=%%1eLLAsl@)QYx8P853>A5Z_eAkO zc;h_?8o9m3@3pOsj6)fyKpmy&yE`|}XBdfUR%{uof*`3-=^!PcsI!fQ7D*c7q!kb5 z74>ZfDBy%16WA{6OQiX%`A=HRasIlN(hev&$bI}(x*h{7fhRRGD6ODrnk{5(W+7?l z&dS~%K$ixAo5KGt&HnhIQ^sf|F4Doz9W1JLExU?@*lhaDM5~;esgvjZO zSC>aB`;60s^J`ftOWeTXB|ys9>(pK;pxd#HkD$dxB*|+rgbdn-d}e1ch)xK z+o4D0i8?A`y?O>N6I+God`cah@!hm_b*v;@cdc{-%|WCDNOQ|Lb`S*w0($R5;3@Y+ zChF4!)=$ItXo7_70_JL^guSEvg>ZkyBn=G*qfgMS>~z?o8r1R}_A*Shrv3NnR}Jy; zJ?sqd+~NLo`S3P#rE^Tpp^9OG^D(1eK|1r33~;MN$#HF7caB3m*`)}Jn>e^x=jM8^ z2c>pRu11Qx$fcfaA9G7(PnRQ2ajTEgKM zJ1KsUlwYDJ9VV~XPciCjHAWvKM!b32Uy9!7U0*=$l&oz!b&a=I^X0m(_M7?ilkzXl zP9Fb4k9%?gy^q4SG_RoJ$=kE+SA7-LP(45#>e`mzu#V- 
zIcH-irR1#Zc$M>&^P_jYDaFNzGDcDpc`c{6@RbL9u+OP|gwg3*{ey@n-Fxf+Af3Kv6XZx?tA;nee#v%@uvWV9B+7GDiA2aH!6qx`V`vk&Z6?>e_p*UxzFKcFq%Yg-1#Dcu0sQy&{0Fq=3T~B+o8P|r zO9ntFVBV`eF#!zk)Z!xlx7yolt|(Hl^1-J^T=INdTkXe^iXqtku5IbmQ-7Qg*Me*= zKc=dkRM-=MAKcJeHk%SyZSU;GM8H7Su!*BvPIRPGY)Ol>2c=uY5eN+N`gcw5f7VX8 zr-K9pi)m-XPZWF0;6s5(t~Kz~+MWq&w-9{S8rU{>eaP|qikDe#L73o$3!mE;!`u~V zeHI5L8TshfZiQQ?NC8#Mpu*rnlTu^5xn6z_uUEPtF^_U}zLn$okzy@BNw{fYlLD!s z1Arp{s=EPw3RN6^CS-}YYa3C`YApEn@#D32?1|6*6vINn>df#GK9c|vH3l|b=U1RA zZy;f4mc%OBJ6@`X(*iHoEB&Xa{I(1n15i@NS`N4pM{}1i%)C5IcJTPlu|#M|-|q$0B}* z1RLDHEAeD~rlp!CjUG93RJ)iB~W`_ z(-iuQhlPa&^D=2PbZ7CGPk!rHFMtC9FOh-R5V>??qA0*4W;Z~q^`Lm}{AuR|0&2Y$D=4b(^tr~ee(#!aqUg8&0V zxJIV>=V$iSyq1iFzh~HL+L|8FXmd$+sW#7F*VhZhOZZWVb}jBKWom05AY)m|LB%K8 z2I}F%4EsM1Wt*SFRPCEduEX6$xc2kxhD^Prv&!HgbCCyLQ%3}7L$lS0LL9S-E>kUY zBO&O5gd+qnV!R^$2NOH5#Mmo^aa{C;;8n>8NtQBe*v^J!{isMU5T|wU^%B7TqH9$z z%#8NzzJ8^zM-u+Wh)3yt;Du$6kf?v17-fv~GouX`5u>cLhI+^7pLOpjva)ZMwBLn! zBq#`!unYtS_9?Jn7+PyPj7}+J&XY#8i)zO0IE2jaw7^1iNqLHx4dszkE6<~v@7v^q z?qJ40m3iRCe5Q!XJNC@(Pwgtq#~Es>@cr`|z{q;4H$6q#}8Ge?aU8nFiJ#_!p_N6}j-J8r#9P^_e)qgMU>wP%0pIdd? 
zg-OLzDgO;EbZ*qf`>QzJi#)}`=`KQ46mgeR?u_>wP=nDEM$)Mzp8KFqfP_%7@Gb+!D5{XO zznAyv5r*)qt#6)o*gx74JPWB_X+}(U-CXQ4CJDbNY#2BisG!xG=M4Bgl&D6pJ>gbA z*XDe+BCe*X#J}7 zC2RFjH1EFNB}WOli-&?ko5NCK3XU5DQVuAT?G1poCX z<=u-6uFz(dMELm}QBz&5{A1YL$;T-|$kZ_pE9O9d@bubJTYo2yS(t-I2(z7YnY629 z#9nZkLik%b!rnvV0o@hLhDPdg9CDA${G6QvJrRZ?FELsqwFZJ{x=M((P_K}jDl>1M zR6Ul{*A;RXm^S9L(Wn?)6!S2KfkNM92=_!#*PQoD&5V#}B@~fwrRHp+Ly=k#+wH%) z17DCC!KTcJrhKJ`QJQy6{(#QY%857|CjN(s>YTFg*07D#EK$)q^6eaY*unKeroYAK z6fzX~_>xDWaZx0D3AWmrU`;*lx)%=Ck!}31bxWQmZb^f}%|kTG$M}Iwr1$t3%Yj>7 zPOO$ti_?1!->T$89C0;gQ{$_d7n2Zt&&_8`-DD_mkZLRoYC!}D$LLl=CaGCWeda3v zbr5%N$K=D0#mkE_q~q5SD(8~pQUpr9lSlW>Q;Lei(R&E&+VWQVCt^pGig=i*@)t0| z6p^9omADN)4;Uok@m3xSCjYLuopX0AW*?lbg@!xO^oVNVG>auCeqJ^m%Huc<&C9nd zwYTUGkF-UmGliJ_Q_t$;JaYeMJxhh@MLDZ!nU;*~BUjCkyhkUeNila@jsPDXSaSfG zn)8sVWV~L8#LRy~)u3+s7<}4?3=0Fb=e8Y3t-9y$-=tM0Y(51f(&4 z@2*|N1}|uJ`d=p6>Zfa(`SQ$Ji&rf33ivNxA^<0Zanh)d5)&X{<;xX)vI$r&SJYUw z&_>N1n%6u-(Pt}~(|g2v!xjvjJbfJ7@@sbplE}2mHG%H?c=G)b`n(Z#{n?-JTZS}% zxo`)Y3NNNDNYhQ=WH<=#*vX;Kkyfs3UJGLMKkM(z1p<2|n8=&8TJ-;@YVAm#9Ts3L z5z6UF!5oTH0_K!QTABi69EDSXoWbFH#Y?rxr2B2V3;iWX|D zhUhWE4&qk#<;LrA2#tFWy%g2HD*8KM#KA$oMfo+j;04MTuW)ljEwzM-dD zAdwD-oZ~!3Byb!VN|5n6J_s75IsoFbJsb~R`9eb-tjQ?lE(eL{cAunve zn1y5Ih^k1k{xE=Rp?f}gZ#Fbf z0uWZ@5R#W4?oY0|e$6NVCS)9nOi#ytnA^MZ?%q(!?Mc*Sza1E-tr z)Z-A>1ffn)?I;Zm;Y z1DI#vUSLhJ0zr*~wZPp{s*)@A5kEcHysf=qSp zm@QS-5p)O#PxFVGwwO=IO}8}i9YQmg&(B0=n`fdzMf7J#z%rcy(_&VhcZU1z&EJ9y zADQhxOglGsh}|FzgDtYg9<`g2ZXCX&qGkbw;%EU}Vt1ek`7;>bc#4NLafC$7_s_eJ zH}61#4*8^`zYu(C4W&Xr8f#L9au*eURa5)5soFE}2n;vWw7GX2wy;~LT-t?LcZRy+ z(6Uh-D0NTw?we7r5S=(<(t5K}xURnAjx0H4RhIBE>MtB-0}`{n#If!53(KRpmG;$D z{Pd_JZL;yoAx`Uu0Tie!p!)=++@SuFQD!xZ<_>GW2~kx7oeU+0FqsGsf?!8AC*=Sd ztvfYBt&9Da}7Oko@knLl;4uUE3zQggN+K^W4%SO8>k=Hl5M-C;Wm zSa(W7Ys*l6lE}%$XGTAp+_YJ=N9`Mtz0IWGdM)+NmCgNrhcsEoJ$}Nq>x7F~4ihrT z)N}PgrPblkjx~2IxoOu@Pf-EprA48rXS|9uD);;JTf{`FS5YcP0yH*3^>fm43z@fV z0-F&X)o?JW!$Yu-Sc|?t*ZpHIQxs zsw(Y2(()^3>|O(FZbL4f=O9YuJ0QS7%F8i8(#jBJ^@*^lrM7C~4^|h@X^E}83D{7? 
zcns!A#B$FzL&=?^n=>3hp0l5yAH1uCw)gIMI`kky$*iZt+DDWJtghqXk?y_$&zpJk z$ufegJhvA!3Q$uffL@>;UeI1;eb&MMvAf(_eT#;lCsEccshfO=ZrZ&Ie=G!@_~*qR zXrt3gvQ8@myhhZ4Z?q z>2u8Xs;C00()u>{KiF$2ouMmuinZ!9e*|SxO@4)z3g;|KuEk&H&`&I+vYb+|jpv@j zqaGhG5weZ)YCnMh_#AD;q8?j3Ve!8u5QiVOeH_ z4{rb*0MY*{XmyR0t3ibogEkZMdIMrRY~FuX(7m}iY+O4gA^hwt)wFf3hXw zaQGVxAH-+R0$t@eA>z!DS>HF26o?=^pj0C)A~k%DM*H%`m}Zka*Ne>=)QOdFiq~f= zz##(Qfh!qh>_7^n84vKwI7ac!k2MtL((U+;sgz1S(4{aYF%1+AG0@FoKZEQJ0e{N}dQ*iG^BFHtuxaY}^4x zVdDfq)Dn2bcYRBHy+{GQ5PuiUB~ClHnKV5&QU|5pPkQNb{G=qQ+ms#ahiqe7mGibf zzYEx2<3poL)-hlXdWeDB7)S!XAm9j^w+q_NRUN=W*1AebG3?$^Zz!6kpFK2pfLXMT zBB|iyNeO`UF9Th-2ZY zGFhQR@;C(r8|#us;PX@}^jiY}o?tEkZGj)XPSvRN5zI`gm?%+y9f2*+%RZv7!5p5j z=6@R`pv|3eZSL%|q0*g2NVePdS|DrsVFX73q(!Wg|9-KP_RiGhjWh!zL`jfey()0| z;3mOs?F`dxIVSQ8IA1Xw*E?{PoWMEgGrd!EBfJV>6N6cBpw7i8?MG->nmc&^>(a%552a%cbDiZ^eimWb2x0KHH> zmK3N78t5<*DFtd1rJ*++B#%v8->vlpi7_z*x~MM`Vx0f0YmXZ&_+0~Mb}g^(d5ZQC z;ew%`Bnfh>cE5ql5mocoV?`EjHl4vB-C?H_Pab>&sTA3^Za5w#%wtkz`O9DQy396- zS>JiFNR)SzTN)M8?C1n+k2pGYj8HJW+V8pEs}eQfU}9htW}zS8(#TaX={u9ca2#53 zk|96qBN=ivq#;3jORd*VFC01HX%gn4kGB%#@+^SNEqsXYzc=~`e@JqSViOe!k1K32 zV7y2Cx@?sqOeFt-MuZ@K;WoOlV|ky#Bi?2p%R*Fz&?lT@b{y-h0R3sIEvB>U*Dp(> zpxL818s0&Ak$x_WNCU?vboz7Jv!%Gk0NwPt?FjkCF)wp?J#=8>$F9()G}Rn%(d9_9 zWZKGdCTI?if1Sn(>biWqC4+Q^%(c`(%wPH+3~9eUKz7BOgxQg(88S)fQ+p^B3z@9? 
z6ZrI4%eBv#;1}Q)WabaK#Q(IFp}x%63O;oIG#ZCs#)YM)Xgtpb!{1q0KIf)gGI-l9 zCGyRCWM26QEWp7X4I(azuQF=M@!TV4k>kZ_zBCs5z!oa5>Y=)bny%YHBj_|IblrQG zU*NLJF2;L0;|;NW*;kb(aASWcefckt;PdfHyc)aBK={sLr*@0j5A(bVt=c-SzIaIr z%0U@x9%&c`^M>6>bUhdd(SPa<#quEuTVDgzb(-lU?x^eIGW~C*^-!bT_h%jXJ=PhDJ`i(l(@HEK5Dk z*jfGgx2&HyJ8o)>YD^5qrCCIq#nv3DsA{sj?ABb8@nN{|wK*z-+_>lo@nOlk>)~nFxU$@|cbZ)!f%5T)ct35|u|m?Bn&yuuToaS0hwkkOS?NS?01w zrWuO=CqvPpTA>EaN(t&Zyz7=XvLH*Hxg@zUfZ{a~b&SjjNE+iD$5(>?bl`y?vi5q| z?imLTBUvh)>)PBL4{E0TpRj|-@tK5qAI_0T-l!%5`|6T1JXx%;{{uK^MK8S0xlD+7 z4j=Q0j{lSK%q%;ue6>#~Z*MRvOK!MLH=#~2`?Q)_2@EGB&yE0c(gTBoy(dZ_E~Ok2 zC`FPp0bg~Br7?nc~#8FWtT5*a0h)fG%m-GsfhDHseZkXAAlO_!C zx1XY-r}Y1%ftV(-_ebsEDDWhweJt0t2#772%S%GWf?PYs$-#ucnI^>o`nX)b{lKD~ zA*@<^d5}(uj9Z-dz{We?_0>s8R+MaH6GcguHKdFVQK9%is&!D>gMcu2dD&|`9L5&= z!j$Jlk(q}*F6S{wbS6;9Qe_YZ~%M8pN6`2=$`dN!}r6^~k7cnL|F-p{7Tk;2*H zw2LQkTXDlwCSK1Tn70@F_;QX#nertrFX$I8AWU1aN#RID%LkEkwt#bwPp)`UOD9foaDk&1({O zB}9l`0!~D<@M_XiwPHoD`v>dUIcDlLiIkK-lI@#?ML?4wYOsJnfG86zuLt9!>|ivB zi!*apuVREM@&TWG5;y7K94U0jiN2StQ6e3)@_{lH7*84nPvV$Ih!iHiSBIKa_Xv-l zV!of$Em*^viqwCt&=O%AlWFp`bzrRh zg3xH$BWMv^)`~`jekeg8E93=zB1g;}T~WE$@5_vjkbOZ`0yHuOjEFIK(|x@JB>qyU zc>=^~+%1AG$Q$1+;!Hj#V9ynE2jDD77bORsfcLFZvf8L}3AR=9I~aYg5RR4kGjt=w z-W6*j1*@;7B|Y$vDfp`O2XO+3zxHGaZF~3@U3UrJCwTvkf_?D>h3?HWN$rIGyb@*D zl8T9ugx+!i5V!ms=ToyBIj`2dKgDG771Ahda0V;~GjbnMMFs!7$hQ%&u^Kg;`lKGg zsG`?#bj^ZipVV@(xDmka@d&>CT#{{O6#S>On>KXu%;rJt?vjm*03zNfc%7xRf39tj z{Z-WgMR(r{t<8iFtl@=T-M0NR(%ysb!`EI+zQYZX;ghCNE^(vgoXHG+Cf+o|9 zwUvHKCUT*D2&oJh%bZgU%>lr%wC=}@+HcDTqgbsT57miU0vmUAw@J|GBEl~|?W}oY zK$PGh-09ZcVue{~=<`dCAdt@7bA5t`yFKqBUOtlJ7F&6cqcbX?}* z;a&KQfW9V4{w>{)Dm;0nQ#3DwbwlR&FQPEsq9u975bv5({YjJz?JD^P!KgF&;|nim zS~oN4pj1sPFt742!UMJV8NA$!JiEP9!Z{q=*9MtUQ;G>*8d@o&IchqZ`>o%^8ni*e zcXZDBmbGohC%x<*RVwPiET8haabZ^EkjN!mPQ1+!!5+l{E?1S+L#a@XDjb-^!vLEL zD#`MBA3$+oc07AEit5V|WzWWpZT#8cw+%+?&%BgMA_(Abl%E>n5qda`IC#y{20Cgg zVW1{Fv8(iNB2V{M;IdiR0zoPuL{bCi8hG@9nAWwLPl^9UOUJ0$Y7CEO?U2g`9qlcf 
zD7T2Z*xBTJCv%#dS)(-$DF{Rt33g7t-5KJqmMg-aUGmS$9(BFZB-K3p#er42#MzG@ zd6Do?HfMkZZ@f&a;+cOw-c;jiO41*U^J)IwU9`iJbkJ$DzEF&`@Q#uQL-SU%H|xWw_N&cSzQDPzd*r8%!TW#BPklnwDJB9w~yP zA)ubyx!TJ*cem?nR85ML28i~T?aGvER`5#H7H!Gx`R8WmtWNs>ZZ>XQ?-Pi4fgk^? zWW(0!4nYw`VeJ_HTq5#ADR{f0dG;ebwo?On&p>DpN{ID*p90|Hmu-PEw)< zDqDMI_}d^(XA#=@7pm*iW7Duc3}A`eO$jZDZ1(H6(b zpHrg-9zKj5?EMihW#bF@Jyqhxc(!@1ZDr1bk<|=M3fo_BIV@74J_lB& z^B&{H?2e*aK!i_-#&rK{CXTp<9|97aN~)*<`y|$O`(B|G%G|~VjHKZ_nM#Gzz3D|* z1)Pw&(1flOECJcCEI|YE-oM=_`Hhnk1lyP4!&MH+LatR!W9eS`@AW+ofK=CDJ8}`$ z!P>fMCtR0^<=}<;f0r`z7Zjw#xNu)e9b|#GYxe$1qVSCyHuw_Fr^uW|-I4EF7OwGC za9FIrEbo6)p}4Uy6busxhgWRzrQ$(>DZO|Ou^Poh9MguKDW|bAAQ{urgKL#0qUht44S!&}A3+s&|CK%!;YD~_ zaGk0&wgc18*VBC4dd2Z4IzU+RSI_RRaU*-Ytsu##)g~l-8!nZ8+JUyK4UHygT z`$F66dvoO)@76sQfspj>sJgXf{Rhyr=n_Yq&jipmw?`8H$zYnV{V?yy_Y#C{y#IZy z*K8WE^$vh58{j>$BA2gQ;SSV*)|+Ep2f*2hltAtFDw|pDMJ62dL<8Fz{C=tYZYp z@3CnuF7a*HG6>9CH}fXQTKyO~>3*_&_v}SJ4Ud*xwEU3y*W8PyqSU@+yS*800>^6L zd_Gsh?=L8)3~vDhffG4E=Chw43zBmIp#zehPtMNHJXMDiKx~^*1eXnjJAj20{E2LN zN~8F_kY>uX_x%zxzQD?g+9~+=Zzu?OrFaBny4Zj-nvdSlydmPc5l4x>Mz`y2*jyO@C+f%byGS0XT00ZoP2MXl(ATA167hWeM zDVOpCrsE_pZa|VWU^t`Br+`tVHBfxgrUVX|a_WR31)|jBZ|LbOmdi2*=8LR=2(?GB znD-PWv8)HjVam`7fDK~M4_pXx)_Q<7jQkN;I6z=D^si%2f#Cuy_zMu-RndO?b_N(U zs^I0Tey6tFsf<_>a#F;x?Kdq zC-@#eUkQJnJ#mSP0q=_da#9{>6Pzr9`j{0JZ+=p_I?n&g28dnZsx)$E&JfIH#G zw;&SAT{yzZ2Us*eUqruw+NZ$9*8WTMcE6B$*{@lhIdNp$y=H`@24Bm8GZ(-ulZ~Dz z4c5xpLe19Su20^4y!(VmMnFK2mX`Jh+RNj8Z)_=t6d+zcFAq?fLL2-Jy(xf&)qv&E zwU?E|LN-v+>}fj->$L&J39Kz4V66$3>(JaJaP0J3`y^JRw4ihjxqLO2==u@t%&DiqYhri_+l+rlER#cDb6|E0pDf=!F2;cEPS9@%$5M#3yhHV_$uFNXG1P1 z^J1<4(^x)3fdS4{?6fp0$GEqJQsLsqk8>j5E*(ptRE}6CY_&l_AKf@ccnL@^SliD7 z`%_MFawB!tcbVBHegYNzt{HmS#gkG~=YqEc{S0d~nLbEDxbnC1h6Z6;3 znR0aQ^e%S&FL~*4$`+eCp2Og05RMdU-Hp7748454ZlhNf0alOdj{ zEd`P46t;+FIC{omGtBxuMK*JnTTIVS5jhYG@Sn^Ze2BCdsZ*Lp?B$w$I!VXv^3n$hFe?P)trBZivz;KZcrp~zY z{+my??!VBMWd~Q!U)g8Ad(Z*^+~B8yTn+ID>GXq^T=|>Y!>~vuQN+Ct=7w?T{<7(@ zI$iZU+gj1~#!1^d_#NjmFLx4Fq@CP$QX8Cmhd;Rl#Qse+66QMGsS>Ea_gNg(erDKp 
zs#OhS8Jmb_%KkM6bV9>2>kI!kBgU7?y~GPenLtY#^J2Z26bsjUH09Xl{5dTpdfSx9 z;4EoBs)?)e1W~x|0cOEuD? zQbZ{UedZ`!riOwoca9u1!3k3KwehT;-UVD!yL9{RwCw>RS$(u-Z4FISYNQhreJ!*r z8EgqtA0rq{{ugc;rQcPJ)j0!tIj6Siss@?Za3J0(w}J&rG*W z{oJXRKg=@-4?0}ldTFNl&n`qb+u;!xm$1IZbbqbHkOHdo@k?cHTlKW?^9S{>Czckv z-I^)ix_dBemQF3Y+YKwUoo{v7Pzqr3irVuNxLIXS2pA5!Kd{L~qfcCIIuOxd$8u`> z`|u5)!)_@Ez<{AiOv{M{KX~B$)ktEF-kO}wq#rn>mE1;ji&OM9C4t%c$~ z{o`YCb|P_>yE^75by>G9*9H`m?Kv{c7>eVV*)JGoyG^WZ(mVi{-}bEVCz=r= zq{>L^d7lfUk0T{6K65?1enxkEy(cM9iQe!(Af!O;q*1ydab{wAM-27as8(W2(I>(| z#@Nsj29>C*^h))7YgcdmV5W$cE&F1T@93=Je-!r>k?X#++;c2!Bu5weVeH0e3y22IsFQ8s3fD))9JUt9mrBlDW%b89s2RYXHRY*Xo@k4>|3 z0VUOLLzYr(W6ySMjg&Zul|E7F*f@uaJQc%CIhJ`0^tG%aJNyR=;F6GrQf&;%Xtor+ zBN)5&a(zN5lZ2n^?N5U=rRlMJ6PHo`0f0rgc~9p3{FrD%Fuk@qHe5iDbjb9mPFt2w zP_3-YW3ty+9=0uDkzB=LNyYeH2jg=@JXW|K}p z-w75lSLGGYv?E0DIhv2z7LB2D+`||0`!DC`=llD%trqQTP8(f+`@^vTrLBiZgx$q) zu-7Fh#-H`8iQJPj6FD&b_60;7o1;JpJ5u5T*nM?2* zM;6v!v(3a>!pK@=a?P+0qR&rmpH|Bhq|82%j0~<>b3|lm8dnOZcbXfnJ6{FLJzOEyreS| zo>CSA)+Eg}!t+TSGWOl~E2<^NdBIc?I`hHuZCBZwdaq|-?Ujri>*V$L7&3A^C<17Z zRV!ZMd&gbx17ZTK`|qGprOt?M?Na5q4M-6L*sRlLm@uZlUHSt1r>6x>jN2^W3;Rgm zXmhqg+S8C8i>4@R59VIHASgRTakhF{+ic>vr5MNkV8WVF0Bh%v_&h`I#hZ%Jh_4QB z=0_#r`(%zI2qInjNK8u1)7wfAuNAhvL_;$eN>~)xe6j0}{Zs#~k@Y4fnW3GS|mcWf@t=yR!ajNOH#nQjMWQY#Bz!8C`B4lKA)%vdX?4JB|BwAFV;- zCZ$3#a8ZK6H>lSM4C`w4!Q*3bi5CUbc-dqZmY#3>0=K6PMVd|BwEho)O~Uc}3v-NG zR2?_T9$wVg{M^>Ylvpps-zE5IY4=?VAEu)l|B=x&ogTE9cjc8<&YW}{3;(fa{Tz2G zj?{$N_u4dOlu~w+>?1t?hsrVzl{@2bK^mIKTi6zY5O@^x`Army7-y=ZP@@Z0gA=x*A}y_FC$ z{{Qf-Ed5@p?w@~yb7R&2HL^C~WFvb*>gT7vH(egv=U)PTRk`>DaZL_ud9U;`he%$);NKV>0P29FBROx`%+2DN~O&vOf8vJ0^Iu!#?lmrTx@t8Wn;XvB5prh715bjUzL!hkH(}YU;moVKW>SnF zY?@TPuizoFj?Fhl=#YNrMW+>oyWpfc6`{D7n$xVEo~14M3o(k6dXt4 znw~$G({iYOH@{ChMLU1&JgA7K%bz7;;9!mr9V|KVzG?q){X$;tURj%oM9A=Nph&xx zIikb$NZFy39$JF`+aC2XM-P+Fc}?If(}UA#xdBvHU|`&>M(*t#u8v_npGw5|C3N7_ zj@#e-P%@AgA-Xr7&YiJvVKctmXp;-fK+w5L)pk0Ig+ASn>`#~M`Kq=((Mi6SXWnL? 
zWmWx({3={ID}w7IE(hbEYBaYyo^7)UPusDeh47k)GQ^w;b6BzPcYK!LIQqiWs6UCp zy7{#1w|H9V@VAuwqk7VBg{?P-^cy0#yEN9HBTwHy&3}EwS2^uEv*@%@dbe8N{N}sB zn=1K_xq|13h4k(xPc~<#TM1n^PoF+k!P^%kS)%Dk@XMbPPwTN|Hcz#X>f3yAc2Dx8 z>YS~Pac64OAa&}Puw`D`{o<^t-@?%Pb>%~Yw!Bj61mB2<6#rOdvTU%7IkN5FyFf0T z1x5BY$oFqAeS2vQ7Y!CJ=l8DKH^>VK(SJ1DQTgb4xtzQ+*|r}YWz-`%`x%#&eUD&UF2PR&%Dek>X?c#r@`4T zGYn#7dSYfzXZAlW=b?r4?8cnN?Ovz-GCXm@`^c=`KG&yu?`;?P=1=dQBs|MeBC7M_ zn=V;DK28shONcVR^NUZvI{Ctc{XVYxcwZ1ZT{PwHzOBw|klLU}w^dE+kCB>I^ z`j#9)YmvK=Xn(*df^()VUn;ca&WN3zot!*Z%#Z9Ya;7q{)Aiu`wxpyart1aeFOaZn zWn*&&CVmCho<&9QNJx5Q(cyn*+yXCvn#jOlC=%r%Z8Dfw_KuDuAPD(%y_5WAud}!~ zSei=k>gB@q(IO%`QBra;@7rC)QX`zEYv4WtPnpoVyQ#vAjErBmLowliRtCw?K(ddE z!?Mv1E~I`HbAR{!iOu1}`UlcN1&N)QsFLTK$1nCQ`na7HbS*9hiygoHCo35B(?1C5 zfB3xa&@Dz!$(PE?u0VdabOfw_-qQs#Pd% z3vImCcN(mMni-#&W&%5yM5;eL+|tt1mzI`FHBq5dHNzTdWaq{;HFn#HaoPO7xJ}n}O2J=z)n|=l!v%@i9mgRz1?sfMP^yDUd!f}co z+A7zRZ?9|-NCKJHTVBb#Kd(9b{`7bQodf~o+b%FlL3(k9blSFwUhzH5xz193M8MFA zsXfaag@uum!z!$Wt8%^|7v+qRbJ-F6fwvboT2=}vU6C$Ko2^e*%^r>H z+rh8TuGRC!xJ4IRM&`U4rbRs53Hwyy%qGt69aI_nOgTgn6ZjAp8|VmzNcNsTVB7FT*uDLzpr9Zif${M3gIBk1?d>fV zzQPRC#;~Yn(@!KwjY~^PBEV0KYtb-?#j0SOkjok+P1KCSk{pc4cK8Wz0q;)%Kh-JlvI21L65|jjb7!%4#G^3z-knW z8wXYMG7j21WV~d_^mKGP?wJ#${-GeRP~4^qI2^&yKjC+$G#zoL!;6df1<8SPsiQAu z>d$#(N)%JlnX%`r>8hBx3VJWp0($S>hfhltT2Mcny^M(_lA^Bsc6SVS5P69v2#czb{u^F{+#Ki3xZ?70%<(0n ze8HnE#!$>n{z)~=_g zr{WlmN&-vNARKkQPZk}YiR1ldStE~v$uLDz)iI0YLGyjn#;Qar2}2g^a5yt#mHyzs z9^E6@$+oRGct4DNKqOIe>)!UZ0F6FK*djm+`MjTg^i{joSjipv$0rQWW{;jffBt~! 
zEZKl~0f(2qBjpr9-#}2&$)h=WyrkveX!|U-qQc?1xyNacqA~?jS+!I5`<_TgzRbHv zYnamj`C_m-e8j9WWPA6zDTB*uG>K7z#MN4*!DaJnTwGjG5PShDOg4|>QXR;9r{(5; z3#L4RORM|O?d`(i)3tJE-fWfA@6`dbS>729bZ=3)vyE=j)o4;k5SAnekU3o=2It-t z7zv^4fFQm{OG5+Pl;|HOgwtbVRivcgrnS6i=rWaBTJ$i@s>^w)`9j07Bx!WT%VoAqPvEn0)W*5-C7EFfP*s@smRop|6nl zwSotDf0yzGzmG|vDh-)^ehFq6y*AO-ip6Rma&RC+vyOj`Oz3d9f~s^~98Bx+WqR5$ z2&-WBLG5}Pg<2A6esR8ekpBfT`8#YF-|1$hu5K zu&RlY>P19E3h&+xo))QR_}>TW>gpiDgom|>4)yqey!;xdDW`g~_Cln|hOO#EiW&{F z3m&D5`V9DJc|?{xdoeg0<74(LEIz8cK=9Fx_1)}wb^iYi-+HwnLtXq%f}xlx$5qc=5dFgC-{l$6||vGJJwlQ8nB9phw7`=gXZA zG-@T{i-G)Z1){$yCP91TW;S@EkQjZZOB>knX6R*5P z%y^8azwb1#EMktXpMI*j#`NJ$+@(OnD-Nbp9d!@lUV6{DrSk>)tn){6?Q6U*x!=b4 z-CFW#BI;@=(sICM9zOnJKvbkp8k9EG3Jbjr|sUgYv>n`v>k~=$;-=2Or1f8 zFo!8Vp}R%}Wy3V%!UnZchD@1RcTy!r4`sv}_%&`UeKzfpm(H^+{jjfR@MeKdMV*yh z>`1NXIp@!}952?3HAO$x;x6g!l{j#G^0S6<%)q_XY7_Zg>_xH9?4r$XB?bkx$9S(DVMl2@U?kM)gvuvnTCL9~cNfn(6qQd@n-r(eD8B3XUG1S3WW> zrbDhE*)GXc)_wlUchPFzkUvpKkiy4lz%?SVOTTHzR=3d3%!cdYCIL?hjq>GzlZ)XSz?p@8Oq{uun&ev5Y~b z)76D$fH_^t($$LDlHb^bC+tSGmUs%wKwk8y4jcz=p_?| zlzgRLv+_Fc*NfLRFyRd|828_j#=)7v%%yewS*y#d%kCl8yh$$IVXo>q539IWeZnir zcbW+uz5jc?v!zu){px)=r8wJcgJd>_^rIIa-O$a6OK^4Vc8xgi-uyr?XsFQfQiyeG zR#t%~jnK(6CWR6zOMP#I#di&H7T>2~Ol|jhK*hV8Bk67@va~+?%u5aC3XZ`q=r0U= zZgo}6JYio_*Q=ZmYHN?7SFfZHOv512MJ)E!@K|jLjwdQSYHNREQj{9&#OmeCm+4g` zg{Xc!jkDP$`|6VPIa9|qJy#X78mE?zzLqKdcY3PzIVpV5@;8wD*ZA#^3E8)s4Ne>R z@~A9zRfhla7Lm`4d+wUMi`T@|!r}#sM);$HznFxd4<>tNY);>#J?!hVt@6>s2Q<84 zUt?rk{47p5gzw^Y78B*)&Nq!UdpnSy)(P8uvNV1`XR$wVK;K*;T?>JYinE?vTGVxo z#kT*xUtcSX77-1&zB#a$^4!RSdab|qJI&T^b}n!pl09!KM#+12uHm=FeW{rhng{#D zg)755Xly39qrjjDLKV<)E33D;A3~^)k$xOF{p#MAFJBT96JeLbr>zvMJ*sm0-PB%_ zDNr6cZ~9PBent4Su3^M+XHDlp`9@gdLu&Vx>NRe|Pg^z(uR9|8+*yR$;nHqL{xAbw zziRda>H6o6aH~*xhbKHI(={BD60f|sO}%~Fl!vtGiR=~^7h%Ud2^dJT*KeOc8@+e$ zC>5`>OwyY_@u@@Q6W;0?RJ<84+=HA|sS>)r6{qp>I~VVJ(K}?qFOp(n?iVCu_HD-f zbFT|yQ!C9F2zBU>?J@lD^vZv{gwhPv5p2RZVaNl0F=wn+9v@pZSuOv;&~yQ3SBmqO zH|dW^-A)gxviNV(r*pV4C)f26&y%tAr?*;NsA<03$}D=e&gbEs7??cs$M=J=5P6(I 
z=SvCUhmkwP`KisG4Vy84W=?GnPfM8JD+|CK};lnw~F|PUS zgEP)1HYa2Hj5RpU-;=aRNXoG7xt_32aVy;VqHE*x=gjxV9tSIBL_M#r)(Fe;N^**b z>0q$Anf&0ybGsCa34M*o1`%1_iZLD!1sj*8DW5%AzqdON7@jy&yvv)4t5zOO<yG$Oh8(W)PhPvmd=-jUqwwI$6uAy>H3PA{PCD3|!3E&u$fGx?AAS`>Xg5xv$o z{op}CNP#iAljfJVYv=7W^VV|S4w%YN@p|Yk&#Z56c#{xf?_sI;r{PP9>+UUMRU8}~ zEE2X)-1>8xFoHBbV8?Yxe+GhD6gr^+Y{aG8q)|g*=Azl>J4h?1hFB+AQ&Tb@@h~`e%0p|-oKGm8v;SG_usmzhDT=(KA7%P4y!GX3dx;$)#Wlb zejr3skcn&mja}Ymx^dR&`*iJ+*trsB^z&YgnCvbb68@=b{>;bl@dc_!G|W^g+l#_~ znK~>*7JL*7I`%sh8pSPrR4l0{hkxLKj$qP@SS@Ro)1u;U3gFemlK;#UK6LBdl?AE! znUF-Qj1lXbb}ex(uUS}3hDNTt9Est}OB%^GB>O@*d98g91T3ZV>*bota_S!yzI82n zVOMuQ`UM@UMug&(d)J@S>^#Ntr6f_&txRV}obl(5oYq#6$`{s&N&4||(auSM{EUn= zNuOIIGh0Tzcr&89yL}eh%%5F*nc8)I=I7I{Rp+33rHr%u?+O=RGgC^$_`dD_1-|2( z*RL-uOtkC!$C_(vYaf4!(PSC7Zh48&@`UxZhHqC@QMtRr$h@km>W1dMf2{aL4Bx6{x7F+_u~m7sq+{Cow=ddhM(#xE-1P1g0u36E&A`*-3}wZD&DwJhyD z5&6QUr{`72k-5Z7DGiMrN7tm@ zZ|@d)yM8B$BVA19R2E&T(zNHt9#_i50`WfM231U3eB?9P{q(8Yp>>JjXP7$v?`7wv zUdpi1(o%#%BeRFAT&>0%3Gm@f$K_s&2gjb(Qx^5bwjU1v(ixWjfhU3rJ%AY6lR8R8 z{O9OR+BuvLam+GQNYGS%U0f8sNk*QqW9UvR=bAv-rdb&Co?ARIN&D_mlbeXBxK^e! zi#>nNz!>n@juI_OY)W6Vvap>0+z!{ddF1!PC!9I2)nR5aC(06DNuyirCwv())s+_- zN2h(&tIm~1g)#~0J+y05Jb17&=$~fFE~3(vFBjWd;i%FjYlz`0(7w#w+~l-`i^;-q?a18 z97AZaWOrpXd|E|;T?ZT^&Tl|hVp#U)7!`b*Z`s&JtT?@<=7O7>+r^8Aarxl&Z{x-! 
zifhx1%Im`=HtRrxoLyWl1_US#?RH>4Li)XZ`}WXDGdlAAucw!%`?vD?ejVJmG8SN1 z`dU_2*4)H|-i9g*Q##`-Dk{(@?eIJ!LX#b|v{m1qP|J6^R+^faTt;0z+J~$xfBSu9 zY(*KT9^~NQKu%5`v?f6 zNIRf=Q>MQJcI1aF_9KQ>!WJLr;R(i_VG39A{?N~=vNYYFTUZ!9Ez;cf`|=-veHZ3O zXYTHJj3FI-e0&7X2T7Bg*x)`Pn6!(T`Cfkhg^kqah-NF|p>BP50lIC1xeOsJx6j=@ zz6I`QiCxosY$@HLoVhT}WDGNCy0XdXdu6ASB->NR1xS7^P7xKv-!m0=Vr`ru9iLiw zu71KJ$G6aI-yX31)eop7<{%!PX1vt!9%$ETbB7D6*dhdV=o##8Z$FNb;cK+)uTm;o z&|#0RWzq+CiB1gc07dZ&lhd#OT(2h( zXcQ;3PJY5w2v+r~8%3|OtZ5Mi7Tn7DlG@tZf`YC;wdSR^agk=&m(_)iJ;p{xKaLht zmW_BSZ7vbrxahbQrl+P>KxcGzbzNIs{pg256ePKAX+1{+*Q#E)@+W9Qa<1AB($qf2 zCJ^Ke`%dZ9o78;&n1nIez`LPp8f*%8{R!q&!R~FH@4KvRWRwEh$41zUU zX=+l9C_|dxk-=neb!x-J6t)=yr`?R}z>xJ>JV8NxQr=R=soHwb< zKXZU9Q{iGoVPPTe8xC8IFBTUB0Lg!156u5?wC1k-bF=*j0j%v?Qy=((5Ev+yW2XnnYkWn;{rj-5f{b;?P zXspI;cY$@s*y9U3DBAL%&~2ai&%D~Z9tt4=@9kJ9MfnJzt4(y`z)_>A7R|{u2w}4! z3=1}qz+xcSZH?!)lg{SfyLWFaXt#w}YT)`WKY1(UnE!*Ix>v8k^sJ-mxn6NiJ(B3X z*;snNDMKGacmLNH;Qll2d*ar$!Sp|umt87H@C`5iS)Iz)NpE$nJW{hgmg(OcukGA( zR+8M##gGNmzIJJMaS<$Q1-0k6FBt4($b(p6JG#$NM3#$d%S`fCIqP!N(}tX zXyZJ(Fo!o#lj1h@&XR26c8!-OyLScLs0d2a$nBq5!f>1OQ1eNGx)nQ*A3l?3@){(?JV+dL1|1iJq{8yMKo?zphBl0PuBxla5Wu1nA4 z+dV7aF^m4ghp~4Zz`vReJDq>R=zwUsy`ScSq&sVmoM}wdLLN zzHazs+V>JhKYTmRX?5@;?Ao|8?!VH7c~00C9>7TTp`SY9&CQ4(I2om{OYK$`7T6EE zK8LCW{S%t@Z`G;Cf$NV(JWEPTWotUFJUJ6x4@OF^(8BWaH|TES>E-(*CHs1Nd%17U z9lxm~$gNZ#0FXI&>yAB&8+eeK`Nyl{bjQXEICk%bL5zus*~zW&rmt`O`*$VBr&I}= z($1YX{%o$FEH*0|y7c>p!}`{kcE`#Qe0)Dk%;ss)qM^5M-n_X~R$gBI%~Y6~X1RCH zN!wYfPJcIY183Kl&|aPQtCP0`A&YnK-sS1|cKSnR3l35j>4QTVFI+2uFe4Q-Oo{)l z`TP9JXAt8tDK;)9<}tutU_$uF3s;YTR*b)XMf@184*;A=>T;G!*U2`}6krTjDOZ{v zdn_BSf^LteHa0i+1-S;59~BrePTpNidBK=?H4j_04?Q+DOG{NXbjZDW?9`s0<;V9t z9`1lj|Hl2_rkSyEQoG~n-5GPIuDDrD8(z6`C7uouo-^Ra$;ohsn?vqghdkfFSarni z##tx@`0#x=o@bP~XnD2d|L*A5p(6bNS()G3UJ-XGJ>1;Ndc8)P9tHNDe^j(wgfF6W zHn4B)&7WoJ*Et5l&`JV~U4N>-g${S%cQ{BL9UWJ0=m@TS9qQqZ;#Lt%`u1PBQd!fp zckkA{b;WR>>QOR?%LE(Vd-H%3O0>9}8Xsq1E;9~v_aRzd6C0bOFf=(H3&N7_leffJ zda!}98lP1E`r%r7W3uT|ac24J 
z5J#+_pr9*E3eiCbTN5QtGrkjkTO4_RH-r6PclJxDps$U9`mMxR1d|TK_dxQq5hffD zN4`;@USoxb_Q|)i+|;>W+PEenCwFt&CwMEN&dPbL4wFgSHTUBHFwGzk%JS=1_kh~~ zG6=vC3T(9Q9MYW9UgOF+9=-uYyF?(p1_pg-#OWr@(LYJ*AdIim4okSp# zfOr5#2s_&dFhebE>+NmCM#*@MH*aSZN5tfR@gjpw%v7-3vPqTIH|UR*Z|_(+IXAu* zmA*_r*zulao7`Y~4C@E9Z^B#QDTlT@GT6|0aT7Oa7@9XDBcs&jF$+DtvhV(^wyX#C zbwf-yUwpw}S8h44anKVu6#Rb_l{VJnt~NI-=gfW;haXgnSyhI)8pSq{>y8NQz45=j z$8n^ky~g`^tv){R8;qV-vF?Y-_3`mRZ_6We5lrmtb3;?Gg!yh#V1c4%5ZXY#BDq7dw6UR zjXN2cn7W*Q!n?fdq<(>-pT_?oye5b|lMT5F6>e$KlP8y9E#dAS#`J1>6_};m-NlX{ zz2s_l@?(H3)Bqv@Lc7Dkm7q0daQpjb6O$x^p2hF^+kbpd%QKlVPWY+iz|$K4{t3t_ zs?D#c@Imqirrc|AT1IAO`O=e@ zJLTtrM`CRszy-)rkNJbQFxt@ZaO3e^P}G==cfxnqG*K)9vuIx_$q8Bj1HXo}v^1eQ zkXz%Go9{g%l}DCY=yT$x;fsviRi%=8BR*0@6rl9JYAX z1_zVY;~tSDewmt$6_@*==Z=;ZO-#G|<=;Qh;}eJwWnHudB;tB>bcKZ&9HARU%0=_b ztW2b^V3)bt%I`h{foDDbb92+w*FRNm(caz;TK*aM>J}Dk{px-faHTS_3Z#*1VSx~R z(Bo$E!X*uETmJrGPtLB!`rZdaP(wt~7RaCia&rDqI%Ss8qHgZ)s6vT{bXVdkf*ML( z!X8XV4sJu7Fki1)1(L(_?(W1cXU{`Nl$7jv=;>`3l&P|@v-_=Uf4r^sfLh!^P0b78 zj8<5c5F}Zv!By})bdpFRhWpd?#+jKFnTn@Xj(DbMrDS9TeC}lEG!>sfKx=4d$V`)W z_b$n2mp>y*94gb8wRmLn_|6!0J|362INr2M@{+;-KLdk_hh+L4i_uHrl(4^evEQHXbcJ>3(VsLn z8f>9VrPXqcEXH+GS-lAl7LLxsAwGJ#v)l8KcHS$Df}z;9?Kb)}A@LP#Jt+^bgH_A| zS{xY6XOTVzIA*~Im%hDwjn=fY?#x+os&Px*-~Be6FKPtH&J0*6aK|4%H+Pov{?9p+ zx?y`^rO5CT3rDLv)QVD5Q%6TfF~N^=+cvjqEA$c`f_%y-DmIpv8|E275l7!3;k6O+1bRc6ddKZ+Wgrg0Zm*|T&%0DopIzs3FU?w z_ZOT8Mjx|DIY$!3HOqT(o`=T&I+Jc+)yS>WtE!kbW#@}yRlirk<1CUE<1a0ds?0~@ z)GutyOrihKb1aH(Kp^J#v#<{`x@%jF4H|dwRWM(lODmpUq_vXrnsop!^hn3=vzlBQ(y9lUZ z5>4@rm5}L^|2aj`%x>*I7~RSsr0ws#&SZ5{+O(O^ST50XziWlHS&Z1%&$O}RykSf1 zj^)?*Uq3Jv(S4-6M?&w>P9;k>vN{cPJs<3%jg>Y%qY}ASE9J)xb-T0nCqDZe=x9-D zMLaaR#8M3O9@lLLm;4e&Q=_;DUhvTrt-bZaKZG!f3NurQJ_vw4H_)!AcIJ+>G7s>L zW_EZVw`+$BnS=o{h0r@vsVPLivM=LN>)&T8e@*#iE>9S}(DmcmL+;DM*Hsw=gx-Bn zcajv={7;J4_rJ^0{PYVKnV%*ta-BR2*A9M#Q|U*NeCAnPNg|SlCAzPZuHt+6E*E?t z?NqUXhY!)^o5{1EN}dwN?B%#ou31pF-{O@ab{595Z3p2gO6uei=RTp+~^ZxJ7z`7THr zWsTK1$Pue%epO76Dd*^mz^l%wa;hoMJpwxKrT)lcsngr<%5yK){Pa?MK;_mt?D>fk 
zUFcf2n}b6{RP^|DwqsazV3`}&IQcLfAX-I5Me(x!Sap8g;qx1lVk}!77NzP|7?O3Cq4@0kC+w`OD~KrjW|lD4T)D)@l6VRg@pYB64a8W4q^B=SWt#n! zHuh5}4|D2wzURJUh0a2ZikBninq=|IVET&b<(-rgLf$ckMQD?LtL>vb+xdy7QLyj< zw(I`A#Fw16SD`1JfWC@s=36rEf!QuW(kaXT)nd!12HNB6Rss9>6A&tcB1 z0$K4rUTHDMi$EB7*4_@_L?2-332ST4{>k8u5cSz+2ExqK!u35r@nBcDP*-QFv$N9t z`o?eCE0;B9jqA+Bn*Lhwm{Q*>9R9Cd%_mm#&(wv3+kX1Ag*q*)h_@NfCjH$|iPOBr zt3u^cME~CUBKtZNZ<@G`{OMN$4Yqp-Pv-D=xqZL97d`9dV`|dQPosvSy;HYx%~-{) z;mkk72`?@#X4izogknA3_Qk{{$HEi*N58$ZTfv|4F}q0Q|THj`ka4j zJSD)U;~y7SD_vi>2RyZxtWso&QqwE#Y*<*`Ad-)W2aAtaSru=#z&d0y6$4Mq$;kmi zv;xFy2ZCd2^KCX&2V;|XrRA@?_U!pSUG$;u+$p$m*oQmVSvdZ*b=~_hi2O8WNsF3247Mh}O$~XZOTKYK zIa}e0haJKxyq~I|qjbpOsgePI+j}?(D|xL z>5yWYAEm~(er8KC4lvJXpSVzmq$i>w*UDA&*(lpn0FVfSa+w$z5$D3Uw5NN$e{g8% z#mxh`j~OJtbY7BlSTZ=Pm8N(lyt)SA{qGo6}ZKP z;$vg0t%`BCaWi0H5fRW(Qy&ia^-wP#w@^_$jTATh*7;bT(NQ>Qh*0YH5urt@ra*n* z`uSPo|Ni@rt@>;9qk$mAH}EjqCaZkvHaZjdi!-I94j#n4S;m~jn!-Z&-#ZkV!S{{psC{P*8?Ao3q8{&Xqs z`rX$8Xk?%9ec#Q`&C~?l*10PYnp@2F$<1ji65*YEB6dCSjO6af7;pr*cnPqghu;2O zLGvR(&G647UUOU==lLbL9c%6G?hX;$-mFf*dhB0bX2$$^qDSR>Ytv~CXwmC)?vIVH zp2vP59tKwU?R-613F(#?>GihQ@1*7=z5K@)7w>N#d-qGz#l?k+7dXJBW%qRk763x! 
z^eVX;u_2GwP7WcU9~`u<^O=PD+p%K@&cDC(m;Oz?{9Xiph|09`-Zs761D^`eA0oG) zpubH|Mstm?;=R}|Xg37dI41wGD^yUj$-(zgUS_r1*3f){W(5-C?7pNYN@TE}rzMgJS^by?Sc@V<@4uwM*;l!pvIZ zfdCvX{~9^9b?erM9lKpW2<+y)XCiVkpKKp|4Ky1M%=n`JlVQ;i+qKoz)e++a17V#LCq6Ft$L|hdj8gkXceU!7 z6m1*8VZ7j zqc;w)UFSZURb~D01@Qe*euFB&f5X?Tq@bGYVh@Dx6j$ptIEQ;%|%SNrR{T=qti>t-Q zedRNa3ZD1$Bx&U-aQ9}FkNUd0<0SxrML-bWzh9k-oRTtrx2!lnzZ1|Xo0;UzF#Wck z9#J0R=dPb`Ve#9yZxJ~n%oky%S?5**ta2H{Cen3Wfn^@C+39r)x)1{J^y$;)7s1Ug z2=B)w5Z;5CR%iYG)hj;6sNqsO3C1YSdim&%Pna$rMt{u#OD-^hApKmuTwsC+b6v3E zM1~8Zg1lfucD4id6AIa`%B>%W?%K+CeNP7+!GrR|&l*V9h~`d#;fiDhe$Jjfi>=J~ zuMxhKENnZznd9uXZ*R*SLRGRYs@->u(o$KoL$e?m0y~zMM^r2Y;*3!CqMf6?wbQ*?P(VOAiF{lfH|B?AwH07b z0Q2X+zt{DD;2xuqTfM`Afq|i<&*ub`Gk|*{Ogijy&$d(v_d0Q}Hc;w*KflB~cRoC{ ztc;F{@o;w!Q_0p5bdpMkA5$0Tz~ZG?IU<&ziA=&nqk{Sh#hs*lLa4S1CY>IBXo)7M zu3B2rW<_TnV@_!mg_x336Osd>DOx!qUA?_~fZ`ItYGR@fJOf}=QKhs zpC~e34A1!L)#h|L-$IZ~v^M#({(TSnWMitcZ>zdZOV#NCppBV{i530}6P=x!<~O64 zJufv}fs+W)G7)C}7r}FqvdHk2ZWtBZmhmvNwY7D37wNB7kYS0*8*&dRSh4yFURFRB@Y$#KIkV+4%i=8~}+ z4b3PrqkUu5UqotN(rD$q>YovT;*poX^zCgm&|}>T@0V_<@nL_ULDBbG;?SYgX&(gx zL_N@?t}kz=Cnl2ZcouvF0BF5Trd|ywn-_Xb&uisX9cUO^5kwAcXV}`9jVMeRXTCuJ`NpM2jI`PTg6*z zfYbTf!LGm^nXhg9>W#Z#E&idxWqVxA!zNT{;M2eeY1t%6rROfua>PEm@ZFZSEp`7% z4d4Cq#d&!!T8I?)+g)C07cG5)pp@iGjUCe+ku9GwDQT5vPn6`?E2bJBU~1DbwFnk| zq`567heK`w!Vu8k?Y?8QZ$0RbThF_T+}NubH+7JEQ8&q|)sd>A{H3YOtD<{kahj*6N8f!j zGB#e|sI>w_gc(RWb75)X{MTEeUMrTf%7^Ze*i|WiMbBDs$_yY~h>3_znf(^=Q?q9g zKLBn4wp{r3=k;<8A=V$OKz78aK2b|m{VJhiX)tP?+71_jM@%QDqF2k% z&4&HvMXH$gdZZm$^^L5oqI<-n1v7z}{mA+8l)cbrUC6#wa($}~fBaOX|Ac4p7TzTB zlf*L|8SY4E4YSd!ur!FMK3&L(G)WzVQk-x9B$ z+b0)t-G>Pu*AnTHbL$g~QJbJGCZFo9+pl=X&dTcR4dVbl+BPI5H1Y=)fBvjLmmxi# zKzvWCwz8H`$_onu(#I;SPL_0({dB%2Xe%&eHRo~VkF=x9u?>@0E0ic4Z1pWD(4yrH z+xIE&rbQlHAdc!}rxtJ9uZk=&L(7ImND3Sk7Je|>-sjJFvCv<#Lh|>7BnLp3xSdS* zQu)K^wRz}r7w-M^S8Tf%$C`I*!jz+0t2ost*)i%>urQ~0Mc9gca92)3bmD;otzsjz z=-AiesFHlBDRrMhlyzn0S0>Qqn0l<(^S?a5n@jJ%pQDLwSEdN1=5=n> zVF}ec1%-W0&hpie%3Fs$XTDBo{emNoeEvtHo#7HJ<&v?5oV;N=?kQioX9Dh|9m~y* 
zRHqFSHNv1*QXp}52%lSv;%{oG?Yo*XJvZL};!yhm&w zf+g!*;bkH1e9>QhcX$ZB`@xr|jxk@0(Gf)M0CY_t^^7)=+C-Kb;GY2F2}ei1{%X=V z%CeL%g!Npu+`V#TNJ(MLxnFRxUA{n1)P?t6PhVlmuTPv&R=uwaF%!@8gvBE((-Ovy zPp+}1vFi&aSsELMYJ;ttrPi8oO0?Q2>l)w7RL8e-w0VM-ceCFl?NXuQ^|@(Mzs?>mx_+Iu)3o^$yxy_F@s`kqw>zGp!{ZKzg~YOm zrenGN^!G(OAnH1$kDGoT7fN&eaB`1YwgG);dvl;)HLTa#i$LV*!5k8Yqy<}fxs>OEY8j|4^4m^9U@_(uw~HaG8%*p5FOyD$bB5`t4;i43=XUH!q%eGG~9z1yP$og48 z+>XgYF=}L6lyn60HD%oQQSv6~rSQIHdq`2NplW$GC(6jUt<0G&lu0l(!$Qot&$+W& zkRl={@y!vb$clXh-Jd1H=%w5$4<_a<_+$@H^whe@o$K1srDgQ$7 zQfg726Ek(XoiS!LSB-JK{>n8?J}Q0#M{v z(_g)z41zSdZmCVHUz{ID_IsvkMp-U>^^|dyNfDj2+A5VST1fzvf+zE8csP`7pNqs# zE&QJF3!_+qaAH$T>Mz`kwEjsgd zg5My(IOF~99rd;lg1l^9S26b8k?q+(&)Mtr*k>aFM(S%0R>hBI^X1_Nf_j2rpP>0% z*z}YuOFk9(nKn<$(eAeEfd>K(j5>u9X%&&8g;iC)tzpzaqc=O0{w&nKhGqLoWaALe zaG7fW6}SCvwQhAtR#s^5-jtM-$*HNGdlUigSob3#xsy|_Z@Gafn4lRsA+Yw9-z=B# za+M*~(y_#(z@tR)pR>J%{h9*;*%ptTcfjs|KLo2)+xgv7C;j#@hNzRQo5pMk+&c_~ z?MiKj7vpvbb{O4@ru zjWc3`NDiRL-0ACJ0Ss-+?CngAY=avu!PSbg4-O^^8Y;Y(wK)GKd^;<$l#>2G?AiSM zJCUG6R<7vvEKt?|f#xK+UEpwz0e%PN2mDF|unJj4qi(~y00xTe+t=bEQdSI?PfAem z3#QKe4*L83uXmmGW&J-UY>A$Ewj8~$``DPB%WsO&9=m3yWl*sOk{56?d1Tu$uQK%p znY9^+K*;3(hZc!v-aR5R(>+_7M_?b%>hdKZ5QBbBOk}*EZw_w}nC`H9b?sjsKp)70 z>*(px?cABxGf8J7xnypULt7nL+f!o&z6P=;m__NJ{Urj38Pb)!VeoH;n}UMy@L&?5 zmX7?3Yx(*3yw31__0-D41|H!vuLe`Cv!_Q}N9Qu2H(kN}dEQUxr3uN>)6!b|(YDi6 zWLN)WZ7S>&_;fVk6_^gvX?ph^oMIBl0>6Co%!*J~cMwIGM?Hqc@{DGtuhtO#ZYnB% zqP1m9OAB1^&=uBzM1cM0>P9CfCV-7>Oey`n`hNBdh=d(apMsW$uy%EAjc@?NU`I(R zRQpo8larGP+9ATVv(f^&12Y=&+?nv5jEwjg4?E>}0(3(HH(4tWgvCFACPYO(hMN|Sh1K|otB1sM3qWS2r-4tNXqaMsBYe8ZEbg8Ls#>93Fl=<)D5S2eY#2wZ=>rKY@KReXq# z&k>9-9Cd2Xr1aai$E@@Sz6crYJ z1?C+7M<;#_p(yaWb!4rs;P-}0Eyac;pm^g9xJQT{hKA5X^xY6y>QNJQHMKV@%DyiJ z#+hh2q=ka?rg#W6yKH3cfBucoDY5^Bfrm0OZ{7s!>>sKE zwo|%{uK~C!eEj?%bCy`w{RHEIpdvqwWB{@VHtGn#SEz|d@iH#3JPDY$>CdtRcRVW_ zTP^s#K)LNOvK8pgMMw-NX8q*#CAi!$Clg9wQ6L^g*hqR!wm3oBAXmK8JGFR#7Rm&L z(nbZz9RwJgZ7^NyO8Nf)W!VNd6B7YUS)aQ3wF|sLqD}em$^Sr1OifGs3l27sj|GDq 
zxI+^f8xXTq(bV73z<}OxlAZP%BnnBEY4qAeJhca5Ei%gk0|P=jH^--@j+>ay?^RL? zLLj{-aH$_Dd}4WN1X4=pP;T*Si~>7TG^CX$B_oppnASo}@y>rieKWg%>Ix2^Rl2V{ z*EI+EOueEZ5Qn4GasUrKHq5zX0@UuCXxp4Vz|Td7$Ligd{Q*BzsAhcYhwodW zh3@;m4a?&IcE7EP?cLkn^@8~}KWHV=(!MC@pvne>j0O%B_4QUD{qNef36X*xIRaXl zP%$nn=nomNQv|nfsc)-Px@YCcLo?9x0+y~0xf95+pWia^2jI3sgxv5|f6?MG%O3!) z0V-4rVmu4hD4I=RyJ`eoNzJMRIC?^wLsx*Yy5YEf(U2pXtKD^IPXG_>*8`4FI+Y*$009rTpjtpTd3k(bO#h+rk0ks)4ls5vC7`Ly|QI zFrgo(q|)bJbVrFlb;9Loedpl22e~ebi13RJm+0-Hsa@ z8#_8qUKMZP{sGZowjM?GF$N`pySe8~K_4aV=DvZ|w5A zfS4c)1f&-GH;`8Nxq!_{HCvEx+Q_?qpV1~Hg5TZv>wIow;}yU@P^HAjDti^R5%bhC zAu~f-*%}%`=LdKVz^$x|%$c$Je?w|!Ku@(~XB6d=2HWA$?5EM<$RdCBuzGCds~Lb9?* zIDj8S@-n9{4(^DKjotgX9(rS8VWBT#EE{cEt%|R`Blj{nd2%-^E1@00qEaa#yDK_s zAIw2o$r+fj4!YbrhO{7{dU~7isRzZyn=roY^z>!G%{omGT|`VV%S~9gXjcik5@I!kX~PGzAEsM-|qIqz@*$vlN33IVtEH zydoG*44Nbi=VuX;7g6J_0BfZzsy#KnxLP(^IIwi70lCcuf1kS1ckeQ=H9v?h9nG;v zO;vWFf_Nb*f1!@9#C|tph zN&1Rct;vJ=iPgxW=#`)ul9S?14GjnS!K7ghZ>v7Vb>y+SW9aNlig@gvw_ZNsodkU5qd)2($b3 z63Ha4p&MiOj9oNa-I#5!{mK5+?HRkp_*BsnON2FB5H6{gBJqW{wn;{sy}V~u^j%$a z`~8Ft@P)2wim}9~^Ifs9xT!2=>Xdm@CRRu<^(T9fylcNpjJnU$se=|FlE#I~6BG+H zcyU-TQDK`sFf;y3`BZ()^|8BAtdT?4ovY=KJoHl>&H>YcgEq=S&gS`D#+YQzYO~gC zo-qG~Zj->9!dYe|%(Zizk~qsqvRtg^6Erq`@on0+mj!u=J1DMNr*IA*`eY!N<9{IE zqaZ@#bfBA&cHX2zSJ#taRpu)d$$4@5!}ZghyYK`m;x=#s`p3p>s$>?A+$?UK5V{e` zeZr8r(WF}yxewdDlLv)43#^j3B*x30iZDqPeEK#J!KWyplhZoiL=C=FYwK?UM$-%Q zdUNlkSw^echLTMfFZ}{-A$pUz76FY{MRkTUam-f(SCp}N=N}*YZ|^yO&hAfkpmpUr z(n(EMv1FNK@!sVP6JtqLD=;iFad%Nr->W;s7j`30!BW8Jpp>)~I-D8h(ae`tS0sD% zFvKkW;9(1G%eL&UE_uYd^06%fS|SkQM^6QQPViR6J0A#OeAn3aJH4mUSF}ZA#k!di zhXSSJG?||r6FwP7uQK5rwJ90eqAZxy5<;J6T`+j=p4K1!t@aeOl{_4u5B z;GHlPFX(mrCp~idgf&Ym`RG)r94<8!Y2p==I+Rvv zxwZ!`g|_ePXZ#YMba?AyEoV`A0Xv&FUv|7xJ@F`<{rG&USLtr+2L25GolA)kqh+V{ z^1nP6IpNVKR}tkaUn{>eejn{r6raCn#G_y9lqalqCw`GVv3})GIfLBom03=mi<6sI z8a{l#EVz~Dc6!r7_+?tDbeV&Ezfr~&wEpe`avHF-D?MD>Lv9cE1@nZ6q2@qJPR?Rp zSno(eEFak?$An(7(B{(qnhUexQT|zOhYw&5a&VZr{(0A>m+x)!2S$uSbro$CIuzua 
z6+K%1vtUOMJme-VT&i-TnuMwG6|}<9A{VIgGf_1uS{l%t^SAZ5?|1u4TmHKqdwr6+ zJu27VG*Lt`c{JHTn3?Q$p9}@*M(^jt^gmxWyYc7_*sXS(&o{20O*ZH4vR-6H0RuI~Q=DS@5TLHXyzw$d22>+jKS72NAsu9Ffi(Czi2V_Rqo++S4L ztfD|n+yi~!IYYbOFgO=Tr9Wpy`+uL~-f#Db_ucAp4r>)LjWHfFT9Z`F{&v55tV3yU ze*pib?}Pf^QVWi>sfXUQO4GF;rQbsp|LlT+DcSSIKN(!Zf9Pz}?q(=?h+imhi-w z%w!3gZO8P&E}?yT0bc?Y#}2WyiD*aB>!s84GWRpTEiK)3di&vh&wt;mlS_MvzrCMv zmS__n_fuq>r3cY6UE4*PqrvvSx?E*5M!U&-kBe?UJ1@Uu^{IVKzuhM;g|nYBOn+gR z;wT0Qphq1V%=@LQDAPNexhc~hOQrF%Oz9~UAFxtCKOp1e%a^p}N=#7Meb0ki?JiEH z%$7Da-ZVe+rYzgqfrm8Prnzl0T+rgQ&rvzyz@j+8!cw61yNr6>=Lx0TSUG(2@EvvRH&{o_!lO+Mg$jHU7e%5&dGajW4W@0Q*7EM!&J=NO0A;ffYh~pDY0iwzTcXdsE z`DY>O2mikHC>;ZAg3`t&IKn;=p*)?AcQF)|WhYJ_B{A#WT&)#Eq>4eZ^N2pFcA*^TF%FC-O&*yq>X@1=>CACQZyRS!ieNKa@=6Bl5p# zoNvIluH`l+EG97;iI}+j{}rwN+czJTYT|k6I$xqiON1{lcdng-Ux%0afqDQah3^>s zzpvtQjW~;YngSsjTYJuq6<_-c7~|{mn!RK#aAI78C;xVCgV6Cjv+t0fu2Z!j<@Wyu zjG&0@3xK!Y+g*Rk?Y-{Q@Tz}27mYN2!ch-W=WA{#0!Z*!cPwH zbaF7qYJ9K#6qk30649FoC*6M#``>a&T{sB%Gd7t<2V@ONV zosG}3O{*^;`gZdcxYCxw)mEn8zI`KN$fbC!fZlw4Ahf0%Tv(hUHokn;4-n=F& zef=1J_h%~T&s4^TjV}^Gt35-x<#zTS{`5tp)BSBz&|geP1&4=d0sFtd z*@eG3>E!eYj!}Hjm*)q2)xynY;|zoqrf#3><{~E}go-XZ3=nlZ2w4|Ky*t3Uv7WcM zaiZ%-aAmcj**lKU-CW%q?bj%8o7tQxJ{`O{x1Ez|>5cAk^}VA{tje98RI}p7x~(!J zEQ&JO~mdjJ#m44(PlXeacV%bdqm17T~AJ!;Y%o3GLueKAdB<7Zlv z*afl91CAGyELDGWb3NN=S9mK=de?rfXn!{2}`+c;wHKUu)6W28BuWb1O}zCavN9*ZALvbkgXlw{MEZQ!uas;TY0fp8$n+OgGMsjn2cUIACfJ~ z1FPozdMjDxw|7I?^CgWBlnb6!(k3|`6OgU;lkO6U7V?%~%L)EOo?1L;afX9zYiHcs z9cLW!4)zJ&S=eV4d=aa4%aTtmHctrd&2Cg%11T2q)v_u zU8&Ufs`1CuZ!5c29Lbo(L;%i%pMAGYC|DcVZ|Ad#s6KXk_wpOgdCB_%3ns$dpXH8K zKl@TqaM7@PNVVOV?6_#CXw&`Be6EtC`%dH*)Q#Ltb!;w2v%vNP<2UV_KP?;x8yV(v z`Xx73a-&MY>lvFMMHd;2^ z>Dp~NJ7%wPQmC~UmJj&;s?UbEm&Y|r+x2Pef*yE<*9W6flWEaPNyVkEbW=(y_r z{KmjMRoT5ZksZtHDrAenxxD7`H$OiZ_((V9bs%m_$34B__g3Lo{`TY+6zoI&_u@rq zj#@o+zJzK^<-mH7O`*x=9h^AZPF5LJ<(5OVYVp zJo53r%USLCVv{j@{k}2IIgGs>dov#J;pUFD=JlI%t{q;GEsyaDx@p)s>1}d;UZF!~UoYh` zK#=JxZdl{}iZN;(N%>L_DTBO))LFGq)g{>h(~(M+sh%AJ4sxr09k`l9C*OvT0A;k3 
zAgA>PhvTQ#q{~pt8S)__%e30AQsjL7p`hvWGN$ZLjvM4NhPMydMq4C&t?bt!%bPw! z56RN9bNj{XO$gso>eCue6BI-Uj}X$-BQ7fBGx@8I@X7Ym6Q_PSY9aIa>-@vkFLD%w zdK221JmCL3AUw!?9vOiIwPSxzDty2Zj!Hm@(|FOL8PUeIP{|Yy;V81X0xJTB%*lwsTK-c@3Vn{j(yB?vc#wexadLBOj_{2D+cFpu z0oBM1w1xrdFO4Pc2EOr?wy?8y5*M!$+^Pkse6HUg9m>Wm`aMTaExvCRqe5-(N)FN| z_~8+tqzTl}0MV%sWkZdXw#q{R4x$pNx;p7-1W0?#u{@6j|A^PonQ|0#6%=X4=$S(@ zJuU0w<{kKr5R`4q?h&<04q7TS8Esp{fKmz73WpIpjZQ+WB7=0K(fPAco$h zE5b`TM{D$>+gSDVbIb5V+A`6FYYP(D#omRZMaY)9+OBA^rys2P#YG5{e~+}qf~H8M z73M3xj1OTDp;3>1fvCyQFQ2FtPqq$)YN|ej-KE}fDT0s2-6>r#~$jlr! zK_yOw$njA5_eJz8RZCW?(nVRf^N*N zMXsN-iGG^#({t}b#PbgG*{E1mf@1ilqkv%YyDL7XOQ<>V6uPWk}qF&`S!4_zXC;^j>_h3;Awij=1jLg*hL@`%qgRLfbNtu*{81wRk zx}QK$hki&*>=?ZeGGcYI2Tfvtzd_Y_n^Q~at(=Y3>=Yj#OB!k0u0+2Imlnsg^5hrK zDRS#F!?cpEUj5rvuIb3AE9y)|o5TRc3XND?ns!}jC`7xy+E&^<**|0B>75XK>e7)Y zhQ!-bBNG^@FgfcJi$t(}{Teg$>KrbEbok?CKl)A;@% zODl6Wp?T`?nKuO>?}|^G#^+x#Q()>UF(WDWi>0xe&WIWs*Yf-L_{Bkb8%4MDz(?qhh9 zJ}ao21q4dp^}Lc^=jmD?7-u?H=bYk+g6{<%x|`$FU=Wc+kA~FQ%y{uR9 zBN}9W?nC^u&)a4rsJmT_YV_;<63;8;k9ZiD&_6x$9?*qV37tOJm9xZ32R~BlOi_yQUO= ztT^P4PLU!WGH$(J_gNGv4=&$$yb<-Zj#d9?bslhS6}mSjf&#a}T>qu#;|I zrgC|Gr#B#}0?t40sd4A|J~{*i{vDY6z|KQvLjX0k{TS1i_ZtoJPW|wVgz^Lfar7}q z$m9Dv31+;Ymf8EJSC1^kdklr-JM-7!%Vh0VxkJw_SH(Bn)+@fzO6Fa7-XP!h{wc5@ z9l25$_5L0h5!Ruo(Q~yV)==1azsMuqSmr_XaFf3nWyGOn>XW@c@onniCDrfjQSjd5 z?6@yKrvDio`AMK>!nvW1g~{@Zd%R}xkkFcu6G?Jc+{kHNmS7wqHI>j}7{EbEuTCRV{DcjDj*)v}Ff32gm=F*^VF z61!!dHu;@8W12i&?ead@r=GY8g8EG)5CY;DU$6*54=kHk6`lYM;jaws++FWoj}EGX z>K>)1_Rs96rA2Ma6GBoAscG^wP6R|Z`hgu=T%9vWqu6Vh?PZ~70(F_|nfAQ?qZGcd zXeLi8pAix}XRxVF=sTP10()L_Cr3MiAk zFCE<%vwTVYnY2HC9V;QF8ykJGJ$TZi&Kb}9;sW)H;>VEj%YV!ZqYFrtKd~?wYLNPW zs17QVfGs*`T)Y{=Q`-cG3||(gDBed2HYWRWDE1DiayZ_9(c^D#<};gJ&ArgGhu3pL@qryORm}2JZsm%Mlmq8Y#Y}3b09Wdw0K4*Xv;fQ zTPuCPJmwBIB94SA&v1d8~$$G#+1;fC}uOPl2l7Bb*OJ4_NS7A81Ns zf8NIYWqOpu5Jrsc_#MRS*4~Kh{GQP~ki$9T4avaFrx?Y&S^YDz=5FUyyM$*emv5^{ zUn{T%JP1&Vq}u3(f4(=hzj>9FZ%#u>Bu11F7`FG=;8YddS4N%Va_Rft=hIFXYP$C+lGGr8~PZe5a%?H 
zcK!*+`hoP1e5jhXa;3?mxtnhfz$O#K+op~Of`D*|W+2$CVC?#N95IR9 z4za9rS^_mBR@kU}|K?Ua6L{$4Kk+;54xzIcyV}dNgbEcewXge=njRT8`P(HMwH~qu`PGl zPk;Tk+jb@lkMcx1Y@p~Li ziR2YCz+y2VN3`TQf0=41RnEynw^=` zS;`{Gml?ru!Kz%7oUK~VCtDdgy*V>~&R(?Ft4F>x``}eZ>Xv*@UHIR&VrX$?=U4dm%kVA>JzqI|vP0jE>%)d(3;kW}LeNR>!FrNW4iDppr# zPA^ukV%>Prn8Y(Hx?s=S5E}3L<;Q2~C-(!TVGd~_i&Y>#4Z#(_ZEqSoZQ-={ZI+0w$Ur+vP<5mVN*a#`l3yN^^ui}RBvL!;?0y?HaI-12Y`^CCqbeaISg_j|EF(249J$Rtw$8DQs#tUXismk@SG2eK7qd`|+#vWQ)+Zj^ zOn2@teBpKhjTFls0t9gYcuic*=7BeKobOvOdn_#rjvFifS({;pGbbut^s?QHcr0w} zlD%R+DMtL@Z%$?#pg|X8v=Es%Z?CO&IxHOSWCO~69SHD^!8f(QbW3!k1HJ`3IP`2n zyf{Q)|BzpmRB3^M%LaB} z7wQcvYln}>egy^tfjA&Qg!D{S7a8pRkl=fZ`W5e{dvmbKDBZfZv}3-OZrEls>1guH zbp{nL%Bb3U@XzwUzJH${kA)Q2E8b;}j+j%<@FI8|JkVb4)8BoM=FtLzCkAM3f$zq37#_1+o!0=!oiYi!;i%TQ$`vfgln?+Y0lI8qLCe{9!+iz> z;bQ<_Pb{cuw#hW{0bZQUOTq?9_RZ2(IlhYr+mg-Vsd%MGgrHt(t$uenDANif?95-< zGu0W39r4{{E-OxD7=x=0{{f>`UP&N^h%f)D4N21LKN}vDYNxLooDeg~Z-k6#vP)m- zaOW`*$Vj)+sLmBHIpV-pL#(a9*aHg{3I-e81HhV4z5zQ$*!YaVtm~vCwoLOB{EQ&? zQZ$UZVj&Y4fh?tdcFqR5K4shX*>H));7gW=`oT+L?yVQrw%>zuO8_@|=-_I99uxQu z7*O}Y&jn+>z#X*`YwvDehQDVsBXS3R`GHvU78oP4r|PPkjL0EhzIdQ`=FJ8!1CjdmiC+SV zojJ|D`#CO{z9Aaate6ITgWbcyh%2UZ5yq530AO+gIHLD|ApMgQ#xJx-3D+_UWQ4$_ zI@R+U1e65<5hONyAIu`ffhQH1Ez5u5O5zOH)&eUtSgFDq3kVYYj}&srFH-Wam4+j> zZpEtugfwfd`!C{Rrr;*8)}ETKEZczD**Ao?AQewSyP-eM>`ffJZI-3@`DrAix~-E( zhaJ)6DCrC@T)p#u8R#vOb}j0!no`BEp9QY1elFs>_cqBAQPrCpuY6^K90>!4paxQDsVzg|Dk zs!=n0aEkGC#{H#Bn#Ya$Ra*bt#I9m%sY8Oh8vj@Kj!~hHxRYGX`tXZlL8N1HDCA=7 zq{=h4akp49<;!CHOg3#ALgYkl3iJ$4-tU+8{ztI>RT2`5M0{u|lr?cgTsVF>FcoxB zf%_pe*1wKEL!n$BXB^9g0@+~rdK})JlAW;$DY;>Aof_G8>>J5D0>P=Rb<+OBY3y*! zlu}hav%Ep1etJ*8@HNMzgZ{|a4})i}%1D7;nipR`;H<5N3`(k@x|t>*Wslod=klz? 
z{hmsoelpr@oC4R@R%swkk;#LDLLNn)Yl(zhslprh$u3@pRWN^M1Q}!#FyL4S(T~uf zw_qnC9Awc{Zap3UU;*a7KB5@eH`U=6<)7CQ6l7!)#&v+8HF zFiS%M9gtzHtN$76F@|e1-+(ZRNU35=lqKK?WbgjUU9RcCNem!w?XLER1T$E)CLoAj z0vY>i6x^jAuM*#2!d4(BpspQe*nBIs2Tq+*04*vW0jm}WQZ?QVIWCu~aG*bxa8I}X z`2WRRq(Ni+K&&63|r8OxV!vb%HQC|ZmH zv+_NvSs+W*{7__NC}cbb^9Q67KRU4KjOT~U83M28;>NE=as1ukj1|%PSU)+!KNG;JIbLn4j>28J%_aC!(cvibEU?t>C$sl*-ji8Iz%IQPg7;lrBm z77`R&YiqF~@OLR0WnEwM;So#l+q<;rzeWgBvFwy&_=z}p5Z7<*IvBKP8K05sgw;Xj zHilfE!=Ibx;q*nm0bSk6M_`g`_mj{3*KO$uEXy(EZ(kbH##i)|aY+*SvIhB-9_l(Z zqK4DCHG))t%GO`zPH@@ten0{lzQM2Kvw4$Y|0o>3VDgCT!n@c}q0O8~b|E*7%3<8J z&LH;JNYGGTe6d#3@)=0d*mLt+%8_2qUu!3koHz#$&Y-jE%3v^7>ian(5-9!%Say1a zAHbR7Zen6G{OJ)>tPLdUIRMWF;D=dVgUMruLGB3n2oyJa`4R;(1}17CPnq4H>8`pm1sKQRJ(BuB6OHh=O6f(8{af0$=MAqRrk=Ht>5 z&7TDsTY`iatr#UF!K1ae_utKj z@n;H48wy2os7Oke<;b|0u~K6;c*8n!+>l+F`V-C67bv7Wh}X!eaJxvRhO72BGV#MJ}r9HV`j zwV8&XJ@gBoj(HSyY#WH_1j9TnKe2IexWVrC0z?##GZao8c?fL#S%G^qU|7M?o}^g! zm%>2=kCW#uu#P2gBL@oIi>o0#3JTao`T6_?bUHqI06%wz$kb6-bh#iVPwh1kE3gZW>5eh`9^ z4$5fZrqYu=#x=~Srb8t9bKj{3&10s*e>4qb4Ijv?#K?_;Bk%qVA@zU9WLnrN|Z?aC8dZIuvQbMOpJ zBzI#!GE_XAnEDIJ6V0E_R__0|7@ytxyqdq&XpzUfwiaTyFaIwYq%XDLy$aE0hUahox0RkX<-8;@ zX3^%O?;`$LXsZ>Ik7tjOA$?p`zS;e0f+TZWOxyV=40p*}2KN|Xfnx924TI%>`S?75 zjqR_S#y7CkhD?!$g@^%jROIkC_vdCHD5>Xe+ek;}2E3%BBj!|i5E+3+oscI7JyZf} zR_Dcl$a57b&m{*!P@Fw1g;LnvwW4&g&(BZLsP4*V5^h%!gv#5jS_sKTSwcxTD3kNE z1T$WJ=cGK;tp2J%Y7waWQn_Z~lyn?42iVxK4`;+t)Yc7k{PK%K*wv5Bc0|!j@n9u& zOC*$@`rO)Ot$b`9#x!#I$X6MFB)W=60K%dW>`u1b_<*NcCEW}aaLGi ztxJk_zKCf%05%aguE9YKiI!`Hl58rpMC&rN(X&DV7!5wQY{uCBVOU*VTN5fg(9eKH zs}XHY!Sq+K->AoKLOw~uz?@|y6%DR@b@rB@YNvI z182VKB}hN1Yn{n?Q~;q93%{q3UV|MXXsto3l_yWM3JcyTxPMBoCy4r~=GlQ7;%zUN z#;Shj!90dX@ykL#+q`j#r;3dqm5u%QaQsg5@3KxfRm8ZyGrRhI_NfI^=njdl8}C0{ zccs0R((Z&JDWr+qUq>i@)DF@98~c?Sl!$aIyY|gv7|F;sdJhQM4}D;h1`s}r8Kr7! 
zQu*H%diRO9FM8fQeg_d0g~2Hgwdy~^*(V#S=XCxB?(N*7PzD_~SKjsfq14SH7WuAA z&(hVR%G|Jd{fFn_0TP=*ZP@g_axR}Sq83nnu}tL$)cAh9fYi)wGQbY<$>M*jm9-~)?iX}K8rN~D{IBg2e^oJ5@A-OU`{G_In-8h3-5R_^ z+W{JVWr07)DV-$+6bj5X10Jy3EjI0s!4`wS8h0!%tHDYr0u;Ky1lH64W3~arj&|4l zD~@SaK==L6h(q8&|I+5`sn?(Go%@EJ%f}Ic-m6lo`x9^jX7!aBv%|#16#d?h0Qf(! zO*#cxecxX9o&y;M;F)Dn!0;19n_MLr6hNxpTwhxp3*L`hXQmJPYH|G*8dUjHvu@}KAz<$>J%dOk01tG;M${E>cUgC!1EuWmpe_4Rn zM+s}PrE?;nrxQFQbN@E+%m`Y6v7YBYoy29>E-?3)iq0u@;~Pj5{tL7H0TQ_%oLXK&IK_+J**pU-i~q4~7@EF0sPCzKBXDNc1085icx~+gya> zKb5_e2~lC4JREF`CTk-j-_x|RgKw+fp*7oHCN5kPR(H&L@+_y%OzGyzYvA-N(qObB zTvIu{eS;WmOS&)yK$zcvi39WGKp-};0IW*^v1Oh`_Xw|#=J z3LDQ*78AB$xiA8x4rRFdI?=wxd_j^NXtJvS0|@# z004qMClL4a{ng{`tI3`I674a}gpdJ^3r<~P`la`sfaTrieXeU{eCC1DEa=46hO4bR zS%XxrYiz=uK!Wx2>@1`C0qYrWM#CRN`#;MCXjMA6!~nCgE`d*c-(5LDnTbbKE-mB^ z*00OU){}=P_QeeiSu*a*pO1?VglzLdy*qfmNj?AP-(pi&5#aXOh^NQGlYXDqkoI)A zLYJrP-oHMRn4AkY5W-gB1tttk5<^g8ng=RM0;&_@uk? zn^^1xTdPAw+?&jp4uddK^r@4e9rN{YSr0{#EufGW-`^ zUHA*vrE+CwL(_4z{q;D5oPWO8pF@AI$>zw(levT1wm8lU@&KJf3d8)a3R;y+dF4nNsLWR zR{j-SslrbXAY=$&f3vCq2)jDA?6VJW=D5h+tOJqJmfZ|u2ws~ z>DbnuStzF-NK#4qeBjODPFTos>9Q(Ui+S~6cPHt)hDYn z%)lD%^$J5l{`vt^;!~|&wS13hqt0R#>w?+g@2njuX8bbU#+CE#m)xmpcE|xJJm~dj zt=XiKj?wy!zRr{yx3{+!AaOi`|LKIxN4A<8tbe3IAKugvd(bB{ipzhVAxtlm+Yw1Y z4w-t_>bGW?u6Rj42}#&aIng{PtBHHIV0X@!!d$;y#X#^@ri7wSQ{`^*Xu-=D!{FJ| z9}$RaVi`=^oK|sKxddSHAzY zzI@kb(wM}!{tSA`P_5~3<1bcSMU(h9khVuIT}syR#=mDysJ-NQ(i5?35LO%js%rFF zo+3(SIJyArwaZrJ+oHL7#FV*ijGt)q$nwjmk#JQ$A}@bAA;{tXHQ^-Sj+f(aV|We% z=>>F&)FKtga`-e0ikA01gmjD4>ZGR3D^n>|((w1i4bM$?($7Jq)OGUdGzQ8Xwa6K#@AEXD50iUtH$r1+lg?z8a6^usR(x*CX#@^+x|)W6 z-g=G>qowJ>ULvUCH@1gO=*eF*{neU`!ZmYn&*RR>3By^^c%_cUPc(aA&*Oc4Vkg-o zJrzl;dsdlkf^5*Qx7#HQ)D$G+EBbj`WjGLZVPiAqd~n~Jrp&2p!xZ5y7EvLK__?*}NBF~9ZT|=y z=tnpkdW$C}Bq4Q1@QF0GRaOVdq0#gl-v@g>__1gw>vYlZ1QV5D#OJ=5vO?5luASli zgJj=|+F!PmK;&PeCj5OPCuPr~uk{0#I%l*}c%~UebvY^6q z%7vvh$G^ti*va*Vz$3p)_%l&rRH;GyAD+@sP3fKQ&aFyteyMul(R}ptRz+1!I_yt0 zXmpn~HWA6cM~tH@MC6=5i5Zhw?8&spWXlqNsKW>_5OE%6#`6`eNL`l{*cq8e{2d 
zgUrfT6#X3@i5c;eNUj$yhai5)aATNvnMGu$?tNn=w@eI6 zEVS%(4TSR_K;X!VUlN%3f(%R33>%DmnI#YQ-#XF@Mc)YEAv#Mm_SB2z z^BxHLjakU35i@d(An~B@qC@#>Qn!g8LmcYAR+&P83e9RvW1b|beV%cBgZWk}*x%U^ zN8o9+WV%#K7=bPWnA{&694HA5!c3+BSOE!uN9YdELHa7ttHAA?^3fX1ia-hj2N;lf z$VTM-5C^b}jnHft7^jY~7_E8vEY1xkVe8>hszj>XC$MXN(I5J}df1*nJh?lL=`%{c zr8r7nNK0V*!uf##aSYjo9>or;gVZggDZile!#@Xh+-MLXUECz65q_^mi;JMH+()lX z++EAf%#r`f;gyCC9G$A4XI(Dc{o!g>YvhyD?RR~iKYoi&<_T;A_oNUpl?inRX|XQc}!&7M}dEB(jj| zXQDN#Sm&nz;wv3Pg1(?6Rk9nMqzaU0QY{}2$1H=(Ov)4WX`d0INt6Xxe>MvA-@}Mw z)1bpm9?#&t@c|VlbPDW~assu^AhP1c-?dp$O0`Y|NmYa4c-M^Jf;JteHvMXSTp3z- zBv$87TR+UBx;8s)jZ%rBRM5q<#OmUU$!57!t<8#7;^=^+9pe^4=h<=^M_Y}5;L&-C zMcQG_jG`&U`*ZAdftw*M`DFCr5ZW3IeSL-$?MzX*;(%qd4DC}WdRwXhkLo&sbr*x5 zhadM~rAn3jS1m1FlERSfj0%7yQ}3A>#*Tkd=grz5KF>7f7VL~Qt7?Qn67lqOV}mg{a&q_54_+0*8V6wxoM&0Bt1rL6PJXbCVYd1X%je7k^X30L-s zix5~X6jZik0PG|A`5P+bWP>0CQ9eQ7Z-d7$sB?HYUZMP@<@M02lV;*Wn|aj zVRju>x|cx#`^GdRm~pBVxSeihFQb;$9r^E$yi_wK(IIMA-cvfRC^?l2evuRXP`!5= zuFNBn5ymfLN__HXtSPca%W8hhC-{H;qRS9YP%?M8n5xW}hqueg_0_)Z7E|V!^%0ke z&r$Tu*@Z7xrQ0Y-G^0?jU8mY{Vb+!|5pY{LtV*;+k03sxL?uFCdHrckL7`O@zs=!{%y)wkMli*Ws9#DQ?OLQ`dl;C>c~@Rhqo`S` ze!78zmz0Pl+*j`u=9APM;3F|n5OuCd#E|!gj_+|e`-S{%zwyRqyI`(5f~n}z*@|l?%0wQ zG<-D5%4c0CCP&rxYEoZBQf~d(^}jC_z?=*txjR9cFYrVcHmIjfQomTP_tt%VpSqH* z_BS5g>Ju(mtGsUA^xK_PTnCb`1;*kmc&GFPTSMj}N)7s{u_>VR&&dfy!8`y7?7M9> zia@$0 zQopzR{xq6j`2DP}ra}(&w4B|opwb9GBW1}=>f0M1yY**$;iK&sxVUfMnmLNw8mNGr zwIdcYkY55A+iPdwTc&nGQLIlk?|kqe%+Dk{^9+f%h+jUMw`-{h=Bu69;uD_$o^{o4xO$EIsXfUC z-5zqRNeY!b&~P3fmAmjuI)6j1jVqh)Al?O#y$>y*RsbA5q2oL)nEP0$lTXKqPt7>a zV?i09YpnrtsSGbDB{Q-?ip;mGedd%ZpBpaxkrA^MYM{^jMIY9#iFEb^PxdfnFQ zaKq|X68xg(0!u3?VsiwJ=1Vp(i|1+ncinaWuWg@D89ZJI+aOPqc?@aV~;YQug3jOfTP0Wty01t z@`j|Txs{JaCVLrGVfOa?Lrngi%)Q6$lmYcPWm7;KT{va+#g7FnOz^)0sy=W=S8JaE zVxkvJ%VA4B50nUHVNh`fgS^Z@2mtWOf|#WR@H8Vz(_@3=>{(4*14>cp>5Fs~%prA} z)M#|k`3s--^_r$_pR&w3O_>O+LIij@oF~_h!uB-7lGrVCc@oh4g`FF~fZX$c_^1Kwx~>#?Au( zkAnf7#qP6>38aonxb7WQZi+`4c zeBdFLb7y3S)WNN#rguRz%uZM)*in-0t0%3^G*kp8{~X1`mJGE6#jsInQCmdiQ|J_a 
zz>w89|3=>g-qf0j0b%!|>zp0*0)vrj?`CnnT-bm)urj}S^9YukUa&wi?-dhR$82Y~ z;R^Ga;kPa~&!Wj<=i$Hu>r5m5zDDf?5)VOmrcn9*@+`l+^qjwW1WA~}-Q>MhQ(v@4 z0(szA#kqMrctz!t$$LqD^f9~ zO`+SesmQX9nZ81upSRG61@!Uf*iceg9Y1r_0X+Nop>=&iyD&MFGEl&7 z78dmrT_L0L)Pqj}t|e1DGn4$#A9)J&i9@d|`^Z3~6d*`l_clz*)qkft?2j2!Ur^;I9 zxg%Nga{{vV2ruvOZ@BGjtS$?!`2IfAYhqFV{@`p~=4ct6IPqOHlHFcA3s#~GxrmmR zZIISJUv3OFoS%=Z&#J*fR}GYp@oM6|3PlqSd_<*{m3SBW-L2=b$ela86Z!1>Rqtiz zCD?k(vS%J29*W+sS_z6Ajlqh(K=*)&hQ<|8IbdZlpfdw>R6BOW<}muDNV&B4^uO;F z>=~=0aBR$8P_Oc4RIY$W?>92ZD!y^=Kn#(nnBXP*_C9=kX4(Fo2PvfApl!v7<9%t@ zG@-=pCqc${#_dsBEgFsp`RerUkLa@yLjPDdURe=lH*g&$L3ImN%%7{!N)_1PM?RXm z9G=&D;ZkzirkAKQ@Df!jrWt;6X%)dIQ%Zw}pbEpD?4-#ypBrL{@cm4Z0EWq+SZxxBBY z3&&?cKO|g;A9cNVF3~ijK7bRvSyRl{NUkA#UF*FbdC095irR0aXN%TS99_Rqtt9&U z%MWty?^tYbVHgtEp5DP3jg0fmh@tmh$g*$icgdt z4Vk7@boao;9(@dgWE?{V>cFm>)XdFCB?TMYr*O3Kh!7$Y($+DJ0`_NdDrnY$U!;Ru zTJBA3d^zDI=`Ua5xvid;?^<+zaHlVQRge9p+nS$Rm-!t?3PaYtv)T)Y)j@XSwsrKm zW+msQyKzPM=*`7L)(i61h!B*SGCqVKqKmM*Xp~b@evW{plcqn!$Ghr)^Q1sTNQN3c zrTVvGvw>Z601x+{V4mu{^Mg4BaiYMuC(u7}s!85CR~)pEOv5Ce69wdDYfsKOZui|Y z#1a{Cr`B$X<$RO?GYSbg$vZD8I)$*=(I)8apmPP2O|tdk=xV&_dsxv9;7_XZFML3F z+&RdD6)KOM{4QE8!|pka(mHUNOV-P1)^ zZg2o4ER(k+zB7g^RFg=pq$s-lkkYJ|o=p+Vo_XN(<5%btNMa(*qtv#Cc8gK#(3*uH z1@4%Q3O@>hZbz(=f~m)Xd$AB9KZ-9un7b2qJ^GKXQP_7>7Q}adzB+%2n+MKVM9H-D z^!W8tDMmiNwx%Yxr5c?*cIz+PfjwYh0;^!TQlX$f*I-x+GBjWd z(Wn>y;2_YE2?`PZ!(CJQqaGV|t$`F5m)13GP|W-LxUk!p9gud_vkUlW-ekU|#n)O|uO&8|DSNdSUcJd)shM7B zCe>0vMl`p9Cw0I40zzSzwy+ z`YWE?jgO`_yRWL}D}IEuIZoY5*kitJ;ru6pGipe~>kL{&_t-78K_U(ZXas%nf(io2 z*d16mEiWytIk)%yJK_M6Gl<=+USTtQ&$tOvIwo5H%`02va0X&r{({T>cq;oR7=}K8 zn4xsVa!^?T!V2sFTFeths#{Yb-I;#G$M`}40x@hfu@c+n9-iR0XuN-37vnyiz` zJ5}mNq{I>Czu5HN>X&Qar=9Rpio-peIQP(0KAOvKP_RC~6zN1BXFw|w6KLCu7tITV zE4J|LGzLR&$BtUmL=ZN~f*jBu;!AJ`* zE}(Jh!-prH;zL}Qpw}0K62nL;fJ7VU>vw?j2E^2Y=;5Crg$rbDzKf3jy-@gY=q0?d zzU~A1D?to0xTeuPml#Xp4Xxh$AKb_>OVup{hbWw{-+orDW7YmOeXqp*p2diw${cX*vt!t7Kt>yO-49Uil&F$Rb z_kZpz1f@AY`dI1Um5+5YIpNODA#N=0TEpo_USv1KG@nd%Gbn&)v0b^ljr`Y2uC48> 
zBbmOB@@0YMoA*T12Ogn5f**m^>GWJro^G<-c-^`RTiI~-kDSYAw0788$)4#1^gQ4` zQS87#;|Ze%CQOjR!|(#_-xa8&0Ue{h!ug<14CKiGfU(Gd6ap#Fpj!wR7Z*f{e*^_G zF(sD&`RLdzx@a#w0lbn(z^${B)A^MtV8I}2klPG=6d<1!oUX7!a8NpS*?CM!E<#{> z(be*U!j?w`!~=bxyZigmRH}?0AK7pPQAoH)t#ajPET$t+yb#xps9_f8 zvu=<9!hv0lQhU=Wx=rM}sa5;$nQ8Xg8U2SC`?TSPQjn673LjR&-WYthPzwpuGL>(XPi$I*#Z-I>`?g9rEE#)c ziqkX@K9nR|W@l+>mo#)XGYR*MRpANTbP6}OsBf#+%`|C5k9&|5KEa|lX_L>T+0ZXx z-_}e?HN~AiO2J=wLK!>2xK>5Jw`Nso-0kDS_*W=@2JH5V)P927F3e8euHpZcYY5z& zL1cQ2RlS^{c^&X;1adnq*U zn=m`CL)v3zQu0xpB)vzzxzWDfa1~i+4insT(J~lAkbcY(8_}Siaic}}Nwa}GDDyfxb!S%a>ih$hFcgALj+*v2fi%gX4W zlarJFI0EvR5=O_S&~iYpfW--5{6I?Nr2yL&c^2sU61AB~3&zv$BLf|#l<%~FJ$j|e zL=c@vvzUho*)Oq8xc_B<{z<_^zY_xAOH0r^20k!+3G6j#tVt>A z7izVQ*-`p0e|ir8MwXV-CzOzbAPHuE!nv-JK(fq3@EHDGT7JfTBq*;-TrU;jy`gd> z$MTIqfgL)x`nNgcWh0s3hyTD}K4lnh;>w;ZkWTWKjDf_|G^$g^Y=qgDVDgiRB4f{w z2^Ohh&P^Taw^C7Q_$PEodkye6v`N)UBL}Y%uEc)BE3GVbzB-2DdS*z%mUQ$hHOjU1 zJkz^8!c{95P8BhaWV6k6nJ}iW>t9@WFR#72(f@8v^}*mPV&Og<4HM)4V(+cPs%*b* z(M3uqg3?{mB`qz|4bm;$AYCFLh=7DhNK1Ej35t@^rG%1Ff`lL-vFGylefv85+NZAb z_gVk=>bur@o^?NWj5+6+W8k9&w^h1e*-{ZmUPk&-=@{{UlB=tD&v#7!9Z9~wEyo__ zKfmj|F6jCUE9TCMh+)f+?5E^8{l2sYil_mKGYScz&mtLsn+;b;LA9M;hExx7+qgtL^8iAIdEcPYMIJ4?K_K=eB{?Lqb0vv zY4ONq4*#?$UX~U$E1YHv-PJ7O{84qGo%6F0O+$-S$AaHzgA!xnN@pX1>PG_S52l-X zdJ3<#lDHzw^zw~eQ?^vMUDMKH8n$(wGja9bA)QsVag27C37dH&m}p=e6sK;c-?ui9 zmB9Cl)Im%_?xB9Sp564ILUQHwwu&lc!nW=C`OlcN>mgNt14{2&`K!o98|8k_p>_*S zo-xc9^lxS)Ax&Fa-;Ux#pLuHN|EwWX+5P?cvF|7H2Y8knN4Hto;_=a4CrLh03P|Tv z=jNjJ8s~zRh1hLtz*ERYw8diks%ma+sVExz`LS;x zrIPO3DHaxsx9O)tP4_9eqk39EesisIytu6S^!csK%8%w6hZPPD->{{W5>q2*9)SIL zU&=#D1R`{RMMHzqrM5L+Mvs>64#w%NTg~Ir-FOp~RzmJg2xWE_c{YXHb(?*^j%{Zv=$KSGUe1p4-nZ~YHp%;aj*)%qCKqQO&arrFi)f?Bn z5s0^gI8!=dN;D^rlge1Kt47qVeo9gW1b|r!Z1T-zQY^pQy+m|sl*vyV%_?69C~41b z-QO*sJsERee|NnhbQzIU!T!Bw(Bdw!TZL@6|25IYq4v%>TIrS9H}(zghAMcK+6xyH zsrc-KiI6rgb8Qe@i!(73NnqAwy;DeEm!!G+^0%=2^g0?M_dC%zHWy;B_UGtK#40i} zHMh{AZ+IIJd?$|ap48t!A@2L%Qo5b?v2gPtCqS=BkNSG(Jqs(8p;eL#q7?A$Z(kEd 
zP1PK|DCbp7SUkMTfp{d8mX^C_sQ70yay1lH62a*UwFviKzETeK~GF0>wT*So>=IM1@j<--#tnq9Z zv&#z^T=S)rSj2l|)jYGZe=0sD67vj!*t<(8`$6@F#k1sfSl4AgB|-;LQYL8Ln1=29 zl5TXMvbc`ki^7*)iHImP2wgr-E4(X0PTBOBdo3E>yfr4>&lR!_0O*>YkcOT4T%MnT z1VK6In{PY~DR^6h;9<)v!dA-YtMN?}iy!;bvy`(C6swUz8fCZE;Be?w-lLsxAN5N# zM`MmeOm_~Up-Hu~kISKdQb=ckQu+Yebm`si4qlpJqaR2AevhJS7Vwbv5JKwHLR+b{ zJ4=`gvHr%7<2XLu9zz0cdW?6b+eW^2mKd%0HhNnoU%FF|DQy2k~z$Vm~4ecQ8wk55Qy8BYUm z*%h&GbgLm06%~a~QPWI8a zaLZFP&%k5GMR2S<^54CV?iT#G>HD#n=lx;1m`S~NkClJf8@y@dlx51+uB!~NB&Hpo$ptj0l!HYojOk@^*bSL|6d6zW6<`?-7=a}9# z)EzfplrH57@f7C%;lej2M!&swv$|_|PSmY($F`3kJc8pbC!QhytNZw)M(+{w6~SK) zi+#KWjMv?{FZP|+<=ee}q#b|EpuF1^k35n*e2`w*EPwA__W*yux9VW&kqW-o&2P=) z{apM8$3N*^R=wYQH`VAmBia~U(#fs$y%!BZyDpn4V(z~^w9_Ve&o_CmY3Av|{L)&L z0?|;Mm~{n*kl|LG^zxSVC0dX8HT4Dont9v1otEC3otG==-X1uVCIF_=6#2fI&y9I^ z%-4)Wy=jCs>Hz#L{4z4kCA=!qzt}taB!bzBql7QM)0Z_s#iWcjWU{|^9{c3i)%Uu` z2N@97Lqb9xqHYb-rk~6a6xO*D*D5d;ao!A)0*W(UXyr^Y-vuefp^F zvZtN%CqWnQqXScA=#vh_byw2wT$$K^vlL%!mbnxC%dT|RrS#tHZs+w^p)wBBv>hL6 zw11bR9SA9_{&nK7d9ht`H9JMk8RR>b``3znar=+*cMeuu%9JJM@63xsXJhJN`4v4o zIyQqL&s4uR^0^492JF7+RN)!@^L71j2h@7OgE$>BeqBUIUZ9|Jz4lyK`gJc@&$^m^ zJ+q;CdVV)$(4@g(N=j7acD};tP@&Kur(pV8<{zUUSFH+%Ujk7vTT*&@j^FLP5IH7w zOq)}qx@Ug3^JKZkhv4@O@;L92g(JplR{mA&=-Li2ShE>@6!RYiXHhT19j5kYN(sI=G4^ z{00~P5BOf6PDSV4ycC(k^FM)qD!_m07gz!mS61TfJR;a4Aa(QtxemBPu=sUy`bm3L zO7pP$%LA0Q{fHRBQhuG?pm&eMilwR^Vz~sNp)?zFWz68vg^dm|8P`j9%dy^Ia(X0> zMHeP*@%g%Cr>crc>e%p!y+=QB9a@BRE&=$H5bfT=Cx512^wm7)LL$IV(_o>3SVQ)~ zawVUOrghY7f$8^u*yc`N@}H*G&KWWVa6I~{)9rT=1F3mulN;HKmcM#2dylY}&pTfc@W4XkbD|=e~^zw@HU=l7Sud zTG#_6_sOrz_w_6&%BmmW=g=Z^!ZE`8!j?PSIP-p|`rvCYK~7)%PZJK8ZLVb5aap zeh~iLvxPk&e^2|1L%0SH^d94{hdscGF>2RtkaKyM?ui+0)$s<=WMGzA%tbSvTjEk? 
zYU@OQGFDzx5_v4cejW1`X`5!zgppzl-Gn8t;ry)&JONT25%vg1?Cf;RR2${{=4$~} zbxK8i0ayvEx4MMSY4Ipiyf%`;iIprTt$*K6nyq*)U6%e|7;cP32A$`a?b&;C8^RtF z2LeQJT_;~{eM@oQ{P3Fl`Ssfs9IV5%C|J}tpPz1sTXIj3k?ZrO?Tx7w;aV9D5G{IX zZ#E|C)0&w{>A%#$Q>s%|P8Be$i(_mlPd|>t^;#GMKH!l^9-0D53L<+#Xc-qJ4)IbK z3u6;>F#LdL*lmI#ZO8bEH->59-8XG34bnsLJ39FTwA4zt33J6$hO8Zm>~z5mB&^za zTq)NOfXJFZJGj1WIW0ZPAHzcgDd>{np#JUGd2Zq!sfAE!(A8OfsFSpgg$SD}=)9%rJ`pAYL$)=dQhaVnxmv+k6(2V{U zC0tq*k@OysFRv|5+Ol&>xuIY^j@7cLeM6BZrSoIaWi}RNha&{!mIiY<~C1quQ( zQy{#;1${{LeEeG`_YCuNCqB|8-YI$DuQ9M)EvnBZ8vo&i%v}tdhzVTnKN0;_2Bk01 zUkMT*q`r1&V835`ZEhQgSZqvl3bOCKjeE0$CxH&%|4gTkn&^mq3KB?B5-HtU3!n%QGnblY1Rh9i|F}ej*($q8`~G50n=Hu z!d<4W+LjkVh|q~EFEq<4Ha^jFW3Jwr)qOM?MAyXXqn60?$6v0UFy#Gqbb{j1+S*#4 z2UnP!D!_6`v;#87is1=~9|>oYc-ogYgB__Ny-^`3_v${XWt~Wimb3fQ`;iUXxYI(c zWdlj52wQ_^0;A5o&&{o0?FLWyAkGlB=t`OvYK=ymHm?=S_@_2riWuNyGi$Of8I}vU z6lft#+on(w0_{5^at5{8q9^T)x#@#T9BPe!OKe3~evUA$n*ue6^#>%OA zp>2o587}AiNR=N&%6?F%HYh#h4b!#ToWy^IoKT~}$m1fy=QhX=g%`R~G@q!8c99u8 zN714M>}|!u$QC}lY;IkwRL=Jsi_B6u_F^#=HT&nEsH6F0--Oou)6(CrGGWD zyyEj0an6#Fr zkKHTz+2v)cl=pE15>GmnL}yjuXN4bD zgDjybm0GC+{<8i$x%#DVPt5C*C<-W&5>PW^S*J;CNF(id)95e4vISjgm?~3V@r>RM z4J?tC6a5_?9@IIqjiF>e%Fc?Ys++Z{y?g21R`WUIb;ZWj%T2-}f6I3!Qnw_YFO^0e zenLQEWWXuUUjFs>5#7zGj6kKd3QDK6C-3x`c(lyj-QZC5b z=J^YGg~V_z)krOyoR8x_tt6H_61TTmyh?NJa3NEvlYe{6sy_H8WoK`>P;^7oK7Z|# z0eqUr?jo5Gk1&|rknVQhnH9i`V7L|e!30ZsX=Z9_%~Qm+Wey0HV9tV6`PF1$h0fNw zKcID@t)~~!7EkwB&y~YvV63do)?rU%PiSbv= zhN^s4&8L=_9MrPdSi7U^+BAchZF^>lP6B~LXIZ4O8uWB2cG;vP5}uHM!$~l#7#LWsSStku-lDN!lHOf?sxq?U5R~Xrp270F4auzj|G7YOH zR*hO1|E0>TW!IGYeK)ROFUXNA13oZ_SUYcN2j=3(a0=6iNZVasj`WDSaSu{)zX=j8 zI_SQX%rn1)x0XQSGnIGp?k}tu@|S;MGWI0Gzk**gK?NEeEEYS|uBZFp5-p7tejU$J zo<+gGLxPQs4Vh8v5o%lDdZqBe1Ow*|@@&J-z9F-s4Vdn|dGk?_rHZn0^vpI@T++?Z zLi@BrYhHt9TyS840?v*8H|{_2WmpurQp##-?!tHRi$PY|pus^03O{nv(xF-<*L{fS zrswAa0h@!1+S!rjJX2SIL@Ha-)^Xf304EqFxw*N}qt)0bI3G^Se;vL9+Xs8& zIfylZ_|F|CrdXNoB-5&OH(|Pd96SB$DFA@@IA~>4{-+1l2q+J^_T4nq(`y1Z8_*5< 
zLnq1GdgnhY^1~uHPX*8#PEAhwc9>FgTfI(p1qG;)EODgXKi|{Ka}BE7H>e;}3?q{_ zw0(rMkqnth;Wn!*DZ%c4Bzdy<43K(LV816b< zp!G{`oc*?U1x#RlBu*C5qH;}vVFnHuJo(nS=8Lt#SakdZo?c#%*>}?mdH!+x2%!1# zH+OjZB~Z5hR+CpEJ!GH!F@Xn}G6*5X;~IU+Endy5LbXhAoL5d*l@c{CL;5`PmqH0m z0CJjLq{qR4)nE5}4%!_7@fR#qcR<(yN|U!EWsspGxOPI&f;AKU{2EN(`+&^@p>rIM zT8wm88nO|e;Wy5R>R|{&wP%hKAIs(26{yqziDWYbI$f#7VBl^dHK{J34IQ6~N}TOe z?xSwIaE*A#ox2HohmE=c9Vf`voEa+@e$1bjhv2tSTvk?RSRs+E&Xy!`y88O+H2Vtt z%05HGFa{dj1KhnoFjNXvHf`VX4c3D64Kx-H{`CHvVix2FF$A>FvvYDfLjerj40MMAtU>!Wp-E+Jn)aY1Pd7VT*AkQa#0@#uuvI;4}yL zz|~h-_WgTU`#9ggJkl<$=Vf>gYM``RAh@iZz~xSX`IT9?3D?02{{W_~@PdNDFt85h zr*hse-VViz;Q+DoF&sL6La4e)`tH45aDnE1P;r6!61Y|3W1+*D4z+`&O1$vv$a~ZS z<6-b2gqT9watsut)WE6{I5;~wSF7GHOm+RM9fPvp1Anic?;yXx)f+mS& zWtusUAsu0ReSN)KN-#e817w91d^eC(KKP37frmp-Hnkba>IghNfHxs+HFn4>!kGk- z>%FR`2Dt8Wva;cA_+c8po}QjaXHEDVXo5<(ul9jWj3mq$S+E8+dv_kmoTdLR@DzOO zx=k3!osnB*7Wh+^O&J&%EODXJ@hgEa&0E`>0Z>Is z2iIWsha z5fo-qI1COBv#$cja%JJ?0snT1i&&Ky`M^y7J7&M>Lx-h`1AWQj1f{@o1`pK`x^-AU z>5@+`b7iA-p^Rq-ACD#m2X5Afg<)-`{ECn2rCEG|{s4-QiU36IELwf{m%-Q6@ZSrS z8)$DZeG#-wOg8XQ7~UJ`CEyhZz2&jUc|zYFzmrA5-4Z_aqy$Xu-|R&G2pk$7v11 zr1w!?c9gfs&+E0tat^>*s+196fQY zBhW@hBBCrYvJ-OO_dF_#3t+F_)7$%yFbP^y>|&vy3fuou1zAoc3UWgB4Gf&^xIUc= z7=(g&x6bvn)$^dIpnn83??ce&$`tWpi!T5JXVizO@^W&+*|0v&Phl4}3k*Dm%JA~a z3RsbMn+QnXCPr@N1YnCha&v=J3mB0iL+yfM%s(^g?!ZZU@TT)+f@D+t8 z^1R6@Y;^SB76JujMnyQln!ZrHaGC!0?OWjMEZg6}6hQ6Yd?in<*vthFzFmMJ%!LMq z)CItqDPybMr10dwS;3ffTz&cWO)Y^jt{3+Yc#X8WuMKQY6dC>7jnp!P z#}1D`2^0|w7as<)79HJv4&tk2l1?m{V%XP)kVkenLzo=896SG4fFuAyp+N+40-)4W ziPVdz*Z~p@u9H1*U<_Ou;|%I-WaZ>;p}$|*M`9G%6f-2BzVq4I;@j47MM`P(w?z2FsJH}AlO>%e;uL|2Nv8U9QyI`@q5trxxT*63{OnI8bK0r->&w( zT{sg-S|RWle#6-<#LrJD5qJV-&f>10e$#_^c%?BJmN@KyyA#9$0E@o$dbSv&#XR~o z@sWs%YKZ4^3(TG@@j;%?a2vfmDsaQl*n%7eBO?^^o|k0M6$wuli?ITt0&sqo|`&dL86GE8=62Z2=Q52UPfhQ1C)xN1Ee5uuK!@fCvC^k1-I6WbYX`$3jhg{Jq(g2 zWdNiQT|g!81XzJoz-8Ah$FgEgV0R z0De9};*Cr}6R!L*a*#k8?L7s5S}?5hQ^<4&LP8E;WHb2J8yn9btPSc_7=q_iGPfD@ 
zXZHL!)SW0&Cy55X@8&-}u+KgP{uva`paTuO$P5=XYXQOm{C-xZSJi`&6R`_(4bI?u zk4@B*x9nel5%G=R-^HPClu(vaA57pwF<`u@(yy8zg5r3oYqt#Wbq*d6%r;*DZZNq( zeAg0u-z=1r1^}#en;7oFnQ_{OG)L=W2$9SJa0`lBy+9B)H8Z<{29?y_m5cLr$&uk< zT^7Qn3J(>e*(9ooZwG_+$cth))-(uZH75s>*gZTvNQ@(HXkz8W!=veGg5l8TNCp7q z2B8hdpagtBSXTX_4)|FEv*LMMJ;7ZtABX?M44nkf08Za+$YAOGhY*s)1zTRW?{Z`> zb%4`>n7{Yt_??tN+3)#zd1wfbw~0l5Y&IK7B||^BEvtF*79njX150=XY03?d3QB>< zVFPq}4qye&ls_Orn@=9b50jKR3%-~+3pO4@VCn+m0->t3>};gJJCgs5yyIrv>#2h& z+}R>tQH7fl_t|*%Xb=EZsa|NngTPxYf8|Itd}+%RB>?DwDQj=&wGu4+XtqV9MZe zWc04Nvqs$lQPmutJ^Imxh=<`QT%LRq9iZz5MjTbe#aKRccBKD_phWw+)n9L9pV+lK zIyj7PdIBTS6zEi-S_SBZbT_Ie7X}GwA%b|}{sUX8fr>}W&60^69z_|Q@DK-S^#EphIkKaPG86Yxb z&k@oZ&}Bh{(nZV@F3ZBnd4Ys?(97Z2FuS_CN*7cOn-@eT;JYo3m>)J-h9_Q`o+0K3 zLxGC8j}L3HBmFI^S+Az@+?S9F-4%jH!x(u;ZJ@~ssdSeF)&*&4tsoEh-l!Ti2e}7;i)IMtv(J&Uc>rC%W33&YB9O-zTquO_ z2qHjCQ;3fMpdqp2h9QBpoeFZgR)R_(SxnCj3c>cjyKP^zywZa}hKMC;07#g3>%5@o=5cR1Pu`j47#vtTfRTt*djt{hx20KqyX&X$7_#6)kPQl_*K6TOKh(;l z*Ai5J!<8^SJAR`@k$}Lypdv?j9>~?lupGb(LJZ{I;78#(F=8$M5JwxqrVqm>$ANvS z97tuzMTyKYWE~y2#A&>HWMVik#XOhF{PJ~5YpAGaM4c8ViM37`AybcyH32J@`YB4I}Zh9^H%o(R5*y*q2lt zgE=zodlg4SNtJ|Nl`O^TaW8<2j%4v8m4n~}4iLnYE4Q__4zJ;XF^Pi;TMRZ*cNqdx z_E!qhL95f5>9ylpG9Q|SHD_C+dejiXFv?!`j|p;b#4LykY^~Yb8-Nw?K_cVnk6F;2 zb=%P%oLDtzF*ry}EFa$`Fe1>@>~`LAs94BIXBrt88cZsBu`J_vlnZH`$rMbvPy_F+ z!*Q8hFJgqAtcziqn2?YV`yEzQ=az2^UpC9N2Ony|x4~;B8n<9_auN>9hKQq3uzqYQn)_wg) zVvlM%n=yhaX{&vfzr)&gO)c8`ArAVC9LA_&3wSkc4P+(}B_)rSHxH}5c!Byt$)HH> zc4%26ftFyRi$dfJ#Kun*tzY%py1Kg@Nj{0CM==NlRkGJX`&s_D;ovYF0RuPeb0{6B z9x%O*QFu)!jio9$rw(SbIC1@TaV5A8QvK1^p4U3je)6bt3cA`mh%*eh=v z8qnU><>l2ijBTPKUL?5crx325QgAk8=GNy+(JCn^N7*?$wckA#=i+2%mt`C%`;vi4 zYuH4`UTv1agCk-nig??t)mkTTM?I7O{L!Os0xGHt03~r7tPuGpbk*qEsn9Cc0(UY@ zot_XoCl7hOk!Q~`xuSzS3U24-(i?8Kd8ivFi|A$lO=MBaP69_A5!bGW8?JOPtwpMe zT84**yOPs=(2&z$i0CL&Q3e;Xteb~{&>3cE?R#BzekKjV zsqt~%56qi*!cU&5+U`8BH;%S@C|hD$^*!E8{L@Ek9sX8{s`>cInunQLj8$$JKI4pj zhKU~QY9t6L`JG{Zg~6mh6p0z>YX{?4UiQe(Ww8HDH7%{nPIyq+(&7jBgW(Br7=GGy 
z9>biwOGx>QQ2`HMiBg%Np0ItGSkv@ejt`wTu&)0tmK=6`WCCT3jb&^toh_NAY|AG6 zF79;+`?(m(JlQPVUB`fEJd{dZvKkKj$1U>^>xcbkEkIqIpGgIxv+d~$IA}wqbD$#t zdrUL@pq<0^;(2-b@NjG_9)1a@=oEc$Y|p^BmNNUCVcB=%8W$7O=|WYa77Cfi1&XW( zVz2Nyl85|H%RUtQd!>n~kz!O-#2aa{#UT)O6`U2-!kI@g9CHSxrZX&r{WB$NAMQIS z^ec^DJuNkBpQ{U3IG-B!e=;H0U9e|4}DOYjt}(jDYGW zT}WRhea(+3sbX7jHFB9A0Ct%43v&}93N|gJLutNR@oKR{v#YZiD3(NhhYRlSk zIaW+sd3ia+CeS&;F&`eL5h0t|_R!RT?Iyp!mxywm)}+R)LqF9k1H$L)@T5QqDqe$y zKUHO^5@P`(e6Avo59M#}86Y)l|2J&&-`QdE2aV9o1L)Ms$_nfgu3bpT3kgoSD)i5N z{jQQlV4r@)?l7U2WBUDe9=(iWZt;uh8n?$BgOJcV2BZvGbzC1#xZ%gF1yDBY;ds2t-5Nl8`i64-#jOknHC6Tmy$= zfL{QpVB&m-#v#a>sY{$%HAh393mcM`2))yn7o zeuyE#b27+MFrC}D8Gm={!!MX_G|=x1{fv?jEkWOP3Z%DBkXBM>TkHi|_ngTkszaf; zg6`UEcm7HV;xX;289f~xcmO+4yXZur6&HU3C?1MFEf8~mFj*|lwfBbzh33icSD!3U z5GC4Y5E|WMX1-=y20yd9E8JzYdHwM8cz40E1`MsD=XPSFqd~VOhzXgaJC(_---Or? zS&D#o11wAOz)pclM8uFQ1)=~!=wg5p6*BF94d6cYlC;nY0hi}9CJC+-r2i5orX7t$ zpfG;j!NI|K&=oShSFZmUW=sV#_>3ZgIfd<0bH5b=nuiY`K6vnK!Nt|i&d$lH9a7a% zMC{0T2f`vqoVssm|D2b{%DD>(rZf#?IDj-~`9xv{?@WMhM12kz)r!(moOX76EPv*T z6Yxsb6@3A~l^@`MkPr>zRFLrmQVJfDO>f@3foPeQf#E^fRHb3V7E%k*Clv~PXOQA% zDugF42|pF-h=j<1^#+bukV$ujgm>VHn}IDjXi< zP4AFPG-x*zBtc{yGTz*f0EE7|u4oBMGN=WdLD&YVTgbYi4{sd#qq%AlHb71q zqC$lNtyc;%nv+ljlG_%AggA>8te^fDgKLHP$k*?$K}#RP;TQRdQTuR1F~i27(b4>w z`UMZnwJdxl;%LPL$gA&ziJC%!m5Rz|2-1Nr{IV@%j#CI?1WU+JLjD{ZQsq!p zWe+TcKmKzq<#7?UX0xo#UcWx;{4ckuJq}go=k{=9@ttcJ;zdpN1D~)=a zxO|`b!*vDc3lP=y4dpD7;s&NxH+@)F$o|a@7AN!s$|UKJ*gj+^=y%wmk6?+*vK^|S za7*rcBHKSznxmy|BeR_N(yv47jEyYP(Z`|+wqNRayH?XMOB)QXhT$&I!U3_@jtA66 zkJk}p?D>kDw^Orxq6VFv+`U7T3KHX898+^Xe__Ogb+m87dCa}#0KQ3_C^YTH%D`T(=ui4x4a7uSR2&H3icb#D zxQ%RC#*#LY!u&d>#q@8JvTE8yw=BB6ZE>ISFL)uE$N+s1Hp@pg3|acGxQm|i;0)Tc zOo}8lV4Boa%4BC_NhEjQL7?;M*w9zkKU-qwPwT>BOJ<)URq_1(o16Tts4Rdtssa>!}-7wH!J!kx7OHc3lkfOg_&Rx>X6UK_|!^ zm=(eSCMDWT1lWuYsozbzf z*6PpSQ$lfW*ylTHzw};#_J9N5b}4_->F+LV)05zq6<2U#Y#t%OABh-orhX(viF)oP zo_A^>=@uCj3wNljM-d}P-fSG(yM$Ir1I@67bcXLc7uYnAH6yqR1FyJI>4dh?hEqwy 
zeHJ~u1UKP@4tL6$S_S5|Ocu|-^3ADI^r}{>0$$%HwW&u1(<8X2Ui_UKbDa9Itsev8 z3+UA(22P$MMjuI2FCJMyCJ4DCmlgRYS{$Okei)4;!$A8;6*qAsuq61}Sm3QL)qqf% zW%1t()!o}n`H9;}(=nJ8J<`G1>~j`<$-e^xxY{OqM+lBKPyVa(8S%b)I0`e&5?LZv z#iR^HlZY#t$wv>9p<_;eDH5SS=E{CExM|pUBJ0g-2Fyi8BHrB@re~?|vI1JKlqn7; zfQ9RMCBI?a#c+JGrOu=O51|7^8ZVhqj)G^k_CLXnWKzJSC&Yh@339N23f8 z*Kb&`a<+m(b6)Jo%e7Vn}*CK(*R#jY|D_xi`qQEU?b=uZ!brGa1u*DyQ8y z4Jxfp1_M$0XI)0F?=*FRY45zXA-wR@zRl8f;K?C_{BAD+C>Lx)BY5Z8RMqpp?;+ax zoNLR>FR3FB)MODDJ*S&$vTed3kJA2zc&O0HwuGN1=L@m=ncUS&F;0L z|6;|UNoYm>XR_#;mJzL{)kyZdxa+8GJsc#mg>`@Uxk}r6W`PCL&QEzCQQhK&dBmNd zjt0B1)%=u>>NsU;XXQs}HZQc}`WUKwXhH@tkQG{~hX8DuEk^Yh@y&^-HxH}*OA#3dvbbsrM>j*0C|K3rFsa8S_nH1!V z<_)EMpsM!N6`wx5(Kv@{$KT~PE9_Mj~n$!bYu{fVc%|cVsUm z|EzC2L8{+BZjGup^ps&>Se{|m0hb7hYmmqu1U`otGt@T82$@d^v>}wNG&w3f& z!Wpl7AiD&|ZIACOyTBGGyHA!d@N(dQI>P_FkxR1xABx$&q+RUu&6F{e*S984uA3^U zOkiN&ns`PVBjf+7MUh>UB$b1BkhVJ^G#H(v-VCZ^EhM58L*;_6=bnP1X21OPW^pU# z3`;-1qlnIDo7PMU{)_ybv*C`PiwJF*hZ^jdTv*0alq~O9U^CJ--v0i2f%bSanHST! znCtdURjZGUg+f{Dsx%QuWHZWzMI=oZv7RM6_gUZi{bTi9^%)jxk2FamuE?dK<9W4? 
zH$yLCEWEhXd|Fx?W(!xwOol~tXO-CnhqBQ2)Ynkm>f`pltoP{>`HfjoX*;+)bSF-d+K9mnIU@h;F5& zaSY;CD1dbfPUczQeM27k+t}j{ZyF!i3F~mI<>u=Qk_@n`@erU7)~2rD-f^$v+edin zvL|S}gej@A;(adGSuJ%iE8XI2&rwKd>@i+1YiVHb5aFk8qs`c|Ij*^^UolPo zalK4&BTc~mUHOC7M7mZjzVLI4N9_ex1+8&f%bpxB*4OzoC4cM5D@I}ScM-T5;yFz@ zvTk~YTa_J5K3o_bJHQyqN%Rm+2IWwj9#+q-q*4nJalO>kN{+Y5k)qm;lv6=-~04~<=SV9K*QOn^USFPMhd(e8mN`l zzy4gB{)hA=f9b~idUQ(n$}Gf}tDwY&I{_@6J~kwl>nD&r)V#}|q+EQ1lO@^q1HwhQ z-{!!tl*=k?U)$pTH{&zuyoVM>50fsxf6|sIX?k5}y+6ue@wt}NjUcE?3Y&ZHUDi~s zomR&6fuop;A|>W6CCkHK7RF=s`tLI4`~=91*nSXdO%6PklqyTS~+cv|`#N}aHL*ZBm(of1g)III~E*c!xjV`RL z{rko&_#=dGD^g>H&D73NfSP`^!NB9pRJ2!+MtFoaQQL37nIR}7LC2)sFfZtT+d|^n zKy0`4vn}Y?d6QW;q0~Tws8D*hJGz=9Det2*khD{;o0*HqG!!w|{ZqE*`CdFlofx-Y z>Pk#eP3^PndjaJZV{s1lwPG5CVL^WnL1n`S?E5f{|MqGsA%d}LzT7j)&ydf#V*)g`j&Tr~D!gAw2xPD23WVAgvM~`&iyz`W5Y?isNopVe-h{?$G#* zL0J@94x3zbM6jC}Mh|7M%L9`~OZlYa8ilmFdb*g`4N!u4k|O`OD*EA3Hb!7;>&{#^ zTh)5{HWHJeeZtt@m3k%2SBwK|UpnX{eroY7WHV}*zT~5Bk~A;Hx=)pj-p=!Wx$dPP zF1)bN_Br0^#yAOGp3FYRzbLUz+l<{t z`(|U`XqMU8#H|FX&F#K7t$pbX>Sa2Ir;y&C*~iXP?+mYADPMSd_P*BXZ3^nmwgjAr zv`-7m42YYe#BZ;K;rTj-BBW*}O9i^=GHy;Aii4bwNb(wDB!J+cJ^0_&ryPuPAE(fn zFPJ$Y+R?~+gajw^aFOf1e&WP7JC){jshQ$*d{xPY=CY>`^Ac0-qZ>*|5q3IhUe1SC zk5Xg~`O6Z9w(iQNViTA*G;;mg7oi~;QVQR=bZNBKzZiLeAGfbm_szYVZaGa^RbJsE zrcGn4;OnH*Q^QB|H(kW5$Syb*Zd%K?dS4I-)4jl#-Y{?zC4T0SbIGkhBPFRTCc0Fj zjjz&ixyqC3xD^vK=DRgMtufg3szUsr_we*h-jM3L?V(4saU-v!fUg&UW4aSRY0xq) z5$fWXzZCu3X{yoa49|Yx`|z;sEq*x;>Aik!=4NOYn$ckpOF~%PDII-mcLuxpCo=uVg;5DPK#$6R{^G{TJ?bICuF z6H*MN*^<&!RMgaZP*NA)2YmI{X;Om1QDJ2Ffm+#=SBkOdhcUOQW`- zNVnnjnC5b0Iwwh&b+SNU=9Fa|{);;wJ{I>K4ck2Vdp!}hICAw*_Z}qIPp^o3eHp3A zBlgYl&!@^v zGr80B>XK0W%75cGr_RO1naQB>ScAHQP|1lTPA3r-qRU|EbLXbsYFv^!g|l2xb$a0^ zwGQ$Q{Kd_iLylAITO>>r(WYXey!p?O6-p?uySuyh%HLD%j{KhC<6>N5I^{aAZ(w@) zdv+N|l$|G#ExXw>ZTeE7!E%AlUa#qtl6@wt&6;8uhu@zbedhxY#mk2?B08MkB69e_=F}P4EO3TvBcEn z@0GX#A|10zp^8^#G1&{nW(K3_+kX&P;uPc*(+i01t(%?4+fSQ?8-pISoNJy&m2wM_ zHw9sJkck`~_Db~Dh$`y=nosF$JtawB=m=Op-idjo_rOy0WgEeGTSGaA@slrAxHE?5 
zuFLtSGVd)vY-A>Ft$i;Uyb;~r^VKY7!z(?)b9mZwJx9_~E z7!-Zy7~#E=mXskKtySCUHe{`gQ8P-=5Z?W3;O-=nzr#T+2^xwQroIQO{m=@b6!9)5 zc<>8>G?riQk&W#U!)hh5Y(FV~-p5bhqXl(O&OdvjdYoPNeKMl6^<-J^;Cnaazx3YkRx*Mq90jWSC>>iE_=lMXGO5m>*V1=~mgPiM7qmNKotjiz58qI$+v?6a3rssBuY|Z*VL)?po00z%W=DvL4 zxxP|d_ND*q%pbU5KV)-B27x917s?E_W4y>D*jdS6VA5Pc_W+d_QQM#uue=GWf62+k z$m*IJa<}{a@DJEq`_@wY_1K#>a)mT=Oq^ur&YRJ}(oTxMb#`Pk|A|5T{wo0+Z~HH9 zgNO0|^bc8dU)7{kQv?l7B89T7OvMCY+%_!tPae^>iT6n>RsWBssZ!omj@slPv|uqb)lr_VBNB zn^E@Q6skdq7*&X6X8D_(&+pP_U0uZZpW4wg$jN6Pn$UdUd;k-N>KYQ~?~g7u_7;wn zq}qx7EVLoVBZ&c%BYw&p7n8(Q<2Sxqo0=Y~9ZTp;Xb89RS6=W~i^_5os1(iAiauB0PMkNgngPQ)i*y=ljkELNC z#RP3px@|tqjdHeHj_r>6QX9FL0ToR_TJoEo6|qB<&sj(Ol-J zdth(#RS->rZTNH4qbqMqhdKEc>h1lKujev|@;fbBjz`Ay>gr{TX|F!=cwYIuJyaC6hamSu>?Q2`fO>gPi{eY#-8UhQOx+{grqGArO1gI+N|smnWk;F66d#s zls0^797HEc%o6vPzx&j0Hach-_y}hS>kb-jhugB>BAYy|vAJ9Hg>o&};HA2<$88h@ zVxv5n@vEC%@GUpafX+XE{u7*vS}2D*MG|V?Q$sN>OHi}EQ%aP3+rn5g>GEF4V@;>& z-}Qmf-yoT?-&`0aWzr9bfY)tJs)Xvs%y+}sE#yzGW23Y zRNl*0TAvyXWl>pm?`Y(Tu^IHZaMf~3a_5uF)fx)Jl4{Dz9$0(Qu%%7QR`?;Qb*#SG z)B8Ca=5>3wh|7)pjO@0#=*D?(`UavSi%En38^34k{WpHU|G(mQ%nvB|OPj;LS^{hJ z6)?}%72Vw}xNs^B2G+J78L-MJ2z5aG7FzB^*W!4 z^G}h74+M+K&&SG1y>3*zn@D#bLBy!{_#$isr;o+TCLQ=q=x+V2BF2#o4QrU4v-Xe{ zqb3gXrITE`hF{3Lf9aRF-t`C%;Yk5;C-bM#2{geFD+4owzOab-FY;$8qxrl@J+Y9+4iu9)Z~xSJ- zeAp>x(^)``J#==8;2Eq9F~t#6;n5cUdCrMIoM3fz4`16r$w4!%W<-oa-+L%jC;Oi7*) zZB$^3NoJxZWM26r!ZBY?<^7lW{V)*|y^|NX@Py>bY2) zKAinAY_LoBt@ekW&^%Ng4U%UAsf!XJ}j_B)&PI=iEdqEyKxqju* zPwyU{3i=e}4V;#p+zC7$^2Unf&5fcH5nY0D=vIYqo4VRmOfz$XAsm`g+vg<~8fgs(^PoEtK1OoNar- z9QilfZZa}#Pu{v+-e_J|ahp!&+T*)FZ&@HwvtBPg zHt$YEi2c^{&+Y!a&%bn;;YrSeE9Tu-Ps^`9efIYEx)*m1^VjAUEHppYpL-`@-TghE z-*3;?dwom&)~pZy^8ZfT|7*Xz;jrq#18Dt-Y4*TNNIzEm0nSD?^GACxt)KcNyN4^e z=0!~cun(8}vfKCdxwrP_v(wrc|5tVJ#c~@n`33Lav>@*OmKAlI?)lVU=3egB%<_1499 zVp5r}ukZWw?~nbzZ-;IKm;ZhEZfCts_*{A_@LxLQqT6+z$K~w z_pORuf8XH$dI8RUOE=%JuzeeE?%iF!U%v8dO+rgt59{04(^pq5URWQL-S=nP2mAlo z9Sp|V;a8?cOYi;9uH3kw;Mtc6Z0&vv1^>NSIs3ZabnW?diT8xheUk!)W5)-#A6w)9 
zA3nPAtMsX-{ini1YIk1T@zwdo*$Xr0Uv{@Yh@&%cIG@eIp#pfbyPI3v-wB60L0f9~_X3ybSmgn)Kfj;# z)=bUl1aP7M56~5}z_n6wVaX!Gk=kp+zwcO;{{9|tX*zK6=EskP{kQL4iJdz?vh%F? zvERyZ>q1X=3Z1$3v@`j`y_??WOtmdr#g?Z0Pd^6jPq424_PF8_*8ar5g2zT>z>IqS z*v3Cq)|;wn z|2_?)QxOB{R8%;A{sEqbexa^1CpzK`uVq?j%Joe*uc-;HTN)Zx)0y&-xizl$<6bONsW5D5l`%K_w zzdyjuX;~7-0>w9}e}8}fKX9SEPu!!q%PxHW1X@pGQTQlwOAn|zsk0n$~^WJfGLP3*2)v5j(AF?%Yd83_#%N>gTe~DWM4f0AGpO diff --git a/paddle/contrib/tape/function.h b/paddle/contrib/tape/function.h deleted file mode 100644 index 8c9694d9a2..0000000000 --- a/paddle/contrib/tape/function.h +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include - -#include "paddle/contrib/tape/tape.h" -#include "paddle/contrib/tape/variable.h" -#include "paddle/fluid/framework/type_defs.h" - -namespace paddle { -namespace tape { - -class Function {}; - -class Fill { - public: - Fill(const std::string &initializer, const framework::AttributeMap &attrs) - : initializer_(initializer), attrs_(attrs) {} - - void operator()(VariableHandle var) { - get_global_tape().AddOp(initializer_, {}, {{"Out", {var}}}, attrs_); - } - - private: - const std::string initializer_; - const framework::AttributeMap attrs_; -}; - -class Mean { - public: - VariableHandle operator()(VariableHandle var) { - VariableHandle out(new Variable("mean")); - get_global_tape().AddOp("mean", {{"X", {var}}}, {{"Out", {out}}}, {}); - return out; - } -}; - -class Linear { - public: - Linear(int in_dim, int out_dim, const std::string &act) - : w_(new Variable("LinearWeight")), - b_(new Variable("LinearBias")), - act_(act) { - Tape init_tape; - - std::string initializer = "fill_constant"; - framework::AttributeMap attrs; - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{in_dim, out_dim}; - attrs["value"] = 1.0f; - init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs); - - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{out_dim}; - attrs["value"] = 1.0f; - init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs); - - init_tape.Forward(); - } - - VariableHandle operator()(VariableHandle input) { - VariableHandle pre_bias(new Variable("linear")); - get_global_tape().AddOp("mul", - {{"X", {input}}, {"Y", {w_}}}, - {{"Out", {pre_bias}}}, - {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}}); - VariableHandle pre_act(new Variable("linear")); - get_global_tape().AddOp("elementwise_add", - {{"X", {pre_bias}}, {"Y", {b_}}}, - {{"Out", {pre_act}}}, - {{"axis", 1}}); - VariableHandle post_act(new Variable("linear")); - 
get_global_tape().AddOp( - act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {}); - return post_act; - } - - std::vector Params() { return {w_, b_}; } - - private: - VariableHandle w_; - VariableHandle b_; - std::string act_; -}; - -class SGD { - public: - SGD(float learning_rate) : learning_rate_(new Variable("sgd")) { - Tape init_tape; - - std::string initializer = "fill_constant"; - framework::AttributeMap attrs; - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{1}; - attrs["value"] = learning_rate; - init_tape.AddOp(initializer, {}, {{"Out", {learning_rate_}}}, attrs); - - init_tape.Forward(); - } - - void operator()(VariableHandle input) { - PADDLE_ENFORCE(get_global_tape().HasBeenBackwarded(), - "optimization must happen after the backward"); - Tape temp_tape; - temp_tape.AddOp("sgd", - {{"Param", {input}}, - {"LearningRate", {learning_rate_}}, - {"Grad", {input->Grad()}}}, - {{"ParamOut", {input}}}, - {}); - temp_tape.Forward(); - } - - private: - VariableHandle learning_rate_; -}; -} -} diff --git a/paddle/contrib/tape/tape.cc b/paddle/contrib/tape/tape.cc deleted file mode 100644 index 531499b6fe..0000000000 --- a/paddle/contrib/tape/tape.cc +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/contrib/tape/tape.h" - -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/dim.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/fluid/pybind/pybind.h" - -namespace paddle { -namespace tape { - -// borrowed from -// https://stackoverflow.com/questions/874134/find-if-string-ends-with-another-string-in-c -inline bool ends_with(std::string const &value, std::string const &ending) { - if (ending.size() > value.size()) return false; - return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); -} - -std::ostream &operator<<(std::ostream &os, const framework::VarDesc &var_desc) { - os << var_desc.Name(); - os << "[" << var_desc.GetType() << "]"; - os << "[" << var_desc.GetDataType() << "]"; - os << "{"; - for (auto &i : var_desc.GetShape()) { - os << i << ","; - } - os << "}"; - return os; -} - -std::string to_string(const std::string &type, - const VariableHandleMap &in_vars, - const VariableHandleMap &out_vars, - const framework::AttributeMap &attrs) { - std::stringstream ss; - ss << type << " "; - for (auto ¶m_name : in_vars) { - for (auto &var : param_name.second) { - ss << param_name.first << ":(" << var->Desc() << ") "; - } - } - for (auto ¶m_name : out_vars) { - for (auto &var : param_name.second) { - ss << param_name.first << ":(" << var->Desc() << ") "; - } - } - return ss.str(); -} - -framework::OpDesc CreateOpDesc(const std::string &type, - const VariableHandleMap &in_vars, - const VariableHandleMap &out_vars, - const framework::AttributeMap &attrs) { - framework::VariableNameMap inputs; - for (auto ¶m_name : in_vars) { - for (auto &var : param_name.second) { - inputs[param_name.first].emplace_back(var->Name()); - } - } - framework::VariableNameMap outputs; - for (auto ¶m_name : out_vars) { - for (auto 
&var : param_name.second) { - outputs[param_name.first].emplace_back(var->Name()); - } - } - return framework::OpDesc(type, inputs, outputs, attrs); -} - -void InferShapeAndVarType(const std::string &type, - const VariableHandleMap &in_vars, - VariableHandleMap *out_vars, - const framework::AttributeMap &attrs) { - framework::OpDesc op_desc = CreateOpDesc(type, in_vars, *out_vars, attrs); - - // Create a temporary block for compile-time - framework::ProgramDesc program_desc; - framework::BlockDesc *block_desc = program_desc.MutableBlock(0); - PADDLE_ENFORCE(block_desc); - - for (auto ¶m_name : in_vars) { - for (auto &var : param_name.second) { - *block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto(); - } - } - for (auto ¶m_name : *out_vars) { - for (auto &var : param_name.second) { - *block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto(); - } - } - - LOG(INFO) << "- " << to_string(type, in_vars, *out_vars, attrs); - op_desc.InferShape(*block_desc); - op_desc.InferVarType(block_desc); - for (auto ¶m_name : *out_vars) { - for (auto &var : param_name.second) { - *var->MutableDesc()->Proto() = *block_desc->Var(var->Name())->Proto(); - } - } - LOG(INFO) << "+ " << to_string(type, in_vars, *out_vars, attrs); -} - -void Tape::AddOp(const std::string &type, - const VariableHandleMap &in_vars, - VariableHandleMap out_vars, - const framework::AttributeMap &attrs) { - InferShapeAndVarType(type, in_vars, &out_vars, attrs); - tape_.emplace_back(type, in_vars, out_vars, attrs); -} - -// Temporary Scope for Operator::Run() -class ScopeWrapper : public framework::Scope { - public: - ScopeWrapper(const VariableHandleMap &in_vars, - const VariableHandleMap &out_vars) { - for (auto &v : in_vars) { - for (auto &vv : v.second) { - if (!vars_.count(vv->Name())) { - vars_[vv->Name()].reset(vv->Var()); - } - } - } - for (auto &v : out_vars) { - for (auto &vv : v.second) { - if (!vars_.count(vv->Name())) { - vars_[vv->Name()].reset(vv->Var()); - } - } - } - } 
- - ~ScopeWrapper() { - for (auto &pair : vars_) { - pair.second.release(); - } - } -}; - -void Tape::Forward() { - LOG(INFO) << "Starting forward -------------------------"; - PADDLE_ENFORCE(!has_been_backwarded_); - while (current_position_ < tape_.size()) { - OpHandle &op = tape_[current_position_]; - - // Create Output Tensor, this is only necessary for OpWithKernel - for (auto ¶m2var : op.outputs_) { - for (auto &var : param2var.second) { - var->InitializeVariable(); - } - } - - framework::OpDesc op_desc = - CreateOpDesc(op.type_, op.inputs_, op.outputs_, op.attrs_); - ScopeWrapper scope(op.inputs_, op.outputs_); - framework::OpRegistry::CreateOp(op_desc)->Run(scope, platform::CPUPlace()); - current_position_++; - } - - LOG(INFO) << "Finishing forward -------------------------"; -} - -void Tape::Backward(VariableHandle target) { - PADDLE_ENFORCE(!has_been_backwarded_); - - Forward(); - - // TODO(tonyyang-svail): check output of last op is target - backward_tape_.reset(new Tape()); - - framework::AttributeMap attrs; - - // FIXME(tonyyang-svail): Need to infer_data_type - attrs["dtype"] = framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{1}; - attrs["value"] = 1.0f; - backward_tape_->AddOp( - "fill_constant", {}, {{"Out", {target->Grad()}}}, attrs); - - for (auto it = tape_.rbegin(); it != tape_.rend(); ++it) { - framework::OpDesc op_desc = - CreateOpDesc(it->type_, it->inputs_, it->outputs_, it->attrs_); - std::unordered_map grad_to_var; - std::vector> grad_op_descs = - framework::OpInfoMap::Instance() - .Get(op_desc.Type()) - .GradOpMaker()(op_desc, {}, &grad_to_var, {}); - - for (auto &op_desc : grad_op_descs) { - std::unordered_map name2var; - for (auto ¶m2vars : it->inputs_) { - for (auto &a : param2vars.second) { - name2var[a->Name()] = a; - } - } - for (auto ¶m2vars : it->outputs_) { - for (auto &a : param2vars.second) { - name2var[a->Name()] = a; - } - } - - VariableHandleMap in_vars; - VariableHandleMap out_vars; - 
std::map - loop_over{{&op_desc->Inputs(), &in_vars}, - {&op_desc->Outputs(), &out_vars}}; - for (auto &each : loop_over) { - auto &vmp = *each.first; - auto &vhm = *each.second; - for (auto &p2a : vmp) { - for (auto &argu : p2a.second) { - if (name2var.count(argu)) { - vhm[p2a.first].push_back(name2var[argu]); - } else { - PADDLE_ENFORCE(ends_with(argu, framework::kGradVarSuffix), - argu.c_str()); - std::string name = argu.substr( - 0, argu.size() - std::strlen(framework::kGradVarSuffix)); - PADDLE_ENFORCE(name2var.count(name), name.c_str()); - vhm[p2a.first].push_back(name2var[name]->Grad()); - } - } - } - } - - backward_tape_->AddOp( - op_desc->Type(), in_vars, out_vars, op_desc->GetAttrMap()); - } - - // TODO(tonyyang-svail): how to fill empty grad? - // TODO(tonyyang-svail): Sum var grad is necessary - } - - backward_tape_->Forward(); - has_been_backwarded_ = true; -} - -Tape &get_global_tape() { - static Tape T; - return T; -} - -void reset_global_tape() { get_global_tape() = Tape(); } -} -} diff --git a/paddle/contrib/tape/tape.h b/paddle/contrib/tape/tape.h deleted file mode 100644 index ed79de17a7..0000000000 --- a/paddle/contrib/tape/tape.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#pragma once - -#include -#include -#include -#include - -#include "paddle/contrib/tape/variable.h" - -namespace paddle { -namespace tape { - -using VariableHandleMap = std::map>; - -struct OpHandle { - OpHandle(const std::string &type, - const VariableHandleMap &in_vars, - const VariableHandleMap &out_vars, - const framework::AttributeMap &attrs) - : type_(type), inputs_(in_vars), outputs_(out_vars), attrs_(attrs) {} - - std::string type_; - VariableHandleMap inputs_; - VariableHandleMap outputs_; - framework::AttributeMap attrs_; -}; - -class Tape { - public: - void AddOp(const std::string &type, - const VariableHandleMap &in_vars, - VariableHandleMap out_vars, - const framework::AttributeMap &attrs); - void Forward(); - void Backward(VariableHandle target); - - bool HasBeenBackwarded() { return has_been_backwarded_; } - - private: - bool has_been_backwarded_ = false; - size_t current_position_ = 0; - - std::vector tape_; - std::shared_ptr backward_tape_; -}; - -Tape &get_global_tape(); - -void reset_global_tape(); -} -} diff --git a/paddle/contrib/tape/test_tape.cc b/paddle/contrib/tape/test_tape.cc deleted file mode 100644 index e9bfd21a71..0000000000 --- a/paddle/contrib/tape/test_tape.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "gtest/gtest.h" -#include "paddle/contrib/tape/function.h" - -using namespace paddle::tape; - -TEST(Tape, TestMLP) { - LOG(INFO) << "TestMLP"; - Linear linear1(3, 3, "relu"); - Linear linear2(3, 3, "relu"); - Mean mean; - - SGD sgd(0.001); - - std::string initializer = "fill_constant"; - paddle::framework::AttributeMap attrs; - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{3, 3}; - attrs["value"] = 1.0f; - Fill filler(initializer, attrs); - - for (int i = 0; i < 2; ++i) { - reset_global_tape(); - - VariableHandle input(new Variable("input")); - filler(input); - - auto loss = mean(linear2(linear1(input))); - - get_global_tape().Backward(loss); - - for (auto w : linear1.Params()) { - sgd(w); - } - for (auto w : linear2.Params()) { - sgd(w); - } - } -} - -int main(int argc, char** argv) { - std::vector places; - places.emplace_back(paddle::platform::CPUPlace()); - paddle::platform::DeviceContextPool::Init(places); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/contrib/tape/variable.cc b/paddle/contrib/tape/variable.cc deleted file mode 100644 index 5ec1612909..0000000000 --- a/paddle/contrib/tape/variable.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/contrib/tape/variable.h" - -namespace paddle { -namespace tape { - -void Variable::InitializeVariable() { - LOG(INFO) << "Initialzing " << desc_.Name() << " as " << desc_.GetType(); - framework::proto::VarType::Type var_type = desc_.GetType(); - if (var_type == framework::proto::VarType::LOD_TENSOR) { - var_.GetMutable(); - } else if (var_type == framework::proto::VarType::SELECTED_ROWS) { - var_.GetMutable(); - } else { - PADDLE_THROW("Variable type %d is not in [LOD_TENSOR, SELECTED_ROWS]", - var_type); - } -} -} -} diff --git a/paddle/contrib/tape/variable.h b/paddle/contrib/tape/variable.h deleted file mode 100644 index 35c328e69c..0000000000 --- a/paddle/contrib/tape/variable.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include - -#include "paddle/fluid/framework/operator.h" // framework::kGradVarSuffix -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/framework/variable.h" - -namespace paddle { -namespace tape { - -class Variable; -using VariableHandle = std::shared_ptr; - -/* - * Combination of - * framework::VarDesc desc_; - * framework::Variable var_; - */ -class Variable { - public: - Variable(const std::string pre_fix) - : desc_(pre_fix + std::to_string(count())) {} - - Variable(const std::string pre_fix, bool is_grad) - : desc_(pre_fix + (is_grad ? 
framework::kGradVarSuffix - : std::to_string(count()))) {} - - ~Variable() { LOG(INFO) << "Deleting " << Name(); } - - // Instantiate LoDTensor/SelectedRow - void InitializeVariable(); - - VariableHandle Grad() { - if (grad_.expired()) { - VariableHandle new_grad(new Variable(desc_.Name(), true)); - grad_ = new_grad; - return new_grad; - } else { - return VariableHandle(grad_); - } - } - - // Stochastic Gradient Descent with Momentum - // VariableHandle Momentum (); - - // void init(const std::string& initializer, - // const framework::AttributeMap& attrs); - - // void value() {}; - - const framework::VarDesc& Desc() const { return desc_; } - framework::VarDesc* MutableDesc() { return &desc_; } - - // TODO(tonyyang-svail): No need to expose name - std::string Name() const { return desc_.Name(); } - - framework::Variable* Var() { return &var_; } - - private: - int count() { - static int counter = 0; - return counter++; - } - - framework::VarDesc desc_; - framework::Variable var_; - - std::weak_ptr grad_; -}; -} -} From fe5de04bde3fd065b034c1a0d9a5b07b36ae36a4 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 19 Jun 2018 10:26:17 +0800 Subject: [PATCH 25/46] optimize doc for MomentumOptimizer --- python/paddle/fluid/optimizer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 8c402cf9d5..92ae5ee074 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -323,11 +323,11 @@ class MomentumOptimizer(Optimizer): & if (use\_nesterov): - & param = param - gradient * learning\_rate + mu * velocity * learning\_rate + &\quad param = param - gradient * learning\_rate + mu * velocity * learning\_rate & else: - & param = param - learning\_rate * velocity + &\quad param = param - learning\_rate * velocity Args: learning_rate (float|Variable): the learning rate used to update parameters. 
\ From 8ea54e2f955a620d965118962eaf574fe456fe66 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 19 Jun 2018 12:36:31 +0800 Subject: [PATCH 26/46] Add docs --- python/paddle/fluid/average.py | 19 + python/paddle/fluid/backward.py | 71 +++- python/paddle/fluid/io.py | 669 +++++++++++++++++++++++++++----- 3 files changed, 646 insertions(+), 113 deletions(-) diff --git a/python/paddle/fluid/average.py b/python/paddle/fluid/average.py index 6abe8233b0..358e24df31 100644 --- a/python/paddle/fluid/average.py +++ b/python/paddle/fluid/average.py @@ -36,6 +36,25 @@ def _is_number_or_matrix_(var): class WeightedAverage(object): + """ + Calculate weighted average. + + The average calculating is accomplished via Python totally. + They do not change Paddle's Program, nor do anything to + modify NN model's configuration. They are completely + wrappers of Python functions. + + Examples: + .. code-block:: python + avg = fluid.average.WeightedAverage() + avg.add(value=2.0, weight=1) + avg.add(value=4.0, weight=2) + avg.eval() + + # The result is 3.333333333. + # For (2.0 * 1 + 4.0 * 2) / (1 + 2) = 3.333333333 + """ + def __init__(self): warnings.warn( "The %s is deprecated, please use fluid.metrics.Accuracy instead." 
% diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 4f9622d04d..95421704db 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -147,7 +147,7 @@ def _addup_repetitive_outputs_(op_descs): else: if len(renamed_vars[var_name]) == 1: new_name = var_name + "@RENAME@" + \ - str(var_rename_count[var_name]) + str(var_rename_count[var_name]) var_rename_count[var_name] += 1 # rename original var_name renamed_vars[var_name][0] = new_name @@ -155,7 +155,7 @@ def _addup_repetitive_outputs_(op_descs): _rename_arg_(pending_sum_ops, var_name, new_name) new_name = var_name + "@RENAME@" + \ - str(var_rename_count[var_name]) + str(var_rename_count[var_name]) var_rename_count[var_name] += 1 op_desc.rename_output(var_name, new_name) renamed_vars[var_name].append(new_name) @@ -434,18 +434,65 @@ def _get_stop_gradients_(program): def append_backward(loss, parameter_list=None, no_grad_set=None, callbacks=None): """ - Append backward part to main_program + Append backward part to main_program. - Args: - loss(Variable): The variable generated by cost function. - parameter_list(list[string]): Parameters that need to be updated by - optimizer. If None, it means all parameters need to be updated. - no_grad_set(set): Variables that have no gradients in Block 0. - All variables with `step_gradient=True` from all blocks will be - automatically added. + A complete neural network training is made up of forward and backward + propagation. However, when we configure a network, we only need to + specify its forward part. The backward part is generated automatically + according to the forward part by this function. - Return: - (list[(Variable,Variable)]): list of (parameter, gradient) pair. + In most cases, users do not need to invoke this function manually. It + will be automatically invoked by the optimizer's `minimize` function. + + Args: + loss(Variable): The loss variable of the network.
+ parameter_list(list[string]|None): Names of parameters that need + to be updated by optimizers. + If it is None, all parameters + will be updated. + Default: None + no_grad_set(set|None): Variables in the Block 0 whose gradients + should be ignored. All variables with + `stop_gradient=True` from all blocks will + be automatically added into this set. + Default: None + callbacks(list[callable object]|None): The callbacks are used for + doing some custom jobs during + backward part building. All + callable objects in it will + be invoked once each time a + new gradient operator is added + into the program. The callable + object must have two input + parameters: 'block' and 'context'. + The 'block' is the block which + the new gradient operator will + be added to. The 'context' is a + map, whose keys are gradient + variable names and values are + corresponding original variables. + In addition to this, the 'context' + has another special key-value pair: + the key is string '__current_op_desc__' + and the value is the op_desc of the + gradient operator that has just + triggered the callable object. + + Returns: + list[(Variable,Variable)]: Pairs of parameter and its + corresponding gradients. The key is the parameter and the + value is gradient variable. + + Raises: + AssertionError: If `loss` is not an instance of Variable. + + Examples: + .. code-block:: python + + # network configuration code + # ... + avg_loss = fluid.layers.mean(loss) + param_grad_list = fluid.backward.append_backward(loss=avg_loss) """ assert isinstance(loss, framework.Variable) diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 6323c9899e..61613ef079 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -30,20 +30,42 @@ __all__ = [ def is_parameter(var): - """Check whether the variable is a Parameter. - - This function checks whether the input variable is a Parameter. + """ + Check whether the given variable is an instance of Parameter.
Args: - var : The input variable. + var(Variable): The variable to be checked. Returns: - boolean result whether the variable is a Parameter. + bool: True if the given `var` is an instance of Parameter, + False if not. + + Examples: + .. code-block:: python + + param = fluid.default_main_program().global_block().var('fc.w') + res = fluid.io.is_parameter(param) """ return isinstance(var, Parameter) def is_persistable(var): + """ + Check whether the given variable is persistable. + + Args: + var(Variable): The variable to be checked. + + Returns: + bool: True if the given `var` is persistable + False if not. + + Examples: + .. code-block:: python + + param = fluid.default_main_program().global_block().var('fc.w') + res = fluid.io.is_persistable(param) + """ if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ var.desc.type() == core.VarDesc.VarType.FETCH_LIST: return False @@ -68,20 +90,69 @@ def save_vars(executor, predicate=None, filename=None): """ - Save variables to directory by executor. + Save variables to the given directory by executor. + + There are two ways to specify variables to be saved: The first way, list + variables in a list and assign it to the `vars`. The second way, assign the + `main_program` with an existing program, then all variables in the program + will be saved. The first way has a higher priority. In other words, if `vars` + are assigned, the `main_program` and the `predicate` will be ignored. - :param executor: executor that save variable - :param dirname: directory path - :param main_program: program. If vars is None, then filter all variables in this - program which fit `predicate`. Default default_main_program. - :param predicate: The Predicate describes a callable that returns a variable - as a bool. If it returns true, the corresponding input variable will be saved. - :param vars: variables need to be saved. 
If vars is specified, program & predicate - will be ignored - :param filename: The name of a single file that all vars are saved to. - If it is None, save variables to separate files. + The `dirname` is used to specify the folder in which to save variables. + If you prefer to save variables in separate files in the folder `dirname`, + set `filename` None; if you prefer to save all variables in a single file, + use `filename` to specify it. - :return: None + Args: + executor(Executor): The executor to run for saving variables. + dirname(str): The directory path. + main_program(Program|None): The program whose variables will be saved. + If it is None, the default main program will + be used automatically. + Default: None + vars(list[Variable]|None): The list that contains all variables to save. + It has a higher priority than the `main_program`. + Default: None + predicate(function|None): If it is not None, only variables in the + `main_program` that make predicate(variable)==True + will be saved. It only works when we are using the + `main_program` to specify variables (In other words + `vars` is None). + Default: None + filename(str|None): The file in which to save all variables. If you prefer to save + variables separately, set it to None. + Default: None + + Returns: + None + + Raises: + TypeError: If `main_program` is neither an instance of Program nor None. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + + # The first usage: using `main_program` to specify variables + def name_has_fc(var): + res = "fc" in var.name + return res + + prog = fluid.default_main_program() + fluid.io.save_vars(executor=exe, dirname=param_path, main_program=prog, + vars=None, predicate=name_has_fc) + # All variables in `main_program` whose name includes "fc" will be saved. + # And variables are going to be saved separately.
+ + + # The second usage: using `vars` to specify variables + var_list = [var_a, var_b, var_c] + fluid.io.save_vars(executor=exe, dirname=param_path, vars=var_list, + filename="vars_file") + # var_a, var_b and var_c will be saved. And they are going to be + # saved in the same file named 'vars_file' in the path "./my_paddle_model". """ if vars is None: if main_program is None: @@ -129,7 +200,42 @@ def save_vars(executor, def save_params(executor, dirname, main_program=None, filename=None): """ - Save all parameters to directory with executor. + This function filters out all parameters from the given `main_program` + and then saves them to the folder `dirname` or the file `filename`. + + Use the `dirname` to specify the saving folder. If you would like to + save parameters in separate files, set `filename` None; if you would + like to save all parameters in a single file, use `filename` to specify + the file name. + + NOTICE: Some variables are not Parameter while they are necessary for + training. So you can NOT save and continue your training just by + `save_params()` and `load_params()`. Please use `save_persistables()` + and `load_persistables()` instead. + + Args: + executor(Executor): The executor to run for saving parameters. + dirname(str): The saving directory path. + main_program(Program|None): The program whose parameters will be + saved. If it is None, the default + main program will be used automatically. + Default: None + filename(str|None): The file to save all parameters. If you prefer + to save parameters in different files, set it + to None. + Default: None + + Returns: + None + + Examples: + ..
code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.save_params(executor=exe, dirname=param_path, + main_program=None) """ save_vars( executor, @@ -142,7 +248,37 @@ def save_params(executor, dirname, main_program=None, filename=None): def save_persistables(executor, dirname, main_program=None, filename=None): """ - Save all persistables to directory with executor. + This function filters out all variables with `persistable==True` from the + given `main_program` and then saves these variables to the folder `dirname` + or file `filename`. + + The `dirname` is used to specify the folder where persistable variables + are going to be saved. If you would like to save variables in separate + files, set `filename` None; if you would like to save all variables in a + single file, use `filename` to specify the file name. + + Args: + executor(Executor): The executor to run for saving persistable variables. + dirname(str): The directory path. + main_program(Program|None): The program whose persistable variables will + be saved. If it is None, the default main + program will be used automatically. + Default: None + filename(str|None): The file to save all variables. If you prefer to + save variables in different files, set it to None. + Default: None + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.save_persistables(executor=exe, dirname=param_path, + main_program=None) """ save_vars( executor, @@ -160,20 +296,69 @@ def load_vars(executor, predicate=None, filename=None): """ - Load variables from directory by executor. + Load variables from the given directory by executor. + + There are two ways to specify variables to be loaded: The first way, list + variables in a list and assign it to the `vars`.
The second way, assign the + `main_program` with an existing program, then all variables in the program + will be loaded. The first way has a higher priority. In other words if `vars` + is assigned, the `main_program` and the `predicate` will be ignored. + + The `dirname` is used to specify the folder from which to load variables. + If variables were saved in separate files in the folder `dirname`, + set `filename` None; if all variables were saved in a single file, + use `filename` to specify it. - :param executor: executor that load variable - :param dirname: directory path - :param main_program: program. If vars is None, then filter all variables in this - program which fit `predicate`. Default default_main_program(). - :param predicate: The Predicate describes a callable that returns a variable - as a bool. If it returns true, the corresponding input variable will be loaded. - :param vars: variables need to be loaded. If vars is specified, program & - predicate will be ignored - :param filename: The name of the single file that all vars are loaded from. - If it is None, load variables from separate files. + Args: + executor(Executor): The executor to run for loading variables. + dirname(str): The directory path. + main_program(Program|None): The program whose variables will be loaded. + If it is None, the default main program will + be used automatically. + Default: None + vars(list[Variable]|None): The list that contains all variables to load. + It has a higher priority than the `main_program`. + Default: None + predicate(function|None): If it is not None, only variables in the + `main_program` that make predicate(variable)==True + will be loaded. It only works when we are using the + `main_program` to specify variables (In other words + `vars` is None). + Default: None + filename(str|None): The file which saved all required variables. If variables + were saved in different files, set it to None.
+ Default: None + + Returns: + None + + Raises: + TypeError: If `main_program` is neither an instance of Program nor None. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + + # The first usage: using `main_program` to specify variables + def name_has_fc(var): + res = "fc" in var.name + return res - :return: None + prog = fluid.default_main_program() + fluid.io.load_vars(executor=exe, dirname=param_path, main_program=prog, + vars=None, predicate=name_has_fc) + # All variables in `main_program` whose name includes "fc" will be loaded. + # And all the variables are supposed to have been saved in different files. + + + # The second usage: using `vars` to specify variables + var_list = [var_a, var_b, var_c] + fluid.io.load_vars(executor=exe, dirname=param_path, vars=var_list, + filename="vars_file") + # var_a, var_b and var_c will be loaded. And they are supposed to have + # been saved in the same file named 'vars_file' in the path "./my_paddle_model". """ if vars is None: if main_program is None: @@ -221,7 +406,42 @@ def load_vars(executor, def load_params(executor, dirname, main_program=None, filename=None): """ - load all parameters from directory by executor. + This function filters out all parameters from the given `main_program` + and then tries to load these parameters from the folder `dirname` or + the file `filename`. + + Use the `dirname` to specify the folder where parameters were saved. If + parameters were saved in separate files in the folder `dirname`, set + `filename` None; if all parameters were saved in a single file, use + `filename` to specify the file name. + + NOTICE: Some variables are not Parameter while they are necessary for + training. So you can NOT save and continue your training just by + `save_params()` and `load_params()`. Please use `save_persistables()` + and `load_persistables()` instead. + + Args: + executor(Executor): The executor to run for loading parameters. + dirname(str): The directory path.
+ main_program(Program|None): The program whose parameters will be + loaded. If it is None, the default + main program will be used automatically. + Default: None + filename(str|None): The file which saved all parameters. If parameters + were saved in different files, set it to None. + Default: None + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.load_params(executor=exe, dirname=param_path, + main_program=None) """ load_vars( executor, @@ -233,7 +453,37 @@ def load_params(executor, dirname, main_program=None, filename=None): def load_persistables(executor, dirname, main_program=None, filename=None): """ - load all persistables from directory by executor. + This function filters out all variables with `persistable==True` from the + given `main_program` and then tries to load these variables from the folder + `dirname` or the file `filename`. + + Use the `dirname` to specify the folder where persistable variables were + saved. If variables were saved in separate files, set `filename` None; + if all variables were saved in a single file, use `filename` to specify + the file name. + + Args: + executor(Executor): The executor to run for loading persistable variables. + dirname(str): The directory path. + main_program(Program|None): The program whose persistable variables will + be loaded. If it is None, the default main + program will be used automatically. + Default: None + filename(str|None): The file which saved all variables. If variables were + saved in different files, set it to None. + Default: None + + Returns: + None + + Examples: + ..
code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.load_persistables(executor=exe, dirname=param_path, + main_program=None) """ load_vars( executor, @@ -306,22 +556,47 @@ def save_inference_model(dirname, model_filename=None, params_filename=None): """ - Build a model especially for inference, - and save it to directory by the executor. + Prune the given `main_program` to build a new program especially for inference, + and then save it and all related parameters to given `dirname` by the `executor`. + + Args: + dirname(str): The directory path to save the inference model. + feeded_var_names(list[str]): Names of variables that need to be feeded data + during inference. + target_vars(list[Variable]): Variables from which we can get inference + results. + executor(Executor): The executor that saves the inference model. + main_program(Program|None): The original program, which will be pruned to + build the inference model. If is setted None, + the default main program will be used. + Default: None. + model_filename(str|None): The name of file to save the inference program + itself. If is setted None, a default filename + `__model__` will be used. + params_filename(str|None): The name of file to save all related parameters. + If it is setted None, parameters will be saved + in separate files . - :param dirname: directory path - :param feeded_var_names: Names of variables that need to be feeded data during inference - :param target_vars: Variables from which we can get inference results. - :param executor: executor that save inference model - :param main_program: original program, which will be pruned to build the inference model. - Default default_main_program(). - :param model_filename: The name of file to save inference program. - If not specified, default filename `__model__` will be used. - :param params_filename: The name of file to save parameters. 
- It is used for the case that all parameters are saved in a single binary file. - If not specified, parameters are considered saved in separate files. + Returns: + None + + Raises: + ValueError: If `feed_var_names` is not a list of basestring. + ValueError: If `target_vars` is not a list of Variable. + + Examples: + .. code-block:: python + exe = fluid.Executor(fluid.CPUPlace()) + path = "./infer_model" + fluid.io.save_inference_model(dirname=path, feeded_var_names=['img'], + target_vars=[predict_var], executor=exe) + + # In this exsample, the function will prune the default main program + # to make it suitable for infering the `predict_var`. The pruned + # inference program is going to be saved in the "./infer_model/__model__" + # and parameters are going to be saved in separate files under folder + # "./infer_model". - :return: None """ if isinstance(feeded_var_names, basestring): feeded_var_names = [feeded_var_names] @@ -382,18 +657,49 @@ def load_inference_model(dirname, """ Load inference model from a directory - :param dirname: directory path - :param executor: executor that load inference model - :param model_filename: The name of file to load inference program. - If not specified, default filename `__model__` will be used. - :param params_filename: The name of file to load parameters. - It is used for the case that all parameters are saved in a single binary file. - If not specified, parameters are considered saved in separate files. + Args: + dirname(str): The directory path + executor(Executor): The executor to run for loading inference model. + model_filename(str|None): The name of file to load inference program. + If it is None, the default filename + '__model__' will be used. + Default: None + params_filename(str|None): The name of file to load all parameters. + It is only used for the case that all + parameters were saved in a single binary + file. If parameters were saved in separate + files, set it as 'None'. 
+ + Returns: + tuple: The return of this function is a tuple with three elements: + (program, feed_target_names, fetch_targets). The `program` is a + Program, it's the program for inference. The `feed_target_names` is + a list of str, it contains Names of variables that need to feed + data in the inference program. The `fetch_targets` is a list of + Variable. It contains variables from which we can get inference + results. + + Raises: + ValueError: If `dirname` is not a existing directory. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + path = "./infer_model" + [inference_program, feed_target_names, fetch_targets] = + fluid.io.load_inference_model(dirname=path, executor=exe) + results = exe.run(inference_program, + feed={feed_target_names[0]: tensor_img}, + fetch_list=fetch_targets) + + # In this exsample, the inference program is saved in the + # "./infer_model/__model__" and parameters were saved in + # separate files in ""./infer_model". + # After getting inference program, feed target names and + # fetch targets, we can use an Executor to run the inference + # program to get the inference result. - :return: [program, feed_target_names, fetch_targets] - program: program especially for inference. - feed_target_names: Names of variables that need to feed data - fetch_targets: Variables from which we can get inference results. """ if not os.path.isdir(dirname): raise ValueError("There is no directory named '%s'", dirname) @@ -424,12 +730,25 @@ def load_inference_model(dirname, def get_parameter_value(para, executor): """ - Get the LoDTensor for the parameter + Get the LoDTensor value of the given parameter. + + Args: + para(Parameter): The parameter to get value from. + executor(Executor): The executor to run for retrieving the value. + + Returns: + numpy.array: The given parameter's values. + + Raises: + AssertionError: If the `para` is not an instance of Parameter. + + Examples: + .. 
code-block:: python - :param executor: executor for retrieving the value - :param para: the given parameter + exe = fluid.Executor(fluid.CPUPlace()) + param = fluid.default_main_program().global_block().var('fc.w') + p = fluid.io.get_parameter_value(param, exe) - :return: the LoDTensor for the parameter """ assert is_parameter(para) @@ -441,14 +760,30 @@ def get_parameter_value(para, executor): def get_parameter_value_by_name(name, executor, program=None): """ - Get the LoDTensor for paramter with the given name + Get the LoDTensor value of a certain parameter by its name. - :param executor: executor for retrieving the value - :param name: the name of the parameter - :param program: the program where the variable is found - Default default_main_program(). + Args: + name(str): The parameter's name. + executor(Executor): The executor to run for retrieving the value. + program(Program | None): The program where to find the parameter. + If it's set to be None, the function will + try to find the parameter in the default + main program. + + Returns: + numpy.array: The parameter's values. + + Raises: + TypeError: If given `name` is not an instance of basestring. + TypeError: If the parameter with the given name doesn't exist. + AssertionError: If there is a varibale named `name` in the + given program but it is not a Parameter. - :return: the LoDTensor for the variable + Examples: + .. 
code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + p = fluid.io.get_parameter_value_by_name('fc.w', exe) """ if program is None: program = default_main_program() @@ -469,17 +804,59 @@ def save_checkpoint(executor, trainer_args=None, main_program=None, max_num_checkpoints=3): - """ - Save Checkpoint will save persistable LodTensor variables from main_program in checkpoint directory, - the directory named by serial number from 0 to (n -1), save_checkpoint use LRU strategy - to keep numbers of checkpoint directory, the numbers of checkpoint directory are max_num_checkpoints at most, - The interval between two saved checkpoints must greater than save_interval_secs. + """" + This function filters out all checkpoint variables from the give + main_program and then saves these variables to the 'checkpoint_dir' + directory. + + In the training precess, we generally save a checkpoint in each + iteration. So there might be a lot of checkpoints in the + 'checkpoint_dir'. To avoid them taking too much disk space, the + `max_num_checkpoints` are introduced to limit the total number of + checkpoints. If the number of existing checkpints is greater than + the `max_num_checkpoints`, the oldest ones will be scroll deleted. + + A variable is a checkpoint variable and will be loaded if it meets + all the following conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - :param executor executor for save the value - :param checkpoint_dir the checkpoint directory - :param trainer_id currect trainer id, if id is equal to 0, the trainer is chief - :param main_program will save all variables in program - :param max_num_checkpoints will keep numbers of checkpoint serials not bigger than max_num_checkpoints + Args: + executor(Executor): The executor to run for save checkpoint. + checkpoint_dir(str): The folder where to save checkpoints.
+ trainer_id(int): current trainer id, if id is equal to 0, the trainer + is chief. + trainer_args(dict|None): Current training arguments. Such as 'epoch_id' + and 'step_id'. + Default: None + main_program(Program|None): The program whose checkpoint variables will + be saved. If it is None, the default main program will be used. + max_num_checkpoints(int): The max number of total number of existing + checkpoints. + Default: 3 + + Returns: + None + + Raises: + ValueError: If `checkpoint_dir` is None. + AssertionError: If `trainer_args` is not a dict. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + path = "./checkpoints" + prog = fluid.default_main_program() + trainer_args = {"epoch_id": 200, + "step_id": 20} # just an example + fluid.io.save_checkpoint(executor=exe, + checkpoint_dir=path, + trainer_id=0, + trainer_args=trainer_args, + main_program=prog, + max_num_checkpoints=3) """ if checkpoint_dir is None: raise ValueError("'checkpoint_dir' should not be None") @@ -503,13 +880,50 @@ def save_checkpoint(executor, def load_checkpoint(executor, checkpoint_dir, serial, main_program): """ - Load checkpoint from a directory by executor, - it will find the most recent saved checkpoint file and load it auto. + This function filters out all checkpoint variables from the give + main_program and then try to load these variables from the + 'checkpoint_dir' directory. + + In the training precess, we generally save a checkpoint in each + iteration. So there are more than one checkpoint in the + 'checkpoint_dir'(each checkpoint has its own sub folder), use + 'serial' to specify which serial of checkpoint you would like to + load. + + A variable is a checkpoint variable and will be loaded if it meets + all the following conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
+ + Args: + executor(Executor): The executor to run for loading checkpoint. + checkpoint_dir(str): The folder where all checkpoints are. + serial(int): The serial of checkpoint you would like to load. + main_program(Program): The program whose checkpoint variables will + be loaded. - :param executor executor for load the value - :param checkpoint_dir the checkpoint directory - :param serial the serial folder in checkpoint directory will be load - :param main_program will load all variables in program + Returns: + None + + Raises: + ValueError: If `checkpoint_dir` is None. + ValueError: If `serial` is None or `serial` is less than 0. + ValueError: If `main_program` is None. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + path = "./checkpoints" + prog = fluid.default_main_program() + fluid.io.load_checkpoint(executor=exe, checkpoint_dir=path, + serial=9, main_program=prog) + + # In this example, `load_checkpoint` function + # will first filters out all checkpoint variables in the default + # main program, and then try to load these variables form the + # folder "./checkpoints/checkpoint_9/__model__". """ if checkpoint_dir is None: @@ -528,10 +942,10 @@ def load_checkpoint(executor, checkpoint_dir, serial, main_program): def clean_checkpoint(checkpoint_dir, delete_dir=False): """ clean the checkpoint dir, when the train exits normally, the trainer will call clean_checkpoint to delete checkpoint directory saved before. - delete_dir only works when the directory is empty, otherwise, OSError is raised. + delete_dir only works when the directory is empty, otherwise, OSError is raised. 
- :param checkpoint_dir - :param delete_dir + : param checkpoint_dir + : param delete_dir """ if checkpoint_dir is None: @@ -547,13 +961,40 @@ def load_persist_vars_without_grad(executor, program, has_model_dir=False): """ - load_persist_vars_without_grad will load variables from a directory by an executor, - the variable named end with "@GRAD" will not be loaded. + This function filters out all checkpoint variables from the give + program and then try to load these variables from the given directory. + + A variable is a checkpoint variable if it meets all the following + conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - :param executor executor for load the value - :param dirname the checkpoint directory - :param program will load all variables in program - :param has_model_dir if has_model_dir is True, will load variables from sub directory named __model__ + Args: + executor(Executor): The executor to run for loading variables. + dirname(str): The directory path. + program(Program): The program whose checkpoint variables will + be loaded. + has_model_dir(bool): if True, the function loads variables + from a sub directory named '__model__'. + Default: False + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.load_persist_vars_without_grad(executor=exe, + dirname=param_path, program=prog, has_model_dir=True) + + # In this example, `load_persist_vars_without_grad` function + # will first filters out all checkpoint variables in the default + # main program, and then trys to load these variables form the + # folder "./my_paddle_model/__model__". 
""" if has_model_dir: @@ -569,12 +1010,38 @@ def load_persist_vars_without_grad(executor, def save_persist_vars_without_grad(executor, dirname, program): """ - save_persist_vars_without_grad will save variables to a directory by an executor, - the variable named end with "@GRAD" will not be saved. + This function filters out all checkpoint variables from the give + program and then save these variables to a sub-folder '__model__' of + the given directory. + + A variable is a checkpoint variable if it meets all the following + conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". + + Args: + executor(Executor): The executor to run for saving variables. + dirname(str): The directory path. + program(Program): The program whose checkpoint variables will + be saved. + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.save_persist_vars_without_grad(executor=exe, + dirname=param_path, program=prog) - :param executor executor for load the value - :param dirname the checkpoint directory - :param program will load all variables in program + # In this example, `save_persist_vars_without_grad` function + # will first filters out all checkpoint variables in the default + # main program, and then saves these variables to the folder + # "./my_paddle_model/__model__". """ cur_dir = _get_model_dir(dirname) save_vars( @@ -620,7 +1087,7 @@ def _is_checkpoint_var(var): the checkpoint will not save or load all the variables. var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded. 
- :param var + : param var """ if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \ @@ -701,7 +1168,7 @@ def _write_success(dirname): """ write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct. - :param dirname + : param dirname """ success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME) with open(success_file, 'a') as f: @@ -713,7 +1180,7 @@ def get_latest_checkpoint_serial(checkpoint_dir): """ get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory - :param checkpoint_dir + : param checkpoint_dir """ if not checkpoint_dir: return -1 From 9a25f2895cf1b9e65542442a5f3fed666b52b37a Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 19 Jun 2018 11:40:32 +0800 Subject: [PATCH 27/46] update the default cpu memory with MKLDNN --- paddle/fluid/platform/cpu_info.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index c708337f8f..f832d72b53 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -30,7 +30,9 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1, DEFINE_uint64(initial_cpu_memory_in_mb, #ifdef PADDLE_WITH_MKLDNN - 1000, + /* Aligned with mozga-intel, MKLDNN need at least 5000 MB + * to obtain the best performance*/ + 5000, #else 500, #endif From 1473033fb3b38d357fe1b43d4ec45d59ce7cff4d Mon Sep 17 00:00:00 2001 From: Dang Qingqing Date: Tue, 19 Jun 2018 13:27:38 +0800 Subject: [PATCH 28/46] Polish profiler Python API. 
--- python/paddle/fluid/profiler.py | 117 ++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/profiler.py b/python/paddle/fluid/profiler.py index e2bd1d4c9a..6a321ae024 100644 --- a/python/paddle/fluid/profiler.py +++ b/python/paddle/fluid/profiler.py @@ -42,6 +42,9 @@ def cuda_profiler(output_file, output_mode=None, config=None): counters/options for profiling by `config` argument. The default config is ['gpustarttimestamp', 'gpustarttimestamp', 'gridsize3d', 'threadblocksize', 'streamid', 'enableonstart 0', 'conckerneltrace']. + Then users can use NVIDIA Visual Profiler + (https://developer.nvidia.com/nvidia-visual-profiler) tools to load + this output file to visualize results. Args: output_file (string) : The output file name, the result will be @@ -50,6 +53,33 @@ def cuda_profiler(output_file, output_mode=None, config=None): Comma separated values format. It should be 'kvp' or 'csv'. config (list of string) : The profiler options and counters can refer to "Compute Command Line Profiler User Guide". + + Raises: + ValueError: If `output_mode` is not in ['kvp', 'csv']. + + Examples: + + .. code-block:: python + + import paddle.fluid as fluid + import paddle.fluid.profiler as profiler + + epoc = 8 + dshape = [4, 3, 28, 28] + data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32') + conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + output_file = 'cuda_profiler.txt' + with profiler.cuda_profiler(output_file, 'csv') as nvprof: + for i in range(epoc): + input = np.random.random(dshape).astype('float32') + exe.run(fluid.default_main_program(), feed={'data': input}) + # then use NVIDIA Visual Profiler (nvvp) to load this output file + # to visualize results.
""" if output_mode is None: output_mode = 'csv' @@ -69,19 +99,52 @@ def cuda_profiler(output_file, output_mode=None, config=None): def reset_profiler(): - """The profiler clear interface. - reset_profiler will clear the previous time record. + """ + Clear the previous time record. This interface does not work for + `fluid.profiler.cuda_profiler`, it only works for + `fluid.profiler.start_profiler`, `fluid.profiler.stop_profiler`, + and `fluid.profiler.profiler`. + + Examples: + + .. code-block:: python + + import paddle.fluid.profiler as profiler + with profiler.profiler(state, 'total', '/tmp/profile'): + for iter in range(10): + if iter == 2: + profiler.reset_profiler() + # ... """ core.reset_profiler() def start_profiler(state): - """Enable the profiler. + """ + Enable the profiler. Users can use `fluid.profiler.start_profiler` and + `fluid.profiler.stop_profiler` to insert the code, except the usage of + `fluid.profiler.profiler` interface. Args: state (string) : The profiling state, which should be 'CPU', 'GPU' or 'All'. 'CPU' means only profile CPU. 'GPU' means profiling GPU as well. 'All' also generates timeline. + + Raises: + ValueError: If `state` is not in ['CPU', 'GPU', 'All']. + + Examples: + + .. code-block:: python + + import paddle.fluid.profiler as profiler + + profiler.start_profiler('GPU') + for iter in range(10): + if iter == 2: + profiler.reset_profiler() + # except each iteration + profiler.stop_profiler('total', '/tmp/profile') """ if core.is_profiler_enabled(): return @@ -97,7 +160,10 @@ def start_profiler(state): def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): - """Stop the profiler. + """ + Stop the profiler. Users can use `fluid.profiler.start_profiler` and + `fluid.profiler.stop_profiler` to insert the code, except the usage of + `fluid.profiler.profiler` interface.
Args: sorted_key (string) : If None, the profiling results will be printed @@ -111,6 +177,23 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): The `ave` means sorting by the average execution time. profile_path (string) : If state == 'All', it will write a profile proto output file. + + Raises: + ValueError: If `sorted_key` is not in + ['calls', 'total', 'max', 'min', 'ave']. + + Examples: + + .. code-block:: python + + import paddle.fluid.profiler as profiler + + profiler.start_profiler('GPU') + for iter in range(10): + if iter == 2: + profiler.reset_profiler() + # except each iteration + profiler.stop_profiler('total', '/tmp/profile') """ if not core.is_profiler_enabled(): return @@ -137,7 +220,12 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'): Different from cuda_profiler, this profiler can be used to profile both CPU and GPU program. By defalut, it records the CPU and GPU operator kernels, if you want to profile other program, you can refer the profiling tutorial - to add more records. + to add more records in C++ code. + + If the state == 'All', a profile proto file will be written to + `profile_path`. This file records timeline information during the execution. + Then users can visualize this file to see the timeline, please refer + https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/optimization/timeline.md Args: state (string) : The profiling state, which should be 'CPU' or 'GPU', @@ -156,6 +244,25 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'): The `ave` means sorting by the average execution time. profile_path (string) : If state == 'All', it will write a profile proto output file. + + Raises: + ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is + not in ['calls', 'total', 'max', 'min', 'ave']. + + Examples: + + .. 
code-block:: python + + import paddle.fluid.profiler as profiler + + with profiler.profiler('All', 'total', '/tmp/profile') as prof: + for pass_id in range(pass_num): + for batch_id, data in enumerate(train_reader()): + exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[], + use_program_cache=True) + # ... """ start_profiler(state) yield From 8746725a977994a336d85c9181641294ff86c0a2 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 19 Jun 2018 14:12:49 +0800 Subject: [PATCH 29/46] fix errors --- python/paddle/fluid/io.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 61613ef079..88e7e3bb20 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -407,7 +407,7 @@ def load_vars(executor, def load_params(executor, dirname, main_program=None, filename=None): """ This function filters out all parameters from the give `main_program` - and then try to load these parameters from the folder `dirname` or + and then trys to load these parameters from the folder `dirname` or the file `filename`. Use the `dirname` to specify the folder where parameters were saved. If @@ -586,6 +586,7 @@ def save_inference_model(dirname, Examples: .. code-block:: python + exe = fluid.Executor(fluid.CPUPlace()) path = "./infer_model" fluid.io.save_inference_model(dirname=path, feeded_var_names=['img'], @@ -693,7 +694,7 @@ def load_inference_model(dirname, feed={feed_target_names[0]: tensor_img}, fetch_list=fetch_targets) - # In this exsample, the inference program is saved in the + # In this exsample, the inference program was saved in the # "./infer_model/__model__" and parameters were saved in # separate files in ""./infer_model". 
# After getting inference program, feed target names and @@ -804,20 +805,20 @@ def save_checkpoint(executor, trainer_args=None, main_program=None, max_num_checkpoints=3): - """" + """ This function filters out all checkpoint variables from the give - main_program and then saves these variables to the 'checkpoint_dir' + main_program and then saves these variables to the `checkpoint_dir` directory. In the training precess, we generally save a checkpoint in each iteration. So there might be a lot of checkpoints in the - 'checkpoint_dir'. To avoid them taking too much disk space, the + `checkpoint_dir`. To avoid them taking too much disk space, the `max_num_checkpoints` are introduced to limit the total number of checkpoints. If the number of existing checkpints is greater than - the `max_num_checkpoints`, the oldest ones will be scroll deleted. + the `max_num_checkpoints`, oldest ones will be scroll deleted. - A variable is a checkpoint variable and will be loaded if it meets - all the following conditions: + A variable is a checkpoint variable and will be saved if it meets + all following conditions: 1. It's persistable. 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". @@ -882,16 +883,16 @@ def load_checkpoint(executor, checkpoint_dir, serial, main_program): """ This function filters out all checkpoint variables from the give main_program and then try to load these variables from the - 'checkpoint_dir' directory. + `checkpoint_dir` directory. In the training precess, we generally save a checkpoint in each iteration. So there are more than one checkpoint in the - 'checkpoint_dir'(each checkpoint has its own sub folder), use - 'serial' to specify which serial of checkpoint you would like to + `checkpoint_dir`(each checkpoint has its own sub folder), use + `serial` to specify which serial of checkpoint you would like to load. 
A variable is a checkpoint variable and will be loaded if it meets - all the following conditions: + all following conditions: 1. It's persistable. 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". @@ -962,9 +963,9 @@ def load_persist_vars_without_grad(executor, has_model_dir=False): """ This function filters out all checkpoint variables from the give - program and then try to load these variables from the given directory. + program and then trys to load these variables from the given directory. - A variable is a checkpoint variable if it meets all the following + A variable is a checkpoint variable if it meets all following conditions: 1. It's persistable. 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. @@ -1014,7 +1015,7 @@ def save_persist_vars_without_grad(executor, dirname, program): program and then save these variables to a sub-folder '__model__' of the given directory. - A variable is a checkpoint variable if it meets all the following + A variable is a checkpoint variable if it meets all following conditions: 1. It's persistable. 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. From efcbe27263d858dab56ed887b782b2a1e00c318d Mon Sep 17 00:00:00 2001 From: Xin Pan Date: Tue, 19 Jun 2018 14:37:47 +0800 Subject: [PATCH 30/46] Refine detection_map doc. --- paddle/fluid/operators/detection_map_op.cc | 12 +++--- python/paddle/fluid/layers/detection.py | 45 +++++++++++++++++++++- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index 716c8625d3..d7f49a9590 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -175,12 +175,12 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Detection mAP evaluate operator. The general steps are as follows. 
First, calculate the true positive and - false positive according to the input of detection and labels, then - calculate the mAP evaluate value. - Supporting '11 point' and 'integral' mAP algorithm. Please get more information - from the following articles: - https://sanchom.wordpress.com/tag/average-precision/ - https://arxiv.org/abs/1512.02325 +false positive according to the input of detection and labels, then +calculate the mAP evaluate value. +Supporting '11 point' and 'integral' mAP algorithm. Please get more information +from the following articles: +https://sanchom.wordpress.com/tag/average-precision/ +https://arxiv.org/abs/1512.02325 )DOC"); } diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index d5471d182b..200db87f17 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -16,7 +16,7 @@ All layers just related to the detection neural network. """ from layer_function_generator import generate_layer_fn -from layer_function_generator import autodoc +from layer_function_generator import autodoc, templatedoc from ..layer_helper import LayerHelper import tensor import nn @@ -155,7 +155,7 @@ def detection_output(loc, return nmsed_outs -@autodoc() +@templatedoc() def detection_map(detect_res, label, class_num, @@ -166,6 +166,47 @@ def detection_map(detect_res, input_states=None, out_states=None, ap_version='integral'): + """ + ${comment} + + Args: + detect_res: ${detect_res_comment} + label: ${label_comment} + class_num: ${class_num_comment} + background_label: ${background_label_comment} + overlap_threshold: ${overlap_threshold_comment} + evaluate_difficult: ${evaluate_difficult_comment} + has_state: ${has_state_comment} + input_states: If not None, It contains 3 elements: + 1. pos_count ${pos_count_comment}. + 2. true_pos ${true_pos_comment}. + 3. false_pos ${false_pos_comment}. + out_states: If not None, it contains 3 elements. + 1. 
accum_pos_count ${accum_pos_count_comment}. + 2. accum_true_pos ${accum_true_pos_comment}. + 3. accum_false_pos ${accum_false_pos_comment}. + ap_version: ${ap_type_comment} + + Returns: + ${map_comment} + + + Examples: + .. code-block:: python + + detect_res = fluid.layers.data( + name='detect_res', + shape=[10, 6], + append_batch_size=False, + dtype='float32') + label = fluid.layers.data( + name='label', + shape=[10, 6], + append_batch_size=False, + dtype='float32') + + map_out = fluid.layers.detection_map(detect_res, label, 21) + """ helper = LayerHelper("detection_map", **locals()) def __create_var(type): From a29cb4be2afcc983e4609e3efc7981413cfc6551 Mon Sep 17 00:00:00 2001 From: Qiyang Min Date: Tue, 19 Jun 2018 01:50:35 -0500 Subject: [PATCH 31/46] Fix decay bug (#11520) * Add sub_blocks of lr_decay_op to pserver_prog after distribute_transpiler * Remove unused logs and logics * 1. Add ops to new block (considering the nested block condition) 2. Follow the original hierarchy of blocks 3. 
Change the function's name and remove debug lines --- paddle/fluid/framework/executor.cc | 5 +- python/paddle/fluid/framework.py | 8 ++- .../fluid/transpiler/distribute_transpiler.py | 58 +++++++++++++++++-- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 429482bd03..b30a9806eb 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -295,13 +295,14 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, std::unique_ptr Executor::Prepare( const ProgramDesc& program, int block_id) { - auto* ctx = new ExecutorPrepareContext(program, block_id); + std::unique_ptr ctx( + new ExecutorPrepareContext(program, block_id)); PADDLE_ENFORCE_LT(static_cast(block_id), program.Size()); auto& block = program.Block(block_id); for (auto& op_desc : block.AllOps()) { ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc)); } - return std::unique_ptr(ctx); + return ctx; } std::vector> Executor::Prepare( diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index df0625649d..42d3c9c153 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -644,7 +644,13 @@ class Operator(object): def set_attr(self, name, val): self.attrs[name] = val - self.desc.set_attr(name, val) + if isinstance(val, Block): + self.desc.set_block_attr(name, val.desc) + elif isinstance(val, core.BlockDesc) or \ + isinstance(val, core.ProgramDesc): + self.desc.set_serialized_attr(name, val.serialize_to_string()) + else: + self.desc.set_attr(name, val) @property def attr_names(self): diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 9c604170b8..99146bcfe5 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -24,7 +24,7 @@ Steps to transpile trainer: 1. 
split variable to multiple blocks, aligned by product(dim[1:]) (width). 2. rename splited grad variables to add trainer_id suffix ".trainer_%d". 3. modify trainer program add split_op to each grad variable. -4. append send_op to send splited variables to server and +4. append send_op to send splited variables to server and 5. add recv_op to fetch params(splited blocks or origin param) from server. 6. append concat_op to merge splited blocks to update local weights. @@ -44,7 +44,7 @@ import numpy as np from ps_dispatcher import RoundRobin, HashName, PSDispatcher from .. import core, framework from ..framework import Program, default_main_program, \ - default_startup_program, \ + default_startup_program, Block, \ Variable, Parameter, grad_var_name from details import * @@ -471,7 +471,7 @@ class DistributeTranspiler: self._append_pserver_ops(block, op, endpoint, grad_to_block_id, self.origin_program, merged_var) else: - self._append_pserver_non_opt_ops(block, op, endpoint) + self._append_pserver_non_opt_ops(block, op) def __op_have_grad_input__(op): for varname in op.input_arg_names: @@ -479,13 +479,39 @@ class DistributeTranspiler: return varname return "" + def __clone_lr_op_sub_block__(op, program, new_block): + if not op.has_attr('sub_block'): + return + + origin_block_desc = op.attr('sub_block') + origin_block = self.origin_program.block(origin_block_desc.id) + assert isinstance(origin_block, Block) + # we put the new sub block to new block to follow the block + # hierarchy of the original blocks + new_sub_block = program.create_block(new_block.idx) + + # clone vars + for var in origin_block.vars: + new_sub_block.clone_variable(var) + + # clone ops + for op in origin_block.ops: + self._clone_lr_op(program, new_sub_block, op) + # clone sub_block of op + __clone_lr_op_sub_block__(op, program, new_sub_block) + + # reset the block of op + op.set_attr('sub_block', new_sub_block) + # append lr decay ops to the child block if exists lr_ops = self._get_lr_ops() if 
len(lr_ops) > 0: lr_decay_block = pserver_program.create_block( pserver_program.num_blocks - 1) for _, op in enumerate(lr_ops): - self._append_pserver_non_opt_ops(lr_decay_block, op, endpoint) + self._append_pserver_non_opt_ops(lr_decay_block, op) + # append sub blocks to pserver_program in lr_decay_op + __clone_lr_op_sub_block__(op, pserver_program, lr_decay_block) # append op to the current block grad_to_block_id = [] @@ -1116,7 +1142,29 @@ class DistributeTranspiler: break return grad_block - def _append_pserver_non_opt_ops(self, optimize_block, opt_op, endpoint): + def _clone_lr_op(self, program, block, op): + inputs = self._get_input_map_from_op( + self.origin_program.global_block().vars, op) + for key, varlist in inputs.iteritems(): + if not isinstance(varlist, list): + varlist = [varlist] + for var in varlist: + if var not in program.global_block().vars: + block.clone_variable(var) + + outputs = self._get_output_map_from_op( + self.origin_program.global_block().vars, op) + for key, varlist in outputs.iteritems(): + if not isinstance(varlist, list): + varlist = [varlist] + for var in varlist: + if var not in program.global_block().vars: + block.clone_variable(var) + + block.append_op( + type=op.type, inputs=inputs, outputs=outputs, attrs=op.attrs) + + def _append_pserver_non_opt_ops(self, optimize_block, opt_op): program = optimize_block.program # Append the ops for parameters that do not need to be optimized/updated inputs = self._get_input_map_from_op( From 96b4904d2fe126b8e29408ae84923714c02ef5ef Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Tue, 12 Jun 2018 10:38:37 +0200 Subject: [PATCH 32/46] MKLDNN layout: Support for sum operator --- paddle/fluid/operators/parallel_do_op.cc | 2 +- paddle/fluid/operators/recurrent_op.cc | 3 +- paddle/fluid/operators/sum_mkldnn_op.cc | 242 ++++++++++++++++++ paddle/fluid/operators/sum_op.cc | 32 ++- paddle/fluid/operators/while_op.cc | 4 +- python/paddle/fluid/backward.py | 11 +- python/paddle/fluid/layers/nn.py | 
143 ++++++----- python/paddle/fluid/layers/tensor.py | 30 ++- .../fluid/transpiler/distribute_transpiler.py | 6 +- python/paddle/reader/decorator.py | 4 +- 10 files changed, 375 insertions(+), 102 deletions(-) create mode 100644 paddle/fluid/operators/sum_mkldnn_op.cc diff --git a/paddle/fluid/operators/parallel_do_op.cc b/paddle/fluid/operators/parallel_do_op.cc index 1012640d5e..c9744db3d0 100644 --- a/paddle/fluid/operators/parallel_do_op.cc +++ b/paddle/fluid/operators/parallel_do_op.cc @@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase { auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}}, - framework::AttributeMap{}); + framework::AttributeMap{{"use_mkldnn", {false}}}); VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]); sum_op->Run(*sub_scopes[0], places[0]); WaitOnPlace(places[0]); diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index 9c1cee7022..162bfcbb08 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase { auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, - {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); + {{"Out", {pg_names[param_id]}}}, + framework::AttributeMap{{"use_mkldnn", {false}}}); sum_op->Run(cur_scope, place); cur_scope.Rename(new_inside_name, inside_grad_name); diff --git a/paddle/fluid/operators/sum_mkldnn_op.cc b/paddle/fluid/operators/sum_mkldnn_op.cc new file mode 100644 index 0000000000..1f0c3ab023 --- /dev/null +++ b/paddle/fluid/operators/sum_mkldnn_op.cc @@ -0,0 +1,242 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*Licensed under the Apache License, Version 2.0(the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "mkldnn.hpp" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/math/selected_rows_functor.h" +#include "paddle/fluid/operators/sum_op.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/mkldnn_helper.h" + +namespace paddle { +namespace operators { + +using paddle::framework::Tensor; +using paddle::platform::MKLDNNDeviceContext; +using paddle::platform::CPUDeviceContext; +using framework::DataLayout; +using mkldnn::memory; +using mkldnn::primitive; +using mkldnn::stream; +using mkldnn::sum; +using mkldnn::reorder; +using platform::to_void_cast; + +template +class SumMKLDNNOpKernel : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), + "It must use CPUPlace."); + auto& dev_ctx = ctx.template device_context(); + const auto& mkldnn_engine = dev_ctx.GetEngine(); + + auto in_vars = ctx.MultiInputVar("X"); + 
+ const int N = in_vars.size(); + auto out_var = ctx.OutputVar("Out"); + bool in_place = out_var == in_vars[0]; + + if (out_var->IsType()) { + LoDTensor* output = ctx.Output("Out"); + T* output_data = output->mutable_data(ctx.GetPlace()); + + std::vector dst_tz = framework::vectorize2int(output->dims()); + auto src_tz = dst_tz; + memory::format output_format{memory::format::format_undef}; + std::vector scales; + std::vector srcs_mpd; + std::vector srcs_mem; + + PADDLE_ENFORCE(in_vars[0]->IsType(), + "Input[0] must be LoDTensors"); + auto& input0 = in_vars[0]->Get(); + PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN && + input0.format() != memory::format::format_undef, + "Wrong layout/format for inputs[0]"); + + memory::format input_format = input0.format(); + + if (src_tz.size() == 1 && (input_format == memory::format::nchw || + input_format == memory::format::nhwc)) { + input_format = memory::format::x; + } + if (src_tz.size() == 2 && (input_format == memory::format::nchw || + input_format == memory::format::nhwc)) { + input_format = memory::format::nc; + } + + for (int i = in_place ? 
1 : 0; i < N; i++) { + PADDLE_ENFORCE(in_vars[i]->IsType(), + "all inputs must be all LoDTensors"); + auto& input = in_vars[i]->Get(); + PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN && + input.format() != memory::format::format_undef, + "Wrong layout/format for inputs"); + + if (input.numel() == 0) { + continue; + } + + const T* input_data = input.data(); + + auto src_md = + memory::desc(src_tz, memory::data_type::f32, input_format); + auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine); + auto src_mem = memory(src_mpd, to_void_cast(input_data)); + srcs_mpd.push_back(src_mpd); + srcs_mem.push_back(src_mem); + scales.push_back(1.0); + } + + auto dst_md = + memory::desc(dst_tz, memory::data_type::f32, memory::format::any); + + auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd); + + std::shared_ptr dst_mem; + if (in_place) + dst_mem.reset(new memory(sum_pd.dst_primitive_desc())); + else + dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data)); + + std::vector inputs; + for (size_t i = 0; i < srcs_mem.size(); ++i) { + inputs.push_back(srcs_mem[i]); + } + + auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem); + output_format = + (memory::format)sum_pd.dst_primitive_desc().desc().data.format; + + primitive reorder_prim; + std::shared_ptr target_mem; + if (in_place) { + output_format = input_format; + target_mem.reset(new memory( + {{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine}, + output_data)); + reorder_prim = reorder(*dst_mem, *target_mem); + } + + std::vector pipeline; + pipeline.push_back(sum_prim); + if (in_place) pipeline.push_back(reorder_prim); + stream(stream::kind::eager).submit(pipeline).wait(); + + output->set_layout(DataLayout::kMKLDNN); + output->set_format(output_format); + } else if (out_var->IsType()) { + // TODO(@mozga-intel) Add MKLDNN SelectedRows support + std::unique_ptr in0; + if (in_place) { + // If is in_place, we store the input[0] to in0 + auto& in_sel0 = in_vars[0]->Get(); + auto& 
rows = in_sel0.rows(); + in0.reset(new framework::SelectedRows(rows, in_sel0.height())); + in0->mutable_value()->ShareDataWith(in_sel0.value()); + } + + auto get_selected_row = [&](size_t i) -> const SelectedRows& { + if (i == 0 && in0) { + return *in0.get(); + } else { + return in_vars[i]->Get(); + } + }; + auto* out = ctx.Output("Out"); + out->mutable_rows()->clear(); + auto* out_value = out->mutable_value(); + + // Runtime InferShape + size_t first_dim = 0; + for (int i = 0; i < N; i++) { + auto& sel_row = get_selected_row(i); + first_dim += sel_row.rows().size(); + } + auto in_dim = + framework::vectorize(get_selected_row(N - 1).value().dims()); + in_dim[0] = static_cast(first_dim); + + out_value->Resize(framework::make_ddim(in_dim)); + + // if all the input sparse vars are empty, no need to + // merge these vars. + if (first_dim == 0UL) { + return; + } + out_value->mutable_data(ctx.GetPlace()); + math::SelectedRowsAddTo functor; + int64_t offset = 0; + for (int i = 0; i < N; i++) { + auto& sel_row = get_selected_row(i); + if (sel_row.rows().size() == 0) { + continue; + } + PADDLE_ENFORCE_EQ(out->height(), sel_row.height()); + functor(ctx.template device_context(), sel_row, + offset, out); + offset += sel_row.value().numel(); + } + } else if (out_var->IsType()) { + // TODO(@mozga-intel) Add MKLDNN LoDTensorArray support + auto& out_array = *out_var->GetMutable(); + for (size_t i = in_place ? 
1 : 0; i < in_vars.size(); ++i) { + PADDLE_ENFORCE(in_vars[i]->IsType(), + "Only support all inputs are TensorArray"); + auto& in_array = in_vars[i]->Get(); + + for (size_t i = 0; i < in_array.size(); ++i) { + if (in_array[i].numel() != 0) { + if (i >= out_array.size()) { + out_array.resize(i + 1); + } + if (out_array[i].numel() == 0) { + framework::TensorCopy(in_array[i], in_array[i].place(), + ctx.device_context(), &out_array[i]); + out_array[i].set_lod(in_array[i].lod()); + } else { + PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod()); + auto in = EigenVector::Flatten(in_array[i]); + auto result = EigenVector::Flatten(out_array[i]); + result.device(*ctx.template device_context() + .eigen_device()) = result + in; + } + } + } + } + } else { + PADDLE_THROW("Unexpected branch, output variable type is %s", + out_var->Type().name()); + } + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_KERNEL(sum, MKLDNN, ::paddle::platform::CPUPlace, + paddle::operators::SumMKLDNNOpKernel); diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 863baba9ea..fe7c7039c7 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -18,6 +18,10 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/operators/detail/safe_ref.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace operators { using framework::Tensor; @@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { auto x_vars = ctx.MultiInputVar("X"); + + framework::LibraryType library{framework::LibraryType::kPlain}; + framework::DataLayout layout{framework::DataLayout::kAnyLayout}; + +#ifdef PADDLE_WITH_MKLDNN + if (library == framework::LibraryType::kPlain && + platform::CanMKLDNNBeUsed(ctx)) { + library = framework::LibraryType::kMKLDNN; + layout = framework::DataLayout::kMKLDNN; + } +#endif + if (x_vars[0]->IsType()) { int dtype = -1; for (auto& x_var : x_vars) { @@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel { "Sum operator should have at least one tensor"); return framework::OpKernelType( - static_cast(dtype), - ctx.device_context()); + static_cast(dtype), ctx.GetPlace(), + layout, library); } else if (x_vars[0]->IsType()) { for (auto& var : x_vars) { auto& value = var->Get().value(); if (value.IsInitialized()) { return framework::OpKernelType(framework::ToDataType(value.type()), - ctx.device_context()); + ctx.device_context(), layout, library); } } // if input sparse vars are not initialized, use an default kernel type. 
return framework::OpKernelType(framework::proto::VarType::FP32, - ctx.device_context()); + ctx.device_context(), layout, library); } else if (x_vars[0]->IsType()) { for (auto& x_var : x_vars) { auto& array = x_var->Get(); for (auto& each : array) { if (each.numel() != 0) { return framework::OpKernelType(framework::ToDataType(each.type()), - ctx.device_context()); + ctx.device_context(), layout, + library); } } } @@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "(vector) The input tensors of sum operator.") .AsDuplicable(); AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X"); + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false); AddComment(R"DOC( Sum operator. @@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference { framework::BlockDesc* block) const override { auto& inputs = op_desc.Input("X"); auto var_type = framework::proto::VarType::SELECTED_ROWS; - for (auto& name : op_desc.Input("X")) { VLOG(10) << name << " " << block->FindRecursiveOrCreateVar(name).GetType(); @@ -206,6 +225,7 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker, ops::SumOpVarTypeInference); + REGISTER_OP_CPU_KERNEL( sum, ops::SumKernel, ops::SumKernel, diff --git a/paddle/fluid/operators/while_op.cc b/paddle/fluid/operators/while_op.cc index 175c3ac5d7..f440058e8d 100644 --- a/paddle/fluid/operators/while_op.cc +++ b/paddle/fluid/operators/while_op.cc @@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase { ->set_lod(inside_tensor.lod()); } } - auto new_inside_name = cur_scope.Rename(inside_grad_name); auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, - {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); + {{"Out", {pg_names[param_id]}}}, + framework::AttributeMap{{"use_mkldnn", {false}}}); sum_op->Run(cur_scope, 
dev_place); cur_scope.Rename(new_inside_name, inside_grad_name); } diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 4f9622d04d..19c9b2fad4 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs): for idx, op_desc in enumerate(op_descs): for var_name in op_desc.input_arg_names(): if len(renamed_vars[var_name]) > 1: - pending_sum_ops.append( - (_create_op_desc_("sum", {"X": renamed_vars[var_name]}, - {"Out": [var_name]}, {}), idx)) + pending_sum_ops.append((_create_op_desc_( + "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]}, + {"use_mkldnn": False}), idx)) renamed_vars[var_name] = [var_name] for var_name in op_desc.output_arg_names(): if var_name == core.empty_var_name( @@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs): renamed_vars[var_name].append(new_name) for var_name, inputs in renamed_vars.iteritems(): if len(inputs) > 1: - pending_sum_ops.append((_create_op_desc_( - "sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs))) + pending_sum_ops.append( + (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]}, + {"use_mkldnn": False}), len(op_descs))) # sum_op descs are sorted according to their insert position for p in reversed(pending_sum_ops): op_descs.insert(p[1], p[0]) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index f6f188df0d..aaba7b55f0 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -All layers just related to the neural network. +All layers just related to the neural network. """ from ..layer_helper import LayerHelper @@ -108,14 +108,14 @@ def fc(input, """ **Fully Connected Layer** - This function creates a fully connected layer in the network. It can take - multiple tensors as its inputs. 
It creates a variable called weights for - each input tensor, which represents a fully connected weight matrix from - each input unit to each output unit. The fully connected layer multiplies - each input tensor with its coresponding weight to produce an output Tensor. - If multiple input tensors are given, the results of multiple multiplications - will be sumed up. If bias_attr is not None, a bias variable will be created - and added to the output. Finally, if activation is not None, it will be applied + This function creates a fully connected layer in the network. It can take + multiple tensors as its inputs. It creates a variable called weights for + each input tensor, which represents a fully connected weight matrix from + each input unit to each output unit. The fully connected layer multiplies + each input tensor with its coresponding weight to produce an output Tensor. + If multiple input tensors are given, the results of multiple multiplications + will be sumed up. If bias_attr is not None, a bias variable will be created + and added to the output. Finally, if activation is not None, it will be applied to the output as well. This process can be formulated as follows: @@ -197,7 +197,10 @@ def fc(input, else: pre_bias = helper.create_tmp_variable(dtype) helper.append_op( - type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias}) + type="sum", + inputs={"X": mul_results}, + outputs={"Out": pre_bias}, + attrs={"use_mkldnn": use_mkldnn}) # add bias pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims) # add activation @@ -846,7 +849,7 @@ def crf_decoding(input, param_attr, label=None): Returns: Variable: ${viterbi_path_comment} - + Examples: .. code-block:: python @@ -1084,7 +1087,7 @@ def chunk_eval(input, Here is a NER example of labeling for these tagging schemes: .. code-block:: python - + ====== ====== ====== ===== == ============ ===== ===== ===== == ========= Li Ming works at Agricultural Bank of China in Beijing. 
====== ====== ====== ===== == ============ ===== ===== ===== == ========= @@ -1110,7 +1113,7 @@ def chunk_eval(input, is the num of chunk types, and `tag_type` get its value from the following table. .. code-block:: python - + Scheme Begin Inside End Single plain 0 - - - IOB 0 1 - - @@ -1146,7 +1149,7 @@ def chunk_eval(input, tuple: tuple containing: precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks - + Examples: .. code-block:: python @@ -1246,7 +1249,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): """ This function computes the softmax activation among all time-steps for each sequence. The dimension of each time-step should be 1. Thus, the shape of - input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N` + input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N` is the sum of the length of all sequences. For i-th sequence in a mini-batch: @@ -1266,7 +1269,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): param_attr (ParamAttr|None): attributes for parameter use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \ library is installed. Default: True - + Returns: Variable: output of sequence_softmax @@ -1827,11 +1830,11 @@ def pool2d(input, ${comment} Args: - input (Variable): The input tensor of pooling operator. The format of - input tensor is NCHW, where N is batch size, C is - the number of channels, H is the height of the + input (Variable): The input tensor of pooling operator. The format of + input tensor is NCHW, where N is batch size, C is + the number of channels, H is the height of the feature, and W is the width of the feature. - pool_size (int): The side length of pooling windows. All pooling + pool_size (int): The side length of pooling windows. All pooling windows are squares with pool_size on a side. pool_type: ${pooling_type_comment} pool_stride (int): stride of the pooling layer. 
@@ -1840,7 +1843,7 @@ def pool2d(input, use_cudnn: ${use_cudnn_comment} ceil_mode: ${ceil_mode_comment} use_mkldnn: ${use_mkldnn_comment} - name (str|None): A name for this layer(optional). If set None, the + name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. Returns: @@ -1858,10 +1861,10 @@ def pool2d(input, data = fluid.layers.data( name='data', shape=[3, 32, 32], dtype='float32') conv2d = fluid.layers.pool2d( - input=data, - pool_size=2, - pool_type='max', - pool_stride=1, + input=data, + pool_size=2, + pool_type='max', + pool_stride=1, global_pooling=False) """ if pool_type not in ["max", "avg"]: @@ -2226,14 +2229,14 @@ def beam_search_decode(ids, scores, name=None): This layers is to pack the output of beam search layer into sentences and associated scores. It is usually called after the beam search layer. Typically, the output of beam search layer is a tensor of selected ids, with - a tensor of the score of each id. Beam search layer's output ids, however, - are generated directly during the tree search, and they are stacked by each - level of the search tree. Thus we need to reorganize them into sentences, + a tensor of the score of each id. Beam search layer's output ids, however, + are generated directly during the tree search, and they are stacked by each + level of the search tree. Thus we need to reorganize them into sentences, based on the score of each id. This layer takes the output of beam search layer as input and repack them into sentences. Args: - ids (Variable): The selected ids, output of beam search layer. + ids (Variable): The selected ids, output of beam search layer. scores (Variable): The associated scores of the ids, out put of beam search layer. name (str): The name of this layer. It is optional. @@ -2241,7 +2244,7 @@ def beam_search_decode(ids, scores, name=None): Returns: tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores. 
sentence_ids is a tensor with shape [size, length], where size is the - beam size of beam search, and length is the length of each sentence. + beam size of beam search, and length is the length of each sentence. Note that the length of sentences may vary. sentence_scores is a tensor with the same shape as sentence_ids. @@ -2901,7 +2904,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None): `None`, compute the mean over all elements of :attr:`input` and return a variable with a single element, otherwise it must be in the range :math:`[-rank(input), rank(input))`. If - :math:`dim[i] < 0`, the dimension to reduce is + :math:`dim[i] < 0`, the dimension to reduce is :math:`rank(input) + dim[i]`. keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension @@ -3372,16 +3375,16 @@ def topk(input, k, name=None): Args: input(Variable): The input variable which can be a vector or Tensor with higher rank. - k(int): The number of top elements to look for along the last dimension + k(int): The number of top elements to look for along the last dimension of input. name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + will be named automatically. Default: None Returns: - Tuple[Variable]: A tuple with two elements. Each element is a Variable. - The first one is k largest elements along each last - dimensional slice. The second one is indices of values + Tuple[Variable]: A tuple with two elements. Each element is a Variable. + The first one is k largest elements along each last + dimensional slice. The second one is indices of values within the last dimension of input. Raises: @@ -3576,15 +3579,15 @@ def warpctc(input, label, blank=0, norm_by_times=False): It's shape is [Lp, num_classes + 1], where Lp is the sum of all input sequences' length and num_classes is the true number of classes. (not including the blank label). 
- label (Variable): The ground truth of variable-length sequence, + label (Variable): The ground truth of variable-length sequence, which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1], where Lg is th sum of all labels' length. blank (int, default 0): The blank label index of Connectionist Temporal Classification (CTC) loss, which is in the half-opened interval [0, num_classes + 1). - norm_by_times(bool, default false): Whether to normalize the gradients - by the number of time-step, which is also the sequence's length. - There is no need to normalize the gradients if warpctc layer was + norm_by_times(bool, default false): Whether to normalize the gradients + by the number of time-step, which is also the sequence's length. + There is no need to normalize the gradients if warpctc layer was follewed by a mean_op. Returns: @@ -3690,8 +3693,8 @@ def nce(input, input (Variable): input variable. label (Variable): label. num_total_classes (int):${num_total_classes_comment} - sample_weight (Variable|None): A Variable of shape [batch_size, 1] - storing a weight for each sample. The default weight for each + sample_weight (Variable|None): A Variable of shape [batch_size, 1] + storing a weight for each sample. The default weight for each sample is 1.0. param_attr (ParamAttr|None): attributes for parameter bias_attr (ParamAttr|None): attributes for bias @@ -4081,7 +4084,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`. It takes the first dimension of :attr:`x` and :attr:`y` as batch size. For each instance, it computes the smooth L1 loss element by element first - and then sums all the losses. So the shape of ouput Variable is + and then sums all the losses. So the shape of ouput Variable is [batch_size, 1]. Args: @@ -4090,14 +4093,14 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): y (Variable): A tensor with rank at least 2. 
The target value of smooth L1 loss op with same shape as :attr:`x`. inside_weight (Variable|None): A tensor with rank at least 2. This - input is optional and should have same shape with :attr:`x`. If - provided, the result of (:attr:`x` - :attr:`y`) will be multiplied + input is optional and should have same shape with :attr:`x`. If + provided, the result of (:attr:`x` - :attr:`y`) will be multiplied by this tensor element by element. outside_weight (Variable|None): A tensor with rank at least 2. This - input is optional and should have same shape with :attr:`x`. If - provided, the out smooth L1 loss will be multiplied by this tensor + input is optional and should have same shape with :attr:`x`. If + provided, the out smooth L1 loss will be multiplied by this tensor element by element. - sigma (float|None): Hyper parameter of smooth L1 loss layer. A float + sigma (float|None): Hyper parameter of smooth L1 loss layer. A float scalar with default value 1.0. Returns: @@ -4143,7 +4146,7 @@ def one_hot(input, depth): Examples: .. code-block:: python - + label = layers.data(name="label", shape=[1], dtype="float32") one_hot_label = layers.one_hot(input=label, depth=10) """ @@ -4297,10 +4300,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): def lod_reset(x, y=None, target_lod=None): """ Set LoD of :attr:`x` to a new one specified by :attr:`y` or - :attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be - considered as target LoD first, otherwise :attr:`y.data` would be - considered as target LoD. If :attr:`y` is not provided, target LoD should - be specified by :attr:`target_lod`. If target LoD is specified by + :attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be + considered as target LoD first, otherwise :attr:`y.data` would be + considered as target LoD. If :attr:`y` is not provided, target LoD should + be specified by :attr:`target_lod`. 
If target LoD is specified by :attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported. .. code-block:: text @@ -4354,7 +4357,7 @@ def lod_reset(x, y=None, target_lod=None): Args: x (Variable): Input variable which could be a Tensor or LodTensor. - y (Variable|None): If provided, output's LoD would be derived + y (Variable|None): If provided, output's LoD would be derived from :attr:`y`. target_lod (list|tuple|None): One level LoD which should be considered as target LoD when :attr:`y` not provided. @@ -4670,7 +4673,7 @@ def image_resize(input, """ **Resize a Batch of Images** - The input must be a tensor of the shape (num_batches, channels, in_h, in_w), + The input must be a tensor of the shape (num_batches, channels, in_h, in_w), and the resizing only applies on the last two dimensions(hight and width). Supporting resample methods: @@ -4766,9 +4769,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): def image_resize_short(input, out_short_len, resample='BILINEAR'): """ - Resize a batch of images. The short edge of input images will be - resized to the given 'out_short_len'. The long edge of input images - will be resized proportionately to make images' length-width ratio + Resize a batch of images. The short edge of input images will be + resized to the given 'out_short_len'. The long edge of input images + will be resized proportionately to make images' length-width ratio constant. Args: @@ -4801,7 +4804,7 @@ def gather(input, index): """ **Gather Layer** - Output is obtained by gathering entries of the outer-most dimension + Output is obtained by gathering entries of the outer-most dimension of X indexed by `index` and concatenate them together. .. math:: @@ -4826,7 +4829,7 @@ def gather(input, index): [5, 6]] Args: - input (Variable): The source input with rank>=1. + input (Variable): The source input with rank>=1. index (Variable): The index input with rank=1. 
Returns: @@ -4862,7 +4865,7 @@ def random_crop(x, shape, seed=None): Returns: ${out_comment} - + Examples: >>> img = fluid.layers.data("img", [3, 256, 256]) >>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224]) @@ -4908,7 +4911,7 @@ def log(x): Out = \\ln(x) Args: - x (Variable): Input tensor. + x (Variable): Input tensor. Returns: Variable: The natural log of the input tensor computed element-wise. @@ -4937,7 +4940,7 @@ def relu(x): Out = \\max(0, x) Args: - x (Variable): The input tensor. + x (Variable): The input tensor. Returns: Variable: The output tensor with the same shape as input. @@ -4958,15 +4961,15 @@ def relu(x): def mean_iou(input, label, num_classes): """ Mean Intersection-Over-Union is a common evaluation metric for - semantic image segmentation, which first computes the IOU for each - semantic class and then computes the average over classes. - IOU is defined as follows: - + semantic image segmentation, which first computes the IOU for each + semantic class and then computes the average over classes. + IOU is defined as follows: + .. math:: IOU = \\frac{true\_positiv}{(true\_positive + false\_positive + false\_negative)}. - The predictions are accumulated in a confusion matrix and mean-IOU + The predictions are accumulated in a confusion matrix and mean-IOU is then calculated from it. @@ -4979,12 +4982,12 @@ def mean_iou(input, label, num_classes): Returns: mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1]. out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class. - out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class. + out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class. Examples: .. 
code-block:: python - + iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes) """ helper = LayerHelper('mean_iou', **locals()) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 149e77b524..b7a8bff30d 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -230,7 +230,11 @@ def sums(input, out=None): helper = LayerHelper('sum', **locals()) if out is None: out = helper.create_tmp_variable(dtype=helper.input_dtype()) - helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out}) + helper.append_op( + type='sum', + inputs={'X': input}, + outputs={'Out': out}, + attrs={'use_mkldnn': False}) return out @@ -380,7 +384,7 @@ def argmin(x, axis=0): """ **argmin** - This function computes the indices of the min elements + This function computes the indices of the min elements of the input tensor's element along the provided axis. Args: @@ -395,7 +399,7 @@ def argmin(x, axis=0): .. code-block:: python out = fluid.layers.argmin(x=in, axis=0) - out = fluid.layers.argmin(x=in, axis=-1) + out = fluid.layers.argmin(x=in, axis=-1) """ helper = LayerHelper("arg_min", **locals()) out = helper.create_tmp_variable(VarDesc.VarType.INT64) @@ -411,7 +415,7 @@ def argmax(x, axis=0): """ **argmax** - This function computes the indices of the max elements + This function computes the indices of the max elements of the input tensor's element along the provided axis. Args: @@ -426,7 +430,7 @@ def argmax(x, axis=0): .. code-block:: python out = fluid.layers.argmax(x=in, axis=0) - out = fluid.layers.argmax(x=in, axis=-1) + out = fluid.layers.argmax(x=in, axis=-1) """ helper = LayerHelper("arg_max", **locals()) out = helper.create_tmp_variable(VarDesc.VarType.INT64) @@ -495,9 +499,9 @@ def reverse(x, axis): Args: x(Vairbale): the input to be reversed. - axis(int|tuple|list): Axis that along which order of elements - is reversed. 
If it is a tuple or a list, reversing - will be apply on each axis in the tuple or list. + axis(int|tuple|list): Axis that along which order of elements + is reversed. If it is a tuple or a list, reversing + will be apply on each axis in the tuple or list. Returns: Variable: The reversed tensor. @@ -528,9 +532,9 @@ def save(x, file_path, overwrite=True): Args: x(variable): The Tensor/LoDTensor to be saved. file_path(str): The file path where the variable will be saved. - overwrite(bool): Whether or not cover the given file when it has already - existed. If it's set 'False' and the file is existed, a runtime - error will be thrown. + overwrite(bool): Whether or not cover the given file when it has already + existed. If it's set 'False' and the file is existed, a runtime + error will be thrown. """ helper = LayerHelper("save", **locals()) helper.append_op( @@ -550,8 +554,8 @@ def save_combine(x, file_path, overwrite=True): a single file. file_path(str): The file path where variables will be saved. overwrite(bool): Whether or not cover the given file when it has already - existed. If it's set 'False' and the file is existed, a runtime - error will be thrown. + existed. If it's set 'False' and the file is existed, a runtime + error will be thrown. Returns: There is no return value. 
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 99146bcfe5..d62a184e97 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -824,7 +824,8 @@ class DistributeTranspiler: table_opt_block.append_op( type="sum", inputs={"X": pserver_side_table_grad_list}, - outputs={"Out": [grad_var]}) + outputs={"Out": [grad_var]}, + attrs={"use_mkldnn": False}) else: # in async_mode, for table gradient, it also need to be splited to each parameter server origin_grad_name = grad_var.name @@ -1056,7 +1057,8 @@ class DistributeTranspiler: optimize_block.append_op( type="sum", inputs={"X": vars2merge}, - outputs={"Out": merged_var}) + outputs={"Out": merged_var}, + attrs={"use_mkldnn": False}) # TODO(panyx0718): What if it's SELECTED_ROWS. if not merged_var.type == core.VarDesc.VarType.SELECTED_ROWS: optimize_block.append_op( diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 44a6e34463..1f83cabb84 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"): class PipeReader: """ - PipeReader read data by stream from a command, take it's + PipeReader read data by stream from a command, take it's stdout into a pipe buffer and redirect it to the parser to parse, then yield data as your desired format. @@ -352,7 +352,7 @@ class PipeReader: An example: .. 
code-block:: python - + def example_reader(): for f in myfiles: pr = PipeReader("cat %s"%f) From 6512be59ece0a452ea8784ee5f04edacc4881692 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Fri, 15 Jun 2018 17:06:35 +0200 Subject: [PATCH 33/46] MKLDNN layout: the code-review changes --- paddle/fluid/operators/sum_mkldnn_op.cc | 9 ++++----- paddle/fluid/platform/mkldnn_helper.h | 6 ++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/sum_mkldnn_op.cc b/paddle/fluid/operators/sum_mkldnn_op.cc index 1f0c3ab023..0e201420ce 100644 --- a/paddle/fluid/operators/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/sum_mkldnn_op.cc @@ -118,19 +118,18 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd); std::shared_ptr dst_mem; - if (in_place) + if (in_place) { dst_mem.reset(new memory(sum_pd.dst_primitive_desc())); - else + } else { dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data)); - + } std::vector inputs; for (size_t i = 0; i < srcs_mem.size(); ++i) { inputs.push_back(srcs_mem[i]); } auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem); - output_format = - (memory::format)sum_pd.dst_primitive_desc().desc().data.format; + output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd); primitive reorder_prim; std::shared_ptr target_mem; diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index de711b7d23..2689d5e078 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -99,5 +99,11 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) { memory.get_primitive_desc().desc().data.format); } +inline mkldnn::memory::format GetMKLDNNFormat( + const mkldnn::sum::primitive_desc& memory) { + return static_cast( + memory.dst_primitive_desc().desc().data.format); +} + } // namespace platform } // namespace paddle From 
701102283c4a80877a6c4c75b1f6fe170dc0d16d Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Fri, 15 Jun 2018 18:17:26 +0200 Subject: [PATCH 34/46] MKLDNN layouts: Gaussian random layout --- .../operators/gaussian_random_mkldnn_op.cc | 73 +++++++++++++++++++ paddle/fluid/operators/gaussian_random_op.cc | 21 +++++- 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 paddle/fluid/operators/gaussian_random_mkldnn_op.cc diff --git a/paddle/fluid/operators/gaussian_random_mkldnn_op.cc b/paddle/fluid/operators/gaussian_random_mkldnn_op.cc new file mode 100644 index 0000000000..2748ad3e63 --- /dev/null +++ b/paddle/fluid/operators/gaussian_random_mkldnn_op.cc @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "paddle/fluid/operators/mean_op.h" + +#include "mkldnn.hpp" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/math/selected_rows_functor.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/mkldnn_helper.h" + +#include "paddle/fluid/framework/eigen.h" +namespace paddle { +namespace operators { + +using paddle::framework::Tensor; +using paddle::platform::MKLDNNDeviceContext; +using paddle::platform::MKLDNNMemDesc; +using paddle::platform::CPUDeviceContext; + +using mkldnn::memory; // Note: paddle has also "memory" namespace +using mkldnn::primitive; +using mkldnn::softmax_forward; +using mkldnn::prop_kind; +using mkldnn::stream; + +using framework::DataLayout; +template +class GaussianMKLDNNKernel : public paddle::framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + float mean = context.Attr("mean"); + float std = context.Attr("std"); + auto* tensor = context.Output("Out"); + T* data = tensor->mutable_data(context.GetPlace()); + + unsigned int seed = static_cast(context.Attr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + std::normal_distribution dist(mean, std); + int64_t size = tensor->numel(); + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(engine); + } + + // The format of output is set as the mkldnn's format + // TODO(@mozga-intel) The format of matrix sets inside the another layers. 
+ tensor->set_layout(DataLayout::kMKLDNN); + tensor->set_format(mkldnn::memory::format::Ohwi16o); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_KERNEL(gaussian_random, MKLDNN, ::paddle::platform::CPUPlace, + ops::GaussianMKLDNNKernel); diff --git a/paddle/fluid/operators/gaussian_random_op.cc b/paddle/fluid/operators/gaussian_random_op.cc index 815c1bb509..1488aab192 100644 --- a/paddle/fluid/operators/gaussian_random_op.cc +++ b/paddle/fluid/operators/gaussian_random_op.cc @@ -15,6 +15,10 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace operators { @@ -62,9 +66,20 @@ class GaussianRandomOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { + framework::LibraryType library{framework::LibraryType::kPlain}; + framework::DataLayout layout{framework::DataLayout::kAnyLayout}; + +#ifdef PADDLE_WITH_MKLDNN + if (library == framework::LibraryType::kPlain && + platform::CanMKLDNNBeUsed(ctx)) { + library = framework::LibraryType::kMKLDNN; + layout = framework::DataLayout::kMKLDNN; + } +#endif + return framework::OpKernelType( static_cast(ctx.Attr("dtype")), - ctx.device_context()); + ctx.device_context(), layout, library); } }; @@ -95,7 +110,9 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { "(int, default 5(FP32)) " "Output data type.") .SetDefault(framework::proto::VarType::FP32); - + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false); AddComment(R"DOC( GaussianRandom Operator. 
From 8cc249ef105ab01ef4135970fd38d4a6279ef089 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 19 Jun 2018 17:44:19 +0800 Subject: [PATCH 35/46] make data_feeder support dynamic shape --- python/paddle/fluid/data_feeder.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index ac39600201..64e27b5891 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -29,6 +29,14 @@ class DataToLoDTensorConverter(object): self.place = place self.lod_level = lod_level self.shape = shape + self.dynamic_shape = False + negtive_count = 0 + for s in self.shape: + if s < 0: + negtive_count += 1 + if negtive_count > 1: + self.shape = None + break if dtype == core.VarDesc.VarType.FP32: self.dtype = 'float32' elif dtype == core.VarDesc.VarType.INT64: @@ -61,7 +69,9 @@ class DataToLoDTensorConverter(object): self._feed_impl_(each_data, lod[1:], lod_level - 1) def done(self): - arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape) + arr = numpy.array(self.data, dtype=self.dtype) + if self.shape: + arr = arr.reshape(self.shape) t = core.LoDTensor() t.set(arr, self.place) if self.lod_level > 0: From dfe54a4fbefa2472f52d1aff4f67812616663860 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 19 Jun 2018 17:49:30 +0800 Subject: [PATCH 36/46] update --- python/paddle/fluid/data_feeder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index 64e27b5891..f96a2d2827 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -29,7 +29,6 @@ class DataToLoDTensorConverter(object): self.place = place self.lod_level = lod_level self.shape = shape - self.dynamic_shape = False negtive_count = 0 for s in self.shape: if s < 0: From 9ff77a76dee0ad62e161eda601b45a900831d4c9 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 19 Jun 2018 17:56:03 
+0800 Subject: [PATCH 37/46] fix mkldnn compile issue --- cmake/external/mkldnn.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 25c07850dd..48d3d2db7f 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML") ELSE() MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN") ENDIF() -SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result") +SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result") +SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value") SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}") SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}") ExternalProject_Add( From b88cda84f48a3345212c9fe2e79a0c94d2c588ef Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Tue, 19 Jun 2018 11:59:15 +0200 Subject: [PATCH 38/46] MKLDNN sum unit-test --- paddle/fluid/operators/sum_mkldnn_op.cc | 1 - .../tests/unittests/test_sum_mkldnn_op.py | 26 +++++++++++++++++++ .../fluid/tests/unittests/test_sum_op.py | 6 +++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py diff --git a/paddle/fluid/operators/sum_mkldnn_op.cc b/paddle/fluid/operators/sum_mkldnn_op.cc index 0e201420ce..f78d977760 100644 --- a/paddle/fluid/operators/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/sum_mkldnn_op.cc @@ -53,7 +53,6 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { "It must use CPUPlace."); auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - auto in_vars = ctx.MultiInputVar("X"); const int N = in_vars.size(); diff --git a/python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py b/python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py new file mode 100644 index 0000000000..7956897d68 --- /dev/null +++ 
b/python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py @@ -0,0 +1,26 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from test_sum_op import TestSumOp + + +class TestMKLDNN(TestSumOp): + def init_kernel_type(self): + self.use_mkldnn = True + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 2faf5b1064..1d90414e13 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -20,12 +20,15 @@ from op_test import OpTest class TestSumOp(OpTest): def setUp(self): self.op_type = "sum" + self.use_mkldnn = False + self.init_kernel_type() x0 = np.random.random((3, 4)).astype('float32') x1 = np.random.random((3, 4)).astype('float32') x2 = np.random.random((3, 4)).astype('float32') self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]} y = x0 + x1 + x2 self.outputs = {'Out': y} + self.attrs = {'use_mkldnn': self.use_mkldnn} def test_check_output(self): self.check_output() @@ -33,6 +36,9 @@ class TestSumOp(OpTest): def test_check_grad(self): self.check_grad(['x0'], 'Out') + def init_kernel_type(self): + pass + if __name__ == "__main__": unittest.main() From 7b9aa6019882a0f66c903e9b5e14d614fcd6bb54 Mon Sep 17 00:00:00 2001 From: mozga-intel Date: Tue, 19 Jun 2018 12:07:56 +0200 Subject: [PATCH 39/46] 
MKLDNN gausian_random tests --- .../operators/gaussian_random_mkldnn_op.cc | 20 +------------- .../test_gaussian_random_mkldnn_op.py | 26 +++++++++++++++++++ .../unittests/test_gaussian_random_op.py | 13 +++++++++- 3 files changed, 39 insertions(+), 20 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py diff --git a/paddle/fluid/operators/gaussian_random_mkldnn_op.cc b/paddle/fluid/operators/gaussian_random_mkldnn_op.cc index 2748ad3e63..76b00b396c 100644 --- a/paddle/fluid/operators/gaussian_random_mkldnn_op.cc +++ b/paddle/fluid/operators/gaussian_random_mkldnn_op.cc @@ -15,27 +15,9 @@ limitations under the License. */ #include #include "paddle/fluid/operators/mean_op.h" -#include "mkldnn.hpp" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/selected_rows_functor.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/mkldnn_helper.h" - -#include "paddle/fluid/framework/eigen.h" namespace paddle { namespace operators { -using paddle::framework::Tensor; -using paddle::platform::MKLDNNDeviceContext; -using paddle::platform::MKLDNNMemDesc; -using paddle::platform::CPUDeviceContext; - -using mkldnn::memory; // Note: paddle has also "memory" namespace -using mkldnn::primitive; -using mkldnn::softmax_forward; -using mkldnn::prop_kind; -using mkldnn::stream; - using framework::DataLayout; template class GaussianMKLDNNKernel : public paddle::framework::OpKernel { @@ -61,7 +43,7 @@ class GaussianMKLDNNKernel : public paddle::framework::OpKernel { // The format of output is set as the mkldnn's format // TODO(@mozga-intel) The format of matrix sets inside the another layers. 
tensor->set_layout(DataLayout::kMKLDNN); - tensor->set_format(mkldnn::memory::format::Ohwi16o); + tensor->set_format(mkldnn::memory::format::oihw); } }; } // namespace operators diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py b/python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py new file mode 100644 index 0000000000..3ae877a608 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py @@ -0,0 +1,26 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +from test_gaussian_random_op import TestGaussianRandomOp + + +class TestMKLDNN(TestGaussianRandomOp): + def init_kernel_type(self): + self.use_mkldnn = True + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py index 272caceaf3..8481500fd7 100644 --- a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py @@ -25,7 +25,15 @@ class TestGaussianRandomOp(unittest.TestCase): def setUp(self): self.op_type = "gaussian_random" self.inputs = {} - self.attrs = {"shape": [1000, 784], "mean": .0, "std": 1., "seed": 10} + self.use_mkldnn = False + self.init_kernel_type() + self.attrs = { + "shape": [1000, 784], + "mean": .0, + "std": 1., + "seed": 10, + "use_mkldnn": self.use_mkldnn + } self.outputs = ["Out"] @@ -58,6 +66,9 @@ class TestGaussianRandomOp(unittest.TestCase): self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) + def init_kernel_type(self): + pass + if __name__ == "__main__": unittest.main() From c22ebb3bde197cdeaa4fc3f495707fe4da4109b6 Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Tue, 19 Jun 2018 18:37:27 +0800 Subject: [PATCH 40/46] Expose crop op into Python API. 
(#11546) --- python/paddle/fluid/layers/nn.py | 99 +++++++++++++++++++ .../fluid/tests/unittests/test_layers.py | 9 ++ 2 files changed, 108 insertions(+) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index f6f188df0d..2c397d0429 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -93,6 +93,7 @@ __all__ = [ 'mean_iou', 'relu', 'log', + 'crop', ] @@ -5003,3 +5004,101 @@ def mean_iou(input, label, num_classes): }, attrs={"num_classes": num_classes}) return out_mean_iou, out_wrong, out_correct + + +def crop(x, shape=None, offsets=None, name=None): + """ + Crop input into output, as specified by offsets and shape. + + .. code-block:: text + + * Case 1: + Given + X = [[0, 1, 2, 0, 0] + [0, 3, 4, 0, 0] + [0, 0, 0, 0, 0]], + and + shape = [2, 2], + offsets = [0, 1], + output is: + Out = [[1, 2], + [3, 4]]. + * Case 2: + Given + X = [[0, 1, 2, 5, 0] + [0, 3, 4, 6, 0] + [0, 0, 0, 0, 0]], + and shape is tensor + shape = [[0, 0, 0] + [0, 0, 0]] + and + offsets = [0, 1], + + output is: + Out = [[1, 2, 5], + [3, 4, 6]]. + + Args: + x (Variable): The input tensor variable. + shape (Variable|list/tuple of integer): The output shape is specified + by `shape`, which can a Variable or a list/tupe of integer. + If a tensor Variable, it's rank must be the same as `x`. This way + is suitable for the case that the output shape may be changed each + iteration. If a list/tupe of integer, it's length must be the same + as the rank of `x` + offsets (Variable|list/tuple of integer|None): Specifies the copping + offsets at each dimension. It can be a Variable or or a list/tupe + of integer. If a tensor Variable, it's rank must be the same as `x`. + This way is suitable for the case that the offsets may be changed + each iteration. If a list/tupe of integer, it's length must be the + same as the rank of `x`. If None, the offsets are 0 at each + dimension. + name(str|None): A name for this layer(optional). 
If set None, the layer + will be named automatically. + + Returns: + Variable: The cropped tensor variable. + + Raises: + ValueError: If shape is not a list, tuple or Variable. + + Examples: + + .. code-block:: python + + x = fluid.layers.data(name="x", shape=[3, 5], dtype="float32") + y = fluid.layers.data(name="y", shape=[2, 3], dtype="float32") + crop = fluid.layers.crop(x, shape=y) + + # or + z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32") + crop = fluid.layers.crop(z, shape=[2, 3]) + + """ + helper = LayerHelper('crop', **locals()) + + if not (isinstance(shape, list) or isinstance(shape, tuple) or \ + isinstance(shape, Variable)): + raise ValueError("The shape should be a list, tuple or Variable.") + + if offsets is None: + offsets = [0] * len(x.shape) + + out = helper.create_tmp_variable(x.dtype) + ipts = {'X': x} + attrs = {} + if isinstance(shape, Variable): + ipts['Y'] = shape + else: + attrs['shape'] = shape + if isinstance(offsets, Variable): + ipts['Offsets'] = offsets + else: + attrs['offsets'] = offsets + + helper.append_op( + type='crop', + inputs=ipts, + outputs={'Out': out}, + attrs=None if len(attrs) == 0 else attrs) + return out diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index f8cf6f4e2d..82074955fa 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -401,6 +401,15 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(output) print(str(program)) + def test_maxout(self): + program = Program() + with program_guard(program): + x = layers.data(name='x', shape=[3, 5], dtype="float32") + y = layers.data(name='y', shape=[2, 3], dtype="float32") + output = layers.crop(x, shape=y) + self.assertIsNotNone(output) + print(str(program)) + if __name__ == '__main__': unittest.main() From 7d7592dfc6521bfd369ce66552400399caaab299 Mon Sep 17 00:00:00 2001 From: yuyang18 Date: Tue, 19 Jun 2018 20:00:18 
+0800 Subject: [PATCH 41/46] add print_signatures.py --- tools/print_signatures.py | 67 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 tools/print_signatures.py diff --git a/tools/print_signatures.py b/tools/print_signatures.py new file mode 100644 index 0000000000..5e7ffd44c7 --- /dev/null +++ b/tools/print_signatures.py @@ -0,0 +1,67 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Print all signature of a python module in alphabet order. + +Usage: + ./print_signature "paddle.fluid" > signature.txt +""" +import importlib +import inspect +import collections +import sys +import pydoc + +member_dict = collections.OrderedDict() + + +def visit_member(parent_name, member): + cur_name = ".".join([parent_name, member.__name__]) + if inspect.isclass(member): + for name, value in inspect.getmembers(member): + if hasattr(value, '__name__') and (not name.startswith("_") or + name == "__init__"): + visit_member(cur_name, value) + elif callable(member): + try: + member_dict[cur_name] = inspect.getargspec(member) + except TypeError: # special for PyBind method + member_dict[cur_name] = " ".join([ + line.strip() for line in pydoc.render_doc(member).split('\n') + if "->" in line + ]) + + else: + raise RuntimeError("Unsupported generate signature of member, type {0}". 
+ format(str(type(member)))) + + +def visit_all_module(mod): + for member_name in ( + name + for name in (mod.__all__ if hasattr(mod, "__all__") else dir(mod)) + if not name.startswith("_")): + instance = getattr(mod, member_name, None) + if instance is None: + continue + if inspect.ismodule(instance): + visit_all_module(instance) + else: + visit_member(mod.__name__, instance) + + +visit_all_module(importlib.import_module(sys.argv[1])) + +for name in member_dict: + print name, member_dict[name] From 457d81bbc09e699ca439c7b8e087b72ee4159972 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 19 Jun 2018 21:00:15 +0800 Subject: [PATCH 42/46] fix errors --- python/paddle/fluid/backward.py | 8 ++++---- python/paddle/fluid/io.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 95421704db..f7bbc98fe1 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -489,10 +489,10 @@ def append_backward(loss, parameter_list=None, no_grad_set=None, Examples: .. code-block:: python - # network configuration code - # ... - avg_loss = fluid.layers.mean(loss) - param_grad_list = fluid.backward.append_backward(loss=avg_loss) + # network configuration code + # ... + avg_loss = fluid.layers.mean(loss) + param_grad_list = fluid.backward.append_backward(loss=avg_loss) """ assert isinstance(loss, framework.Variable) diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 88e7e3bb20..6e527572f1 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -886,8 +886,8 @@ def load_checkpoint(executor, checkpoint_dir, serial, main_program): `checkpoint_dir` directory. In the training precess, we generally save a checkpoint in each - iteration. So there are more than one checkpoint in the - `checkpoint_dir`(each checkpoint has its own sub folder), use + iteration. 
So there are more than one checkpoint in the + `checkpoint_dir` (each checkpoint has its own sub folder), use `serial` to specify which serial of checkpoint you would like to load. From 9c90dc9728813cbac15b9cf90d5bafb236056b3e Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Tue, 19 Jun 2018 21:42:40 +0800 Subject: [PATCH 43/46] Make the CUDA kernel of concat correct and fix unit tests. (#11541) * Make the CUDA kernel of concat correct and fix unit tests. --- paddle/fluid/operators/math/concat.cu | 41 +++++-------------- .../fluid/tests/unittests/test_concat_op.py | 13 +++++- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/paddle/fluid/operators/math/concat.cu b/paddle/fluid/operators/math/concat.cu index f66baa6573..6205f3cd85 100644 --- a/paddle/fluid/operators/math/concat.cu +++ b/paddle/fluid/operators/math/concat.cu @@ -22,43 +22,24 @@ namespace paddle { namespace operators { namespace math { -template -__device__ T upper_bound(const T* first, T count, T val) { - const T* orig = first; - const T* it = nullptr; - T step = 0; - while (count > 0) { - it = first; - step = count / 2; - it += step; - if (!(val < *it)) { - first = ++it; - count -= step + 1; - } else { - count = step; - } - } - return first - orig; -} - template __global__ void KernelConcat(T** inputs, const int* input_cols, int col_size, const int output_rows, const int output_cols, T* output) { int tid_x = blockIdx.x * blockDim.x + threadIdx.x; - int segment = upper_bound(input_cols, col_size, tid_x) - 1; - - int curr_offset = input_cols[segment]; - int curr_segment = segment; + int curr_segment = 0; + int curr_offset = input_cols[0]; for (; tid_x < output_cols; tid_x += blockDim.x * gridDim.x) { - T curr_col_offset; - while ((curr_col_offset = input_cols[curr_segment + 1]) <= tid_x) { + int curr_col_offset = input_cols[curr_segment + 1]; + while (curr_col_offset <= tid_x) { curr_offset = curr_col_offset; ++curr_segment; + curr_col_offset = input_cols[curr_segment + 1]; } int 
local_col = tid_x - curr_offset; int segment_width = curr_col_offset - curr_offset; + T* input_ptr = inputs[curr_segment]; int tid_y = blockIdx.y * blockDim.y + threadIdx.y; for (; tid_y < output_rows; tid_y += blockDim.y * gridDim.y) @@ -89,14 +70,14 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row, const int in_col, const int* out_cols, int out_cols_size, T** outputs_data) { int tid_x = blockIdx.x * blockDim.x + threadIdx.x; - int segment = upper_bound(out_cols, out_cols_size, tid_x) - 1; - int curr_offset = out_cols[segment]; - int curr_segment = segment; + int curr_segment = 0; + int curr_offset = out_cols[0]; for (; tid_x < in_col; tid_x += blockDim.x * gridDim.x) { - T curr_col_offset; - while ((curr_col_offset = out_cols[curr_segment + 1]) <= tid_x) { + int curr_col_offset = out_cols[curr_segment + 1]; + while (curr_col_offset <= tid_x) { curr_offset = curr_col_offset; ++curr_segment; + curr_col_offset = out_cols[curr_segment + 1]; } int local_col = tid_x - curr_offset; diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index 1e00d67d54..e9f3c45dc4 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -43,7 +43,7 @@ class TestConcatOp(OpTest): self.axis = 1 -class TestConcatOp2(OpTest): +class TestConcatOp2(TestConcatOp): def init_test_data(self): self.x0 = np.random.random((2, 3, 4, 5)).astype('float32') self.x1 = np.random.random((2, 3, 4, 5)).astype('float32') @@ -51,5 +51,16 @@ class TestConcatOp2(OpTest): self.axis = 1 +class TestConcatOp3(TestConcatOp): + def init_test_data(self): + self.x0 = np.random.random((1, 256, 170, 256)).astype('float32') + self.x1 = np.random.random((1, 128, 170, 256)).astype('float32') + self.x2 = np.random.random((1, 128, 170, 256)).astype('float32') + self.axis = 1 + + def test_check_grad(self): + pass + + if __name__ == '__main__': unittest.main() From 
762160bd8c3850a15a87467dd33edfd272469bfe Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 19 Jun 2018 23:42:57 +0800 Subject: [PATCH 44/46] fix concat grad kernel --- paddle/fluid/operators/math/concat.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/math/concat.cu b/paddle/fluid/operators/math/concat.cu index 6205f3cd85..5863d74fca 100644 --- a/paddle/fluid/operators/math/concat.cu +++ b/paddle/fluid/operators/math/concat.cu @@ -209,7 +209,7 @@ class ConcatGradFunctor { outputs_cols[0] = 0; for (int i = 0; i < o_num; ++i) { - int t_col = outputs->at(i)->numel() / out_row; + int t_col = ref_inputs.at(i)->numel() / out_row; if (sameShape) { if (t_col != out0_col) sameShape = false; } From d020d7fd298864927b2a4299e66cc5bd8888f30f Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Wed, 20 Jun 2018 09:28:25 +0800 Subject: [PATCH 45/46] add beam search doc (#11469) --- paddle/fluid/operators/activation_op.cc | 4 +-- python/paddle/fluid/layers/control_flow.py | 33 ++++++++++++++++++++-- python/paddle/fluid/layers/nn.py | 31 +++++++++++++++----- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index b6b498a616..286b03d7b7 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -143,7 +143,7 @@ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ __attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC( TanhShrink Activation Operator. -$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ +$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"; @@ -385,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( STanh Activation Operator. 
-$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ +$$out = b * \\frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ )DOC"); } diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 581770feea..849474dc58 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -185,12 +185,14 @@ def Print(input, Returns: Variable: Output tensor, same data with input tensor. + Examples: + .. code-block:: python - value = some_layer(...) - Print(value, summarize=10, - message="The content of some_layer: ") + value = some_layer(...) + Print(value, summarize=10, + message="The content of some_layer: ") ''' helper = LayerHelper('print', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) @@ -1201,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard): class ConditionalBlock(object): + ''' + **ConditionalBlock** + + ConditionalBlock is an operator that bind a block to a specific condition, + if the condition matches, the corresponding block will be executed. + + Args: + inputs (Variable): bool conditions. + is_scalar_condition (bool): whether the branch is controled by a scalar. + name(str): name of this ConditionalBlock. + + Examples: + .. code-block:: python + + cond = layers.less_than(x=label, y=limit) + true_image, false_image = layers.split_lod_tensor( + input=image, mask=cond) + true_cond = layers.ConditionalBlock([true_image]) + + with true_cond.block(): + ... + with false_cond.block(): + ... 
+ ''' + def __init__(self, inputs, is_scalar_condition=False, name=None): for each_input in inputs: if not isinstance(each_input, Variable): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index c84c79424e..2979ff3057 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -2678,18 +2678,35 @@ def sequence_expand(x, y, ref_level=-1, name=None): def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): ''' + **beam search** + This function implements the beam search algorithm. + Beam search is a classical algorithm for selecting candidate words + in a machine translation task. + + Refer to `Beam search `_ + for more details. + Args: - pre_ids (Variable): ${pre_ids_comment} - ids (Variable): ${ids_comment} - scores (Variable): ${scores_comment} - beam_size (int): ${beam_size_comment} - end_id (int): ${end_id_comment} - level (int): ${level_comment} + pre_ids (Variable): ids in previous step. + ids (Variable): a LoDTensor of shape of [None,k] + scores (Variable): a LoDTensor that has the same shape and LoD with `ids` + beam_size (int): beam size for beam search + end_id (int): the token id which indicates the end of a sequence + level (int): the level of LoDTensor Returns: - tuple: a tuple of beam_search output variables: selected_ids, selected_scores + tuple: a tuple of beam_search output variables: `selected_ids`, `selected_scores` + + Examples: + .. code-block:: python + + # current_score is a Tensor of shape (num_batch_size, embed_size), which + # consists score of each candidate word. 
+ topk_scores, topk_indices = pd.topk(current_score, k=50) + selected_ids, selected_scores = pd.beam_search( + pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) ''' helper = LayerHelper('beam_search', **locals()) score_type = scores.dtype From 25241e9e5e8f691465a9dbdce2aa38344cbd05a0 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 19 Jun 2018 21:19:12 -0500 Subject: [PATCH 46/46] Fix paddle env variables. (#11564) --- benchmark/fluid/fluid_benchmark.py | 2 +- benchmark/fluid/kube_gen_job.py | 16 +++++++++++----- .../howto/cluster/fluid_cluster_train_cn.md | 4 ++-- doc/fluid/howto/cluster/fluid_recordio.md | 4 ++-- .../tests/book/notest_understand_sentiment.py | 10 +++++----- .../paddle/fluid/tests/book/test_fit_a_line.py | 10 +++++----- .../tests/book/test_image_classification.py | 10 +++++----- .../tests/book/test_label_semantic_roles.py | 10 +++++----- .../fluid/tests/book/test_machine_translation.py | 10 +++++----- .../fluid/tests/book/test_recognize_digits.py | 10 +++++----- .../fluid/tests/book/test_recommender_system.py | 10 +++++----- python/paddle/fluid/tests/book/test_word2vec.py | 10 +++++----- 12 files changed, 56 insertions(+), 50 deletions(-) diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py index aa70783ecd..acd803ddee 100644 --- a/benchmark/fluid/fluid_benchmark.py +++ b/benchmark/fluid/fluid_benchmark.py @@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args): return train_program, fluid.default_startup_program() else: raise ValueError( - 'TRAINING_ROLE environment variable must be either TRAINER or PSERVER' + 'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER' ) diff --git a/benchmark/fluid/kube_gen_job.py b/benchmark/fluid/kube_gen_job.py index 9da8a69af1..f8afa3e9ef 100644 --- a/benchmark/fluid/kube_gen_job.py +++ b/benchmark/fluid/kube_gen_job.py @@ -108,10 +108,10 @@ def gen_job(): tn_container["ports"][0]["containerPort"] = spreadport envs.append({"name": 
"PADDLE_JOB_NAME", "value": args.jobname}) - envs.append({"name": "TRAINERS", "value": str(args.trainers)}) + envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)}) envs.append({"name": "PSERVERS", "value": str(args.pservers)}) envs.append({"name": "ENTRY", "value": args.entry}) - envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)}) + envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)}) envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)}) # NOTE: these directories below are cluster specific, please modify # this settings before you run on your own cluster. @@ -167,16 +167,22 @@ def gen_job(): tn_container["volumeMounts"] = volumeMounts ps_container["env"] = envs - ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"}) + ps_container["env"].append({ + "name": "PADDLE_TRAINING_ROLE", + "value": "PSERVER" + }) tn_container["env"] = envs if args.disttype == "pserver": tn_container["env"].append({ - "name": "TRAINING_ROLE", + "name": "PADDLE_TRAINING_ROLE", "value": "TRAINER" }) elif args.disttype == "nccl2" or args.disttype == "local": # NCCL2 have no training role, set to plain WORKER - tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"}) + tn_container["env"].append({ + "name": "PADDLE_TRAINING_ROLE", + "value": "WORKER" + }) os.mkdir(args.jobname) if args.disttype == "pserver": diff --git a/doc/fluid/howto/cluster/fluid_cluster_train_cn.md b/doc/fluid/howto/cluster/fluid_cluster_train_cn.md index b99b90056b..55326940ce 100644 --- a/doc/fluid/howto/cluster/fluid_cluster_train_cn.md +++ b/doc/fluid/howto/cluster/fluid_cluster_train_cn.md @@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book 第二步,启动Parameter Server: ```bash -PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.2 TRAINERS=2 POD_IP=192.168.1.2 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=PSERVER python test_fit_a_line.py +PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.2 PADDLE_TRAINERS=2 
PADDLE_CURRENT_IP=192.168.1.2 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=PSERVER python test_fit_a_line.py ``` 执行命令后请等待出现提示: ```Server listening on 192.168.1.2:6174 ```, 表示Paramter Server已经正常启动。 第三步,启动Trainer: ```bash -PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.3 TRAINERS=2 POD_IP=192.168.1.3 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=TRAINER python test_fit_a_line.py +PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.3 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.3 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=TRAINER python test_fit_a_line.py ``` 由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。 diff --git a/doc/fluid/howto/cluster/fluid_recordio.md b/doc/fluid/howto/cluster/fluid_recordio.md index 55ce63ec19..92859e8f62 100644 --- a/doc/fluid/howto/cluster/fluid_recordio.md +++ b/doc/fluid/howto/cluster/fluid_recordio.md @@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id): ret_list.append(f) return ret_list -trainers = int(os.getenv("TRAINERS")) -trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) +trainers = int(os.getenv("PADDLE_TRAINERS")) +trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) data_file = fluid.layers.io.open_files( filenames=gen_train_list("./mnist-[0-9]*.recordio", 2, 0), thread_num=1, diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index c6687e8ad7..5d9a47c9ba 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -194,16 +194,16 @@ def train(word_dict, if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index b1a6b524d3..74f96f456a 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -69,16 +69,16 @@ def train(use_cuda, save_dirname, is_local): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index 0f3a4c9242..a2fb186b86 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -178,16 +178,16 @@ def train(net_type, use_cuda, save_dirname, is_local): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index 99d51ae007..e214ced0b5 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -209,16 +209,16 @@ def train(use_cuda, save_dirname=None, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 23e5900f12..372d6ec822 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -200,16 +200,16 @@ def train_main(use_cuda, is_sparse, is_local=True): if is_local: train_loop(framework.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 25bcb8a641..5f5c8544bb 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -151,16 +151,16 @@ def train(nn_type, if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 65d6552acc..937d8dd5b0 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -220,16 +220,16 @@ def train(use_cuda, save_dirname, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
- trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 3118d88701..75bed06bd7 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -125,16 +125,16 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER":