From 29f25fbe033e97f74123f2380d6e384ba840d0da Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Mon, 10 Jul 2017 12:26:35 +0800 Subject: [PATCH 01/71] Add pixel softmax layer for FCN model 1. Add switch function for switching image dimensions order 2. Add CpuMatrix::backwardSoftmax function 3. Add pixel softmax layer, python wrapper and grad_test --- paddle/function/CMakeLists.txt | 1 + paddle/function/SwitchOp.cpp | 132 ++++++++++++++++++ paddle/function/SwitchOp.h | 62 ++++++++ paddle/function/SwitchOpGpu.cu | 80 +++++++++++ paddle/function/SwitchOpTest.cpp | 44 ++++++ paddle/gserver/layers/PixelSoftmaxLayer.cpp | 89 ++++++++++++ paddle/gserver/layers/PixelSoftmaxLayer.h | 44 ++++++ paddle/gserver/tests/test_LayerGrad.cpp | 19 +++ paddle/math/Matrix.cpp | 21 +++ paddle/math/Matrix.h | 5 + python/paddle/trainer/config_parser.py | 16 +++ .../paddle/trainer_config_helpers/layers.py | 38 +++++ 12 files changed, 551 insertions(+) create mode 100644 paddle/function/SwitchOp.cpp create mode 100644 paddle/function/SwitchOp.h create mode 100644 paddle/function/SwitchOpGpu.cu create mode 100644 paddle/function/SwitchOpTest.cpp create mode 100644 paddle/gserver/layers/PixelSoftmaxLayer.cpp create mode 100644 paddle/gserver/layers/PixelSoftmaxLayer.h diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 1518a8a654..138f7dcf16 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -37,6 +37,7 @@ if(WITH_GPU) add_simple_unittest(MulOpTest) add_simple_unittest(CosSimOpTest) add_simple_unittest(RowConvOpTest) + add_simple_unittest(SwitchOpTest) endif() add_simple_unittest(ConvOpTest) diff --git a/paddle/function/SwitchOp.cpp b/paddle/function/SwitchOp.cpp new file mode 100644 index 0000000000..4667c4e01d --- /dev/null +++ b/paddle/function/SwitchOp.cpp @@ -0,0 +1,132 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "SwitchOp.h" +#include "paddle/math/Vector.h" + +namespace paddle { + +template <> +void NCHW2NHWC(real* outputs, + const real* inputs, + const int num, + const int inC, + const int inH, + const int inW) { + for (int n = 0; n < num; ++n) { + for (int c = 0; c < inC; ++c) { + for (int h = 0; h < inH; ++h) { + for (int w = 0; w < inW; ++w) { + outputs[((n * inH + h) * inW + w) * inC + c] = *(inputs++); + } + } + } + } +} + +template <> +void NHWC2NCHW(real* outputs, + const real* inputs, + const int num, + const int inH, + const int inW, + const int inC) { + for (int n = 0; n < num; ++n) { + for (int h = 0; h < inH; ++h) { + for (int w = 0; w < inW; ++w) { + for (int c = 0; c < inC; ++c) { + outputs[((n * inC + c) * inH + h) * inW + w] = *(inputs++); + } + } + } + } +} + +/** + * \brief Padding zeros to input according to the specify dimension. + * The struct pad_ contains the padding size in each dimension. + * The input and output is a 4D tensor. In PadFunc, we only + * pad zeros to the 2nd to 4th dimension. 
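+ *
+ * (For reference, the index mapping implemented by the CPU loops above,
+ * assuming an input of shape [num, inC, inH, inW]: the NCHW element at
+ * (n, c, h, w) is written to output offset
+ * ((n * inH + h) * inW + w) * inC + c, i.e. to position (n, h, w, c)
+ * in NHWC order.)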
+ * + * Argument in this Function: + * \param pad_ A struct object contains the padding size in each dimension. + * It has six integers. The channelStart and channelEnd indicate + * how many zeros to add before and after the input in channel + * dimension. And the heightStart and heightEnd indicate padding + * in height dimension. The widthStart and widthEnd indicate the + * padding in width dimension. + * \param inputs A 4D tensor, only one input. + * \param outputs A 4D tensor, the output value after padding. + * + */ + +template +class NCHW2NHWCFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override {} + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); + + size_t num = inputs[0].shape()[0]; + size_t inC = inputs[0].shape()[1]; + size_t inH = inputs[0].shape()[2]; + size_t inW = inputs[0].shape()[3]; + typename Tensor::Vector vec(outputs[0].shape().getElements(), + outputs[0].data()); + vec.zero(); + + NCHW2NHWC( + outputs[0].data(), inputs[0].data(), num, inC, inH, inW); + } +}; + +/** + * \brief The backward propagation of padding Function. Remove the elements + * in the padding positions of forward. + * + * Argument in this Function: + * \param pad_ The same meaning as it in PadFunc. + * \param inputs The gradient with respect to the output value of PadFunc. + * \param outputs The gradient with respect to the input value of PadFunc. + */ + +template +class NHWC2NCHWFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override {} + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); + + size_t num = inputs[0].shape()[0]; + size_t inH = inputs[0].shape()[1]; + size_t inW = inputs[0].shape()[2]; + size_t inC = inputs[0].shape()[3]; + + NHWC2NCHW( + outputs[0].data(), inputs[0].data(), num, inH, inW, inC); + } +}; + +REGISTER_TYPED_FUNC(NCHW2NHWC, CPU, NCHW2NHWCFunc); +REGISTER_TYPED_FUNC(NHWC2NCHW, CPU, NHWC2NCHWFunc); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(NCHW2NHWC, GPU, NCHW2NHWCFunc); +REGISTER_TYPED_FUNC(NHWC2NCHW, GPU, NHWC2NCHWFunc); +#endif + +} // namespace paddle diff --git a/paddle/function/SwitchOp.h b/paddle/function/SwitchOp.h new file mode 100644 index 0000000000..5a2418a703 --- /dev/null +++ b/paddle/function/SwitchOp.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Function.h" + +namespace paddle { + +/** + * \brief This funtion switch dimension order of image input. + * The input and output is a 4D tensor. Switch order 'batch_size, + *channels, height, width' to + * order 'batch_size, height, width, channels'. + * + * \param[out] outputs save results. + * \param[in] inputs input data. + * \param[in] num batch size of input data. + * \param[in] inC channel number of input data. + * \param[in] inH height of input data. 
+ * \param[in] inH with of input data. + */ +template +void NCHW2NHWC(real* outputs, + const real* inputs, + const int num, + const int inC, + const int inH, + const int inW); + +/** + * \brief This funtion switch dimension order of image input. + * The input and output is a 4D tensor. Switch order 'batch_size, + *height, width, channels' to + * order 'batch_size, channels, height, width'. + * + * \param[out] inGrad gradients of previous layer. + * \param[in] outGrad output gradients. + * \param[in] num batch size of input data. + * \param[in] inH height of input data. + * \param[in] inW with of input data. + * \param[in] inC channel number of input data. + */ +template +void NHWC2NCHW(real* inGrad, + const real* outGrad, + const int num, + const int inH, + const int inW, + const int inC); +} // namespace paddle diff --git a/paddle/function/SwitchOpGpu.cu b/paddle/function/SwitchOpGpu.cu new file mode 100644 index 0000000000..c2020cb2ab --- /dev/null +++ b/paddle/function/SwitchOpGpu.cu @@ -0,0 +1,80 @@ +/* Copyright (c) 2016 Paddle + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "hl_base.h" +#include "SwitchOp.h" + +namespace paddle { + +__global__ void KeNCHW2NHWC(real* outputs, const real* inputs, + int inC, int inH, int inW, + int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % inW; + const int h = (idx / inW) % inH; + const int c = (idx / inW / inH) % inC; + const int n = idx / inW / inH / inC; + + const int off = ((n * inH + h) * inW + w) * inC +c; + outputs[off] = inputs[idx]; + } +} + +template <> +void NCHW2NHWC(real* outputs, + const real* inputs, + const int num, + const int inC, + const int inH, + const int inW) { + size_t nth = num * inC * inH * inW; + int blockSize = 1024; + int gridSize = (nth + 1024 - 1) / 1024; + KeNCHW2NHWC<<>> + (outputs, inputs, inC, inH, inW, nth); + CHECK_SYNC("NCHW2NHWC"); +} + +__global__ void KeNHWC2NCHW(real* outputs, const real* inputs, + int inH, int inW, int inC, + int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int c = idx % inC; + const int w = (idx / inC) % inW; + const int h = (idx / inC / inW) % inH; + const int n = idx / inW / inH / inC; + + const int off = ((n * inC + c) * inH + h) * inW + w; + outputs[off] = inputs[idx]; + } +} + +template <> +void NHWC2NCHW(real* outputs, + const real* inputs, + const int num, + const int inH, + const int inW, + const int inC) { + int nth = num * inC * inH * inW; + int blockSize = 1024; + int gridSize = (nth + 1024 - 1) / 1024; + KeNHWC2NCHW<<>> + (outputs, inputs, inH, inW, inC, nth); + CHECK_SYNC("NHWC2NCHW"); +} + +} // namespace paddle diff --git a/paddle/function/SwitchOpTest.cpp b/paddle/function/SwitchOpTest.cpp new file mode 100644 index 0000000000..03b0dd66dd --- /dev/null +++ b/paddle/function/SwitchOpTest.cpp @@ -0,0 +1,44 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "FunctionTest.h" + +namespace paddle { + +TEST(Pad, real) { + for (size_t numSamples : {1, 4, 8, 16}) { + for (size_t channels : {1, 4, 8, 16}) { + for (size_t imgSizeH : {1, 4, 8, 16}) { + for (size_t imgSizeW : {1, 4, 8, 16}) { + VLOG(3) << " numSamples=" << numSamples << " channels=" << channels + << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; + for (bool test_grad : {true, false}) { + CpuGpuFuncCompare compare(test_grad ? "NHWC2NCHW" : "NCHW2NHWC", + FuncConfig()); + TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; + TensorShape outDims{numSamples, imgSizeH, imgSizeW, channels}; + compare.addInputs( + BufferArg(VALUE_TYPE_FLOAT, test_grad ? outDims : inDims)); + compare.addOutputs(BufferArg( + VALUE_TYPE_FLOAT, test_grad ? inDims : outDims, ASSIGN_TO)); + compare.run(); + } + } + } + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/PixelSoftmaxLayer.cpp b/paddle/gserver/layers/PixelSoftmaxLayer.cpp new file mode 100644 index 0000000000..6da84a6303 --- /dev/null +++ b/paddle/gserver/layers/PixelSoftmaxLayer.cpp @@ -0,0 +1,89 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "PixelSoftmaxLayer.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_LAYER(pixel_softmax, PixelSoftmaxLayer); + +bool PixelSoftmaxLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + auto& img_conf = config_.inputs(0).image_conf(); + inH_ = + img_conf.has_img_size_y() ? 
img_conf.img_size_y() : img_conf.img_size();
+  inW_ = img_conf.img_size();
+  inC_ = img_conf.channels();
+  createFunction(forward_, "NCHW2NHWC", FuncConfig());
+  createFunction(backward_, "NHWC2NCHW", FuncConfig());
+  inDims_ = TensorShape({0, inH_, inW_, inC_});
+  outDims_ = TensorShape({0, inC_, inH_, inW_});
+  return true;
+}
+
+void PixelSoftmaxLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  MatrixPtr input = inputLayers_[0]->getOutputValue();
+  size_t batchSize = input->getHeight();
+  // allocate the NHWC-ordered buffers used below; tmpOutput_ holds the
+  // softmax result before it is switched back to NCHW
+  Matrix::resizeOrCreate(
+      tmpInput_, batchSize, inH_ * inW_ * inC_, /* trans */ false, useGpu_);
+  Matrix::resizeOrCreate(
+      tmpOutput_, batchSize, inH_ * inW_ * inC_, /* trans */ false, useGpu_);
+  tmpOutput_->zeroMem();
+  resetOutput(batchSize, inH_ * inW_ * inC_);
+  inDims_.setDim(0, batchSize);
+  outDims_.setDim(0, batchSize);
+
+  // switch NCHW to NHWC
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*getInputValue(0), inDims_);
+  outputs.addArg(*tmpInput_, outDims_);
+  forward_[0]->calc(inputs, outputs);
+  // softmax forward and save softmax result into tmpOutput_
+  tmpInput_->softmax(*tmpOutput_);
+
+  // switch NHWC to NCHW
+  BufferArgs inputs_1;
+  BufferArgs outputs_1;
+  inputs_1.addArg(*tmpOutput_, outDims_);
+  outputs_1.addArg(*getOutputValue(), inDims_);
+  backward_[0]->calc(inputs_1, outputs_1);
+}
+
+void PixelSoftmaxLayer::backward(const UpdateCallback& callback) {
+  (void)callback;
+  REGISTER_TIMER_INFO("PixelSoftmaxBackward", getName().c_str());
+
+  // switch NCHW to NHWC
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*getOutputGrad(), inDims_);
+  outputs.addArg(*tmpInput_, outDims_);
+  forward_[0]->calc(inputs, outputs);
+  // softmax backward and save grad result into tmpOutput_
+  tmpInput_->softmaxBackward(*tmpOutput_);
+
+  // switch NHWC to NCHW
+  BufferArgs inputs_1;
+  BufferArgs outputs_1;
+  inputs_1.addArg(*tmpInput_, outDims_);
+  outputs_1.addArg(*getInputGrad(0), inDims_);
+  backward_[0]->calc(inputs_1, outputs_1);
+}
+}  // namespace paddle
diff --git a/paddle/gserver/layers/PixelSoftmaxLayer.h b/paddle/gserver/layers/PixelSoftmaxLayer.h
new file mode 100644
index 0000000000..80a4ddad5a
--- /dev/null
+++ b/paddle/gserver/layers/PixelSoftmaxLayer.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Layer.h"
+
+namespace paddle {
+
+/**
+ * \brief This layer calculate softmax in image channel dimension.
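+ *
+ * A sketch of the intended per-pixel computation, assuming an input
+ * value x of shape [N, C, H, W]:
+ *   y(n, c, h, w) = exp(x(n, c, h, w)) / sum_{c'} exp(x(n, c', h, w))
+ * i.e. a softmax over the C channel values at every spatial location.
+ * The implementation switches the value to NHWC order, applies the
+ * matrix softmax, and switches the result back to NCHW.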
+ */ +class PixelSoftmaxLayer : public Layer { +public: + explicit PixelSoftmaxLayer(const LayerConfig& config) : Layer(config) {} + + ~PixelSoftmaxLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +protected: + uint32_t inC_; + uint32_t inH_; + uint32_t inW_; + TensorShape inDims_; + TensorShape outDims_; + MatrixPtr tmpInput_; + MatrixPtr tmpOutput_; +}; +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 59d1e9273d..8a9904087e 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1792,6 +1792,25 @@ TEST(Layer, RowConvLayer) { } } +TEST(Layer, PixelSoftmaxLayer) { + TestConfig config; + // config input_0 + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ImageConfig* img = input->mutable_image_conf(); + img->set_channels(4); + img->set_img_size(16); + img->set_img_size_y(16); + + // config softmax layer + config.layerConfig.set_type("pixel_softmax"); + config.layerConfig.set_name("pixelSofrmaxLayer"); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "pixel_softmax", 100, false, useGpu, true, 2); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 4431d613f6..2c18df3732 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -3385,6 +3385,27 @@ void CpuMatrix::oneHotCrossEntropyWithSelfNormBp(Matrix& output, real* out = output.getData(); \ for (size_t i = 0; i < numSamples; ++i, grad += dim, out += dim) +void CpuMatrix::softmaxBackward(Matrix& outputV) { + CHECK(!outputV.useGpu()) << "Matrix type are not equal"; + size_t height = getHeight(); + size_t width = getWidth(); + CHECK(height == outputV.getHeight() && width == outputV.getWidth()) + << "Matrix dimensions are not equal"; + Matrix::resizeOrCreate(sftmaxDot_, + height_, + width_, + /* trans */ false, + useGpu_); + Matrix::resizeOrCreate(sftmaxSum_, + height_, + 1, + /* trans */ false, + useGpu_); + sftmaxDot_->dotMul(*this, outputV); + sftmaxSum_->colMerge(*sftmaxDot_); + softmaxDerivative(outputV, *sftmaxSum_); +} + void CpuMatrix::softmax(Matrix& output) { CHECK(!output.useGpu()); diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 7dfd593225..dcb63a2d3f 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -1456,6 +1456,10 @@ public: }; class CpuMatrix : public Matrix { +private: + MatrixPtr sftmaxSum_; + MatrixPtr sftmaxDot_; + public: CpuMatrix(size_t height, size_t width, bool trans = false); CpuMatrix(real* data, size_t height, size_t width, bool trans = false) @@ -1728,6 +1732,7 @@ public: Matrix& prevGrad2); void softmax(Matrix& output); + void softmaxBackward(Matrix& outputV); void sequenceSoftmax(Matrix& output, const IVector& index); void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 370529ed97..dc9c503e0b 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3171,6 +3171,22 @@ class RecurrentLayerGroup(LayerBase): name, 'recurrent_layer_group', 0, inputs=[], device=device) +@config_layer('pixel_softmax') +class PixelSoftmaxLayer(LayerBase): + def 
__init__(self, input, name, **xargs): + super(PixelSoftmaxLayer, self).__init__( + name, 'pixel_softmax', 0, inputs=inputs, **xargs) + + input_layer = self.get_input_layer(0) + image_conf = self.config.inputs[0].image_conf + image_conf.img_size = input_layer.width + image_conf.img_size_y = input_layer.height + image_conf.channels = input_layer.size / (input_layer.width * + input_layer.height) + self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size, + image_conf.channels) + + # Deprecated, use a new layer specific class instead @config_func def Layer(name, type, **xargs): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 206de1f8e1..fdac5984b0 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -217,6 +217,7 @@ class LayerType(object): SMOOTH_L1 = 'smooth_l1' PRELU = 'prelu' + PIXEL_SOFTMAX_LAYER = 'pixel_softmax' @staticmethod def is_layer_type(type_name): @@ -5853,3 +5854,40 @@ def prelu_layer(input, layer_type=LayerType.PRELU, parents=input, size=l.config.size) + + +@layer_support() +@wrap_name_default('pixel_softmax') +def pixel_softmax_layer(input, name=None, layer_attr=None): + """ + This layer calculate softmax in image channel dimension + + The example usage is: + + .. code-block:: python + + prelu = pixel_softmax(input=layer, name='softmax') + + :param name: Name of this layer. + :type name: basestring + :param input: The input layer. + :type input: LayerOutput + :return: LayerOutput object. + :rtype: LayerOutput + """ + if isinstance(input, LayerOutput): + input = [input] + elif isinstance(input, Projection): + input = [input] + else: + assert isinstance(input, collections.Sequence) + l = Layer( + inputs=[x.name for x in input], + name=name, + type=LayerType.PIXEL_SOFTMAX_LAYER, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name=name, + layer_type=LayerType.PIXEL_SOFTMAX_LAYER, + parents=input, + size=l.config.size) From 0152d97e6344fbf866d75bf24f6f6034a81f5e81 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Tue, 11 Jul 2017 10:23:29 +0800 Subject: [PATCH 02/71] fix pixel softmax python wrapper bug --- python/paddle/trainer/config_parser.py | 2 +- python/paddle/trainer_config_helpers/layers.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index c24af47c4b..261e834e11 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3176,7 +3176,7 @@ class RecurrentLayerGroup(LayerBase): @config_layer('pixel_softmax') class PixelSoftmaxLayer(LayerBase): - def __init__(self, input, name, **xargs): + def __init__(self, name, inputs, **xargs): super(PixelSoftmaxLayer, self).__init__( name, 'pixel_softmax', 0, inputs=inputs, **xargs) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index d8cc52d409..2f8b0d1002 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -126,6 +126,7 @@ __all__ = [ 'row_conv_layer', 'dropout_layer', 'prelu_layer', + 'pixel_softmax_layer', ] @@ -5905,8 +5906,8 @@ def pixel_softmax_layer(input, name=None, layer_attr=None): else: assert isinstance(input, collections.Sequence) l = Layer( - inputs=[x.name for x in input], name=name, + inputs=[x.name for x in input], type=LayerType.PIXEL_SOFTMAX_LAYER, 
**ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput( From 1cdf149b6fccf4fba030f0bb847965500960fa9b Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 19 Jul 2017 12:50:45 +0800 Subject: [PATCH 03/71] 1. delete PixelSoftmaxLayer and add SwitchOrderLayer 2. Make SwitchOrderLayer support for softmax activation 3. Fix bugs --- CMakeLists.txt | 2 +- paddle/function/SwitchOp.cpp | 72 ++++++----- paddle/function/SwitchOp.h | 8 +- paddle/function/SwitchOpGpu.cu | 26 ++-- paddle/gserver/layers/PixelSoftmaxLayer.cpp | 89 -------------- paddle/gserver/layers/SwitchOrderLayer.cpp | 112 ++++++++++++++++++ ...PixelSoftmaxLayer.h => SwitchOrderLayer.h} | 19 +-- paddle/gserver/tests/test_LayerGrad.cpp | 14 ++- paddle/math/Matrix.cpp | 21 ---- paddle/math/Matrix.h | 1 - proto/ModelConfig.proto | 8 ++ python/paddle/trainer/config_parser.py | 21 ++-- .../paddle/trainer_config_helpers/layers.py | 36 +++--- 13 files changed, 231 insertions(+), 198 deletions(-) delete mode 100644 paddle/gserver/layers/PixelSoftmaxLayer.cpp create mode 100644 paddle/gserver/layers/SwitchOrderLayer.cpp rename paddle/gserver/layers/{PixelSoftmaxLayer.h => SwitchOrderLayer.h} (71%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 15a7c6b074..fdc62b3151 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ # limitations under the License cmake_minimum_required(VERSION 3.0) - +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ldl -lpthread") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/paddle/function/SwitchOp.cpp b/paddle/function/SwitchOp.cpp index 4667c4e01d..01e252a8dc 100644 --- a/paddle/function/SwitchOp.cpp +++ b/paddle/function/SwitchOp.cpp @@ -23,12 +23,17 @@ void NCHW2NHWC(real* outputs, const int num, const int inC, const int inH, - const int inW) { + const int inW, + const int argType) { for (int n = 0; n < num; ++n) { for (int c = 0; c < inC; ++c) { for (int h = 0; h < inH; ++h) { for (int w = 0; w < inW; ++w) { - outputs[((n * inH + h) * inW + w) * inC + c] = *(inputs++); + if (argType == ADD_TO) { + outputs[((n * inH + h) * inW + w) * inC + c] += *(inputs++); + } else { + outputs[((n * inH + h) * inW + w) * inC + c] = *(inputs++); + } } } } @@ -41,12 +46,17 @@ void NHWC2NCHW(real* outputs, const int num, const int inH, const int inW, - const int inC) { + const int inC, + const int argType) { for (int n = 0; n < num; ++n) { for (int h = 0; h < inH; ++h) { for (int w = 0; w < inW; ++w) { for (int c = 0; c < inC; ++c) { - outputs[((n * inC + c) * inH + h) * inW + w] = *(inputs++); + if (argType == ADD_TO) { + outputs[((n * inC + c) * inH + h) * inW + w] += *(inputs++); + } else { + outputs[((n * inC + c) * inH + h) * inW + w] = *(inputs++); + } } } } @@ -54,23 +64,15 @@ void NHWC2NCHW(real* outputs, } /** - * \brief Padding zeros to input according to the specify dimension. - * The struct pad_ contains the padding size in each dimension. - * The input and output is a 4D tensor. In PadFunc, we only - * pad zeros to the 2nd to 4th dimension. + * \brief Switch dimension order of image input. + * The input and output is a 4D tensor. Switch order + * 'batch_size,channels, height, width' to + * order 'batch_size, height, width, channels'. * * Argument in this Function: - * \param pad_ A struct object contains the padding size in each dimension. - * It has six integers. 
The channelStart and channelEnd indicate - * how many zeros to add before and after the input in channel - * dimension. And the heightStart and heightEnd indicate padding - * in height dimension. The widthStart and widthEnd indicate the - * padding in width dimension. - * \param inputs A 4D tensor, only one input. - * \param outputs A 4D tensor, the output value after padding. - * + * \param inputs input data with order 'batch_size,channels, height, width'. + * \param outputs output data with order 'batch_size, height, width, channels'. */ - template class NCHW2NHWCFunc : public FunctionBase { public: @@ -84,25 +86,26 @@ public: size_t inC = inputs[0].shape()[1]; size_t inH = inputs[0].shape()[2]; size_t inW = inputs[0].shape()[3]; - typename Tensor::Vector vec(outputs[0].shape().getElements(), - outputs[0].data()); - vec.zero(); - - NCHW2NHWC( - outputs[0].data(), inputs[0].data(), num, inC, inH, inW); + NCHW2NHWC(outputs[0].data(), + inputs[0].data(), + num, + inC, + inH, + inW, + outputs[0].getArgType()); } }; /** - * \brief The backward propagation of padding Function. Remove the elements - * in the padding positions of forward. + * \brief Switch dimension order of image input. + * The input and output is a 4D tensor. Switch order + * 'batch_size, height, width, channels' to + * order 'batch_size, channels, height, width'. * * Argument in this Function: - * \param pad_ The same meaning as it in PadFunc. - * \param inputs The gradient with respect to the output value of PadFunc. - * \param outputs The gradient with respect to the input value of PadFunc. + * \param inputs input data with order 'batch_size, height, width, channels'. + * \param outputs output data with order 'batch_size, channels, height, width'. */ - template class NHWC2NCHWFunc : public FunctionBase { public: @@ -117,8 +120,13 @@ public: size_t inW = inputs[0].shape()[2]; size_t inC = inputs[0].shape()[3]; - NHWC2NCHW( - outputs[0].data(), inputs[0].data(), num, inH, inW, inC); + NHWC2NCHW(outputs[0].data(), + inputs[0].data(), + num, + inH, + inW, + inC, + outputs[0].getArgType()); } }; diff --git a/paddle/function/SwitchOp.h b/paddle/function/SwitchOp.h index 5a2418a703..e4c1c3ac92 100644 --- a/paddle/function/SwitchOp.h +++ b/paddle/function/SwitchOp.h @@ -30,6 +30,7 @@ namespace paddle { * \param[in] inC channel number of input data. * \param[in] inH height of input data. * \param[in] inH with of input data. + * \param[in] argType type of output argument. */ template void NCHW2NHWC(real* outputs, @@ -37,7 +38,8 @@ void NCHW2NHWC(real* outputs, const int num, const int inC, const int inH, - const int inW); + const int inW, + const int argtype); /** * \brief This funtion switch dimension order of image input. @@ -51,6 +53,7 @@ void NCHW2NHWC(real* outputs, * \param[in] inH height of input data. * \param[in] inW with of input data. * \param[in] inC channel number of input data. + * \param[in] argType type of output argument. 
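+ *                 Pass ADD_TO to accumulate the switched values into the
+ *                 output buffer (inGrad here); any other argument type
+ *                 overwrites it, matching the ADD_TO checks in the CPU
+ *                 and GPU implementations.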
*/ template void NHWC2NCHW(real* inGrad, @@ -58,5 +61,6 @@ void NHWC2NCHW(real* inGrad, const int num, const int inH, const int inW, - const int inC); + const int inC, + const int argType); } // namespace paddle diff --git a/paddle/function/SwitchOpGpu.cu b/paddle/function/SwitchOpGpu.cu index c2020cb2ab..0b9401dea1 100644 --- a/paddle/function/SwitchOpGpu.cu +++ b/paddle/function/SwitchOpGpu.cu @@ -19,7 +19,7 @@ namespace paddle { __global__ void KeNCHW2NHWC(real* outputs, const real* inputs, int inC, int inH, int inW, - int nthreads) { + int nthreads, int argType) { const int idx = threadIdx.x + blockIdx.x * blockDim.x; if (idx < nthreads) { const int w = idx % inW; @@ -28,7 +28,11 @@ __global__ void KeNCHW2NHWC(real* outputs, const real* inputs, const int n = idx / inW / inH / inC; const int off = ((n * inH + h) * inW + w) * inC +c; - outputs[off] = inputs[idx]; + if (argType == ADD_TO) { + outputs[off] += inputs[idx]; + } else { + outputs[off] = inputs[idx]; + } } } @@ -38,18 +42,19 @@ void NCHW2NHWC(real* outputs, const int num, const int inC, const int inH, - const int inW) { + const int inW, + const int argType) { size_t nth = num * inC * inH * inW; int blockSize = 1024; int gridSize = (nth + 1024 - 1) / 1024; KeNCHW2NHWC<<>> - (outputs, inputs, inC, inH, inW, nth); + (outputs, inputs, inC, inH, inW, nth, argType); CHECK_SYNC("NCHW2NHWC"); } __global__ void KeNHWC2NCHW(real* outputs, const real* inputs, int inH, int inW, int inC, - int nthreads) { + int nthreads, int argType) { const int idx = threadIdx.x + blockIdx.x * blockDim.x; if (idx < nthreads) { const int c = idx % inC; @@ -58,7 +63,11 @@ __global__ void KeNHWC2NCHW(real* outputs, const real* inputs, const int n = idx / inW / inH / inC; const int off = ((n * inC + c) * inH + h) * inW + w; - outputs[off] = inputs[idx]; + if (argType == ADD_TO) { + outputs[off] += inputs[idx]; + } else { + outputs[off] = inputs[idx]; + } } } @@ -68,12 +77,13 @@ void NHWC2NCHW(real* outputs, const int num, const int inH, const int inW, - const int inC) { + const int inC, + const int argType) { int nth = num * inC * inH * inW; int blockSize = 1024; int gridSize = (nth + 1024 - 1) / 1024; KeNHWC2NCHW<<>> - (outputs, inputs, inH, inW, inC, nth); + (outputs, inputs, inH, inW, inC, nth, argType); CHECK_SYNC("NHWC2NCHW"); } diff --git a/paddle/gserver/layers/PixelSoftmaxLayer.cpp b/paddle/gserver/layers/PixelSoftmaxLayer.cpp deleted file mode 100644 index 6da84a6303..0000000000 --- a/paddle/gserver/layers/PixelSoftmaxLayer.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "PixelSoftmaxLayer.h" -#include "paddle/utils/Stat.h" - -namespace paddle { - -REGISTER_LAYER(pixel_softmax, PixelSoftmaxLayer); - -bool PixelSoftmaxLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - auto& img_conf = config_.inputs(0).image_conf(); - inH_ = - img_conf.has_img_size_y() ? 
img_conf.img_size_y() : img_conf.img_size(); - inW_ = img_conf.img_size(); - inC_ = img_conf.channels(); - createFunction(forward_, "NCHW2NHWC", FuncConfig()); - createFunction(backward_, "NHWC2NCHW", FuncConfig()); - inDims_ = TensorShape({0, inH_, inW_, inC_}); - outDims_ = TensorShape({0, inC_, inH_, inW_}); - return true; -} - -void PixelSoftmaxLayer::forward(PassType passType) { - Layer::forward(passType); - MatrixPtr input = inputLayers_[0]->getOutputValue(); - size_t batchSize = input->getHeight(); - // cout<<"useGpu:"<zeroMem(); - resetOutput(batchSize, inH_ * inW_ * inC_); - inDims_.setDim(0, batchSize); - outDims_.setDim(0, batchSize); - - // switch NCHW to NHWC - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(0), inDims_); - outputs.addArg(*tmpInput_, outDims_); - forward_[0]->calc(inputs, outputs); - // softmax forward and save softmax result into tmpMatrix_ - tmpInput_->softmax(*tmpOutput_); - - // switch NHWC to NCHW - BufferArgs inputs_1; - BufferArgs outputs_1; - inputs_1.addArg(*tmpOutput_, outDims_); - outputs_1.addArg(*getOutputValue(), inDims_); - backward_[0]->calc(inputs_1, outputs_1); -} - -void PixelSoftmaxLayer::backward(const UpdateCallback& callback) { - (void)callback; - REGISTER_TIMER_INFO("PixelSoftmaxBackward", getName().c_str()); - - // switch NCHW to NHWC - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), inDims_); - outputs.addArg(*tmpInput_, outDims_); - forward_[0]->calc(inputs, outputs); - // softmax backward and save grad result into tmpOutput_ - tmpInput_->softmaxBackward(*tmpOutput_); - - // switch NHWC to NCHW - BufferArgs inputs_1; - BufferArgs outputs_1; - inputs_1.addArg(*tmpInput_, outDims_); - outputs_1.addArg(*getInputGrad(0), inDims_); - backward_[0]->calc(inputs_1, outputs_1); -} -} // namespace paddle diff --git a/paddle/gserver/layers/SwitchOrderLayer.cpp b/paddle/gserver/layers/SwitchOrderLayer.cpp new file mode 100644 index 0000000000..2a8a9500fa --- /dev/null +++ b/paddle/gserver/layers/SwitchOrderLayer.cpp @@ -0,0 +1,112 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "SwitchOrderLayer.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_LAYER(switch_order, SwitchOrderLayer); + +bool SwitchOrderLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + auto& img_conf = config_.inputs(0).image_conf(); + size_t inH = + img_conf.has_img_size_y() ? 
img_conf.img_size_y() : img_conf.img_size(); + size_t inW = img_conf.img_size(); + size_t inC = img_conf.channels(); + inDims_ = TensorShape({0, inC, inH, inW}); + outDims_ = TensorShape(4); + + auto& reshape_conf = config_.reshape_conf(); + for (size_t i = 0; i < reshape_conf.heightaxis_size(); i++) { + LOG(INFO) << "reshape height axis: " << reshape_conf.heightaxis(i); + heightAxis_.push_back(reshape_conf.heightaxis(i)); + } + for (size_t i = 0; i < reshape_conf.widthaxis_size(); i++) { + LOG(INFO) << "reshape width axis: " << reshape_conf.widthaxis(i); + widthAxis_.push_back(reshape_conf.widthaxis(i)); + } + createFunction(nchw2nhwc_, "NCHW2NHWC", FuncConfig()); + createFunction(nhwc2nchw_, "NHWC2NCHW", FuncConfig()); + return true; +} + +void SwitchOrderLayer::setOutDims() { + outDims_.setDim(0, inDims_[0]); + outDims_.setDim(1, inDims_[2]); + outDims_.setDim(2, inDims_[3]); + outDims_.setDim(3, inDims_[1]); + reshapeHeight_ = 1; + for (size_t i = 0; i < heightAxis_.size(); i++) { + reshapeHeight_ *= outDims_[heightAxis_[i]]; + } + output_.setFrameHeight(reshapeHeight_); + reshapeWidth_ = 1; + for (size_t i = 0; i < widthAxis_.size(); i++) { + reshapeWidth_ *= outDims_[widthAxis_[i]]; + } + output_.setFrameWidth(reshapeWidth_); + LOG(INFO) << "outDims: " << outDims_[0] << "; " << outDims_[1] << ";" + << outDims_[2] << ";" << outDims_[3]; +} + +void SwitchOrderLayer::setInDims() { + MatrixPtr input = inputLayers_[0]->getOutputValue(); + size_t batchSize = input->getHeight(); + inDims_.setDim(0, batchSize); + + int h = inputLayers_[0]->getOutput().getFrameHeight(); + if (h != 0) inDims_.setDim(2, h); + int w = inputLayers_[0]->getOutput().getFrameWidth(); + if (w != 0) inDims_.setDim(3, w); + int totalCount = input->getElementCnt(); + int channels = totalCount / (inDims_[0] * inDims_[2] * inDims_[3]); + if (channels != 0) inDims_.setDim(1, channels); + LOG(INFO) << "inDims: " << inDims_[0] << "; " << inDims_[1] << ";" + << inDims_[2] << ";" << inDims_[3]; +} + +void SwitchOrderLayer::forward(PassType passType) { + Layer::forward(passType); + setInDims(); + setOutDims(); + resetOutput(outDims_[0], outDims_[1] * outDims_[2] * outDims_[3]); + if (heightAxis_.size() > 0) { + getOutputValue()->reshape(reshapeHeight_, reshapeWidth_); + } + + // switch NCHW to NHWC + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getInputValue(0), inDims_); + outputs.addArg(*getOutputValue(), outDims_); + nchw2nhwc_[0]->calc(inputs, outputs); + // forwardActivation(); +} + +void SwitchOrderLayer::backward(const UpdateCallback& callback) { + (void)callback; + // backwardActivation(); + + // switch NHWC to NCHW + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getOutputGrad(), outDims_); + outputs.addArg(*getInputGrad(0), inDims_, ADD_TO); + nhwc2nchw_[0]->calc(inputs, outputs); +} +} // namespace paddle diff --git a/paddle/gserver/layers/PixelSoftmaxLayer.h b/paddle/gserver/layers/SwitchOrderLayer.h similarity index 71% rename from paddle/gserver/layers/PixelSoftmaxLayer.h rename to paddle/gserver/layers/SwitchOrderLayer.h index 80a4ddad5a..47b1f7f73e 100644 --- a/paddle/gserver/layers/PixelSoftmaxLayer.h +++ b/paddle/gserver/layers/SwitchOrderLayer.h @@ -21,24 +21,27 @@ namespace paddle { /** * \brief This layer calculate softmax in image channel dimension. 
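 *
 * More concretely, the layer switches its input from NCHW to NHWC order,
 * optionally reshapes the output height and width according to
 * reshape_conf, and applies the configured activation (for example
 * softmax); see SwitchOrderLayer.cpp for the implementation.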
*/ -class PixelSoftmaxLayer : public Layer { +class SwitchOrderLayer : public Layer { public: - explicit PixelSoftmaxLayer(const LayerConfig& config) : Layer(config) {} + explicit SwitchOrderLayer(const LayerConfig& config) : Layer(config) {} - ~PixelSoftmaxLayer() {} + ~SwitchOrderLayer() {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; void forward(PassType passType) override; void backward(const UpdateCallback& callback = nullptr) override; + void setInDims(); + void setOutDims(); protected: - uint32_t inC_; - uint32_t inH_; - uint32_t inW_; + std::vector> nchw2nhwc_; + std::vector> nhwc2nchw_; TensorShape inDims_; TensorShape outDims_; - MatrixPtr tmpInput_; - MatrixPtr tmpOutput_; + std::vector heightAxis_; + std::vector widthAxis_; + size_t reshapeHeight_; + size_t reshapeWidth_; }; } // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 98c9cbe9f5..42c23f0226 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1802,7 +1802,7 @@ TEST(Layer, RowConvLayer) { } } -TEST(Layer, PixelSoftmaxLayer) { +TEST(Layer, SwitchOrderLayer) { TestConfig config; // config input_0 config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); @@ -1812,12 +1812,18 @@ TEST(Layer, PixelSoftmaxLayer) { img->set_img_size(16); img->set_img_size_y(16); + ReshapeConfig* reshape = config.layerConfig.mutable_reshape_conf(); + reshape->add_heightaxis(0); + reshape->add_heightaxis(1); + reshape->add_heightaxis(2); + reshape->add_widthaxis(3); + // config softmax layer - config.layerConfig.set_type("pixel_softmax"); - config.layerConfig.set_name("pixelSofrmaxLayer"); + config.layerConfig.set_type("switch_order"); + config.layerConfig.set_name("switchOrderLayer"); for (auto useGpu : {false, true}) { - testLayerGrad(config, "pixel_softmax", 100, false, useGpu, true, 2); + testLayerGrad(config, "switch_order", 100, false, useGpu, true); } } diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 2c18df3732..4431d613f6 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -3385,27 +3385,6 @@ void CpuMatrix::oneHotCrossEntropyWithSelfNormBp(Matrix& output, real* out = output.getData(); \ for (size_t i = 0; i < numSamples; ++i, grad += dim, out += dim) -void CpuMatrix::softmaxBackward(Matrix& outputV) { - CHECK(!outputV.useGpu()) << "Matrix type are not equal"; - size_t height = getHeight(); - size_t width = getWidth(); - CHECK(height == outputV.getHeight() && width == outputV.getWidth()) - << "Matrix dimensions are not equal"; - Matrix::resizeOrCreate(sftmaxDot_, - height_, - width_, - /* trans */ false, - useGpu_); - Matrix::resizeOrCreate(sftmaxSum_, - height_, - 1, - /* trans */ false, - useGpu_); - sftmaxDot_->dotMul(*this, outputV); - sftmaxSum_->colMerge(*sftmaxDot_); - softmaxDerivative(outputV, *sftmaxSum_); -} - void CpuMatrix::softmax(Matrix& output) { CHECK(!output.useGpu()); diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index dcb63a2d3f..20f97a5060 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -1732,7 +1732,6 @@ public: Matrix& prevGrad2); void softmax(Matrix& output); - void softmaxBackward(Matrix& outputV); void sequenceSoftmax(Matrix& output, const IVector& index); void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 37cd16c798..9fd017b23e 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -266,6 +266,11 @@ 
message PadConfig { repeated uint32 pad_w = 4; } +message ReshapeConfig { + repeated uint32 heightAxis = 1; + repeated uint32 widthAxis = 2; +} + message MultiBoxLossConfig { required uint32 num_classes = 1; required float overlap_threshold = 2; @@ -476,6 +481,9 @@ message LayerConfig { // controls the scope of pooling operation. can be set > 0. // leave empty or set to -1 to disable this stride pooling. optional int32 seq_pool_stride = 53 [default = -1]; + + // for switch order layer + optional ReshapeConfig reshape_conf = 54; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 261e834e11..fe06dd812e 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3174,20 +3174,13 @@ class RecurrentLayerGroup(LayerBase): name, 'recurrent_layer_group', 0, inputs=[], device=device) -@config_layer('pixel_softmax') -class PixelSoftmaxLayer(LayerBase): - def __init__(self, name, inputs, **xargs): - super(PixelSoftmaxLayer, self).__init__( - name, 'pixel_softmax', 0, inputs=inputs, **xargs) - - input_layer = self.get_input_layer(0) - image_conf = self.config.inputs[0].image_conf - image_conf.img_size = input_layer.width - image_conf.img_size_y = input_layer.height - image_conf.channels = input_layer.size / (input_layer.width * - input_layer.height) - self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size, - image_conf.channels) +@config_layer('switch_order') +class SwitchOrderLayer(LayerBase): + def __init__(self, name, inputs, reshape, **xargs): + super(SwitchOrderLayer, self).__init__( + name, 'switch_order', 0, inputs=inputs, **xargs) + self.conf.reshape_conf.heightAxis_ = reshape['height'] + self.conf.reshape_conf.widthAxis_ = reshape['width'] # Deprecated, use a new layer specific class instead diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 2f8b0d1002..6980a31679 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -126,7 +126,7 @@ __all__ = [ 'row_conv_layer', 'dropout_layer', 'prelu_layer', - 'pixel_softmax_layer', + 'switch_order_layer', ] @@ -218,7 +218,7 @@ class LayerType(object): SMOOTH_L1 = 'smooth_l1' PRELU = 'prelu' - PIXEL_SOFTMAX_LAYER = 'pixel_softmax' + SWITCH_ORDER_LAYER = 'switch_order' @staticmethod def is_layer_type(type_name): @@ -5881,37 +5881,37 @@ def prelu_layer(input, @layer_support() -@wrap_name_default('pixel_softmax') -def pixel_softmax_layer(input, name=None, layer_attr=None): +@wrap_name_default('switch_order') +def switch_order_layer(input, name=None, reshape=None, layer_attr=None): """ - This layer calculate softmax in image channel dimension + This layer switch dimension order of image input. + From order "batchSize, channels, height, width" + to order "batchSize, height, width, channels". The example usage is: .. code-block:: python + reshape = {'height':[ 0, 1, 2], 'width':[3]} + switch = switch_order(input=layer, name='switch', reshape=reshape) - prelu = pixel_softmax(input=layer, name='softmax') - - :param name: Name of this layer. - :type name: basestring :param input: The input layer. :type input: LayerOutput + :param name: Name of this layer. + :type name: basestring + :param reshape: reshape matrix by axises. + :type reshape: Dict :return: LayerOutput object. 
:rtype: LayerOutput """ - if isinstance(input, LayerOutput): - input = [input] - elif isinstance(input, Projection): - input = [input] - else: - assert isinstance(input, collections.Sequence) + assert isinstance(input, LayerOutput) l = Layer( name=name, - inputs=[x.name for x in input], - type=LayerType.PIXEL_SOFTMAX_LAYER, + inputs=input, + reshape=reshape, + type=LayerType.SWITCH_ORDER_LAYER, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput( name=name, - layer_type=LayerType.PIXEL_SOFTMAX_LAYER, + layer_type=LayerType.SWITCH_ORDER_LAYER, parents=input, size=l.config.size) From fa02963659239fbbd61594b61073802cc9ab4513 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 19 Jul 2017 13:15:03 +0800 Subject: [PATCH 04/71] Delete debug log --- paddle/gserver/layers/SwitchOrderLayer.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/paddle/gserver/layers/SwitchOrderLayer.cpp b/paddle/gserver/layers/SwitchOrderLayer.cpp index 2a8a9500fa..8d337611b9 100644 --- a/paddle/gserver/layers/SwitchOrderLayer.cpp +++ b/paddle/gserver/layers/SwitchOrderLayer.cpp @@ -33,11 +33,9 @@ bool SwitchOrderLayer::init(const LayerMap& layerMap, auto& reshape_conf = config_.reshape_conf(); for (size_t i = 0; i < reshape_conf.heightaxis_size(); i++) { - LOG(INFO) << "reshape height axis: " << reshape_conf.heightaxis(i); heightAxis_.push_back(reshape_conf.heightaxis(i)); } for (size_t i = 0; i < reshape_conf.widthaxis_size(); i++) { - LOG(INFO) << "reshape width axis: " << reshape_conf.widthaxis(i); widthAxis_.push_back(reshape_conf.widthaxis(i)); } createFunction(nchw2nhwc_, "NCHW2NHWC", FuncConfig()); @@ -60,8 +58,6 @@ void SwitchOrderLayer::setOutDims() { reshapeWidth_ *= outDims_[widthAxis_[i]]; } output_.setFrameWidth(reshapeWidth_); - LOG(INFO) << "outDims: " << outDims_[0] << "; " << outDims_[1] << ";" - << outDims_[2] << ";" << outDims_[3]; } void SwitchOrderLayer::setInDims() { @@ -76,8 +72,6 @@ void SwitchOrderLayer::setInDims() { int totalCount = input->getElementCnt(); int channels = totalCount / (inDims_[0] * inDims_[2] * inDims_[3]); if (channels != 0) inDims_.setDim(1, channels); - LOG(INFO) << "inDims: " << inDims_[0] << "; " << inDims_[1] << ";" - << inDims_[2] << ";" << inDims_[3]; } void SwitchOrderLayer::forward(PassType passType) { @@ -95,12 +89,12 @@ void SwitchOrderLayer::forward(PassType passType) { inputs.addArg(*getInputValue(0), inDims_); outputs.addArg(*getOutputValue(), outDims_); nchw2nhwc_[0]->calc(inputs, outputs); - // forwardActivation(); + forwardActivation(); } void SwitchOrderLayer::backward(const UpdateCallback& callback) { (void)callback; - // backwardActivation(); + backwardActivation(); // switch NHWC to NCHW BufferArgs inputs; From e23acb4e6f7b12f1b61faf3cf8d74872b7df5b39 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 19 Jul 2017 14:09:32 +0800 Subject: [PATCH 05/71] fix cmake --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a85224843..2a6b0a20e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,6 @@ # limitations under the License cmake_minimum_required(VERSION 3.0) -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ldl -lpthread") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) From a6c53fc2fcef380784829cfb29764e1a6458827d Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 19 Jul 2017 17:32:05 +0800 Subject: [PATCH 06/71] fix python 
wrapper bugs --- python/paddle/trainer/config_parser.py | 4 ++-- python/paddle/trainer_config_helpers/layers.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 6e2f218234..0a466380ae 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3187,8 +3187,8 @@ class SwitchOrderLayer(LayerBase): def __init__(self, name, inputs, reshape, **xargs): super(SwitchOrderLayer, self).__init__( name, 'switch_order', 0, inputs=inputs, **xargs) - self.conf.reshape_conf.heightAxis_ = reshape['height'] - self.conf.reshape_conf.widthAxis_ = reshape['width'] + self.config.reshape_conf.heightAxis.extend(reshape['height']) + self.config.reshape_conf.widthAxis.extend(reshape['width']) # Deprecated, use a new layer specific class instead diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 1f5b9e999c..0bcfbe1e0c 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -5976,7 +5976,11 @@ def gated_unit_layer(input, @layer_support() @wrap_name_default('switch_order') -def switch_order_layer(input, name=None, reshape=None, layer_attr=None): +def switch_order_layer(input, + name=None, + reshape=None, + act=None, + layer_attr=None): """ This layer switch dimension order of image input. From order "batchSize, channels, height, width" @@ -6000,9 +6004,10 @@ def switch_order_layer(input, name=None, reshape=None, layer_attr=None): assert isinstance(input, LayerOutput) l = Layer( name=name, - inputs=input, + inputs=input.name, reshape=reshape, type=LayerType.SWITCH_ORDER_LAYER, + active_type=act.name, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput( name=name, From baae8447ac936b29fb2b14981851bb502f5193cd Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Wed, 19 Jul 2017 18:53:32 +0800 Subject: [PATCH 07/71] Fix SwitchOrderLayer grad bugs by reshape output.grad --- paddle/gserver/layers/SwitchOrderLayer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/gserver/layers/SwitchOrderLayer.cpp b/paddle/gserver/layers/SwitchOrderLayer.cpp index 8d337611b9..6a91042f62 100644 --- a/paddle/gserver/layers/SwitchOrderLayer.cpp +++ b/paddle/gserver/layers/SwitchOrderLayer.cpp @@ -81,6 +81,7 @@ void SwitchOrderLayer::forward(PassType passType) { resetOutput(outDims_[0], outDims_[1] * outDims_[2] * outDims_[3]); if (heightAxis_.size() > 0) { getOutputValue()->reshape(reshapeHeight_, reshapeWidth_); + getOutputGrad()->reshape(reshapeHeight_, reshapeWidth_); } // switch NCHW to NHWC From 5ca4118451a38a8fa1e876fd5416028010ec218b Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Tue, 22 Aug 2017 17:27:04 +0800 Subject: [PATCH 08/71] Update Dockerfile of android to support building for arm64-v8a and armeabi. 
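With the ANDROID_ABI build argument added below, one image can be built per
target ABI. A minimal sketch of the invocation (the image tag is
illustrative; ANDROID_ABI falls back to armeabi-v7a when the argument is
omitted):

    docker build -f Dockerfile.android \
        --build-arg ANDROID_ABI=arm64-v8a \
        -t paddle-android-dev .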
--- Dockerfile.android | 18 ++++--- paddle/scripts/docker/build_android.sh | 65 +++++++++++++++++++------- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/Dockerfile.android b/Dockerfile.android index c0fa58c384..aa95abb366 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -4,9 +4,15 @@ MAINTAINER PaddlePaddle Authors ARG UBUNTU_MIRROR RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' +# ENV variables +ARG ANDROID_ABI + +ENV ANDROID_ABI=${ANDROID_ABI:-"armeabi-v7a"} + ENV HOME=/root \ ANDROID_NDK_HOME=/opt/android-ndk-linux \ - ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain-gcc + ANDROID_ARM_STANDALONE_TOOLCHAIN=/opt/arm-toolchain-gcc \ + ANDROID_ARM64_STANDALONE_TOOLCHAIN=/opt/arm64-toolchain-gcc RUN apt-get update && \ apt-get install -y \ @@ -15,12 +21,11 @@ RUN apt-get update && \ apt-get clean -y # Install Go and glide -RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ - tar -C /usr/local -xzf go.tgz && \ +RUN wget -qO- go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \ + tar -xz -C /usr/local && \ mkdir /root/gopath && \ mkdir /root/gopath/bin && \ - mkdir /root/gopath/src && \ - rm go.tgz + mkdir /root/gopath/src ENV GOROOT=/usr/local/go GOPATH=/root/gopath # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin @@ -42,7 +47,8 @@ RUN mkdir /opt/android-ndk-tmp && \ wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \ unzip -q android-ndk-r14b-linux-x86_64.zip && \ mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ - ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_STANDALONE_TOOLCHAIN} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_ARM_STANDALONE_TOOLCHAIN} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm64 --platform=android-21 --install-dir=${ANDROID_ARM64_STANDALONE_TOOLCHAIN} && \ rm -rf /opt/android-ndk-tmp && \ rm -rf ${ANDROID_NDK_HOME} diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index 5584e29e2a..593ae28e49 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,22 +2,55 @@ set -xe -mkdir -p /paddle/build_android -cd /paddle/build_android +mkdir -p /paddle/build_android/$ANDROID_ABI +cd /paddle/build_android/$ANDROID_ABI rm -rf /paddle/install 2>/dev/null || true -cmake -DCMAKE_SYSTEM_NAME=Android \ - -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \ - -DANDROID_ABI=armeabi-v7a \ - -DANDROID_ARM_NEON=ON \ - -DANDROID_ARM_MODE=ON \ - -DHOST_C_COMPILER=/usr/bin/gcc \ - -DHOST_CXX_COMPILER=/usr/bin/g++ \ - -DCMAKE_INSTALL_PREFIX=/paddle/install \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_C_FLAGS_RELWITHDEBINFO="-O3" \ - -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="-O3" \ - -DWITH_C_API=ON \ - -DWITH_SWIG_PY=OFF \ - .. 
+
+THIRD_PARTY_PATH=/paddle/third_party_android/$ANDROID_ABI
+
+if [ $ANDROID_ABI == "armeabi-v7a" ]; then
+  cmake -DCMAKE_SYSTEM_NAME=Android \
+        -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM_STANDALONE_TOOLCHAIN \
+        -DANDROID_ABI=$ANDROID_ABI \
+        -DANDROID_ARM_NEON=ON \
+        -DANDROID_ARM_MODE=ON \
+        -DHOST_C_COMPILER=/usr/bin/gcc \
+        -DHOST_CXX_COMPILER=/usr/bin/g++ \
+        -DCMAKE_INSTALL_PREFIX=/paddle/install \
+        -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DWITH_C_API=ON \
+        -DWITH_SWIG_PY=OFF \
+        /paddle
+elif [ $ANDROID_ABI == "arm64-v8a" ]; then
+  cmake -DCMAKE_SYSTEM_NAME=Android \
+        -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM64_STANDALONE_TOOLCHAIN \
+        -DANDROID_ABI=$ANDROID_ABI \
+        -DANDROID_ARM_MODE=ON \
+        -DHOST_C_COMPILER=/usr/bin/gcc \
+        -DHOST_CXX_COMPILER=/usr/bin/g++ \
+        -DCMAKE_INSTALL_PREFIX=/paddle/install \
+        -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DWITH_C_API=ON \
+        -DWITH_SWIG_PY=OFF \
+        /paddle
+elif [ $ANDROID_ABI == "armeabi" ]; then
+  cmake -DCMAKE_SYSTEM_NAME=Android \
+        -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM_STANDALONE_TOOLCHAIN \
+        -DANDROID_ABI=$ANDROID_ABI \
+        -DANDROID_ARM_MODE=ON \
+        -DHOST_C_COMPILER=/usr/bin/gcc \
+        -DHOST_CXX_COMPILER=/usr/bin/g++ \
+        -DCMAKE_INSTALL_PREFIX=/paddle/install \
+        -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DWITH_C_API=ON \
+        -DWITH_SWIG_PY=OFF \
+        /paddle
+else
+  echo "Invalid ANDROID_ABI: $ANDROID_ABI"
+fi
+
 make -j `nproc`
 make install -j `nproc`

From 8a4fad4248e942061586538e8de14a7d08052330 Mon Sep 17 00:00:00 2001
From: Liu Yiqun
Date: Wed, 23 Aug 2017 19:43:57 +0800
Subject: [PATCH 09/71] Support to use clang for Android cross-compiling.

---
 cmake/cblas.cmake                      |   4 +
 cmake/external/warpctc.cmake           |   1 +
 paddle/cuda/include/hl_cpu_gru.cuh     | 166 ++++++++++++------------
 paddle/function/MulOp.cpp              |  37 +++---
 paddle/math/MathFunctions.cpp          |   4 +
 paddle/math/MathFunctions.h            |  23 +++-
 paddle/math/Matrix.cpp                 |  18 ++-
 paddle/scripts/docker/build_android.sh |  24 ++--
 8 files changed, 155 insertions(+), 122 deletions(-)

diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index 854066fd1d..ab111eccc0 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -13,6 +13,10 @@
 # system paths.
 #
 
+if(USE_EIGEN_FOR_BLAS)
+  return()
+endif(USE_EIGEN_FOR_BLAS)
+
 set(CBLAS_FOUND OFF)
 
 ## Find MKLML First.
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index 2d7daed9bc..3cc652bed5 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -41,6 +41,7 @@ IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "App
 ELSE()
     SET(USE_OMP ON)
 ENDIF()
+SET(USE_OMP OFF FORCE)
 
 ExternalProject_Add(
     extern_warpctc
diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/cuda/include/hl_cpu_gru.cuh
index c0a37ced2a..732799a28b 100644
--- a/paddle/cuda/include/hl_cpu_gru.cuh
+++ b/paddle/cuda/include/hl_cpu_gru.cuh
@@ -20,11 +20,11 @@ limitations under the License.
*/ #include "paddle/math/MathFunctions.h" -#ifndef PADDLE_TYPE_DOUBLE -#define CBLAS_GEMM paddle::gemm -#else -#define CBLAS_GEMM paddle::gemm -#endif +// #ifndef PADDLE_TYPE_DOUBLE +// #define CBLAS_GEMM paddle::gemm +// #else +// #define CBLAS_GEMM paddle::gemm +// #endif template void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, @@ -219,37 +219,37 @@ void hl_cpu_gru_forward(OpResetOutput opResetOutput, hl_activation_mode_t active_node, hl_activation_mode_t active_gate) { if (value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasNoTrans, - batchSize, - 2 * frameSize, - frameSize, - 1, - value.prevOutValue, - frameSize, - value.gateWeight, - frameSize * 2, - 1, - value.gateValue, - frameSize * 3); +// CBLAS_GEMM(CblasNoTrans, +// CblasNoTrans, +// batchSize, +// 2 * frameSize, +// frameSize, +// 1, +// value.prevOutValue, +// frameSize, +// value.gateWeight, +// frameSize * 2, +// 1, +// value.gateValue, +// frameSize * 3); } forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate); if (value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasNoTrans, - batchSize, - frameSize, - frameSize, - 1, - value.resetOutputValue, - frameSize, - value.stateWeight, - frameSize, - 1, - value.gateValue + frameSize * 2, - frameSize * 3); +// CBLAS_GEMM(CblasNoTrans, +// CblasNoTrans, +// batchSize, +// frameSize, +// frameSize, +// 1, +// value.resetOutputValue, +// frameSize, +// value.stateWeight, +// frameSize, +// 1, +// value.gateValue + frameSize * 2, +// frameSize * 3); } forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node); @@ -538,34 +538,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad, frameSize, batchSize, active_node); if (value.prevOutValue && grad.prevOutGrad) { - CBLAS_GEMM(CblasNoTrans, - CblasTrans, - batchSize, - frameSize, - frameSize, - 1, - grad.gateGrad + frameSize * 2, - frameSize * 3, - value.stateWeight, - frameSize, - 0, - grad.resetOutputGrad, - frameSize); +// CBLAS_GEMM(CblasNoTrans, +// CblasTrans, +// batchSize, +// frameSize, +// frameSize, +// 1, +// grad.gateGrad + frameSize * 2, +// frameSize * 3, +// value.stateWeight, +// frameSize, +// 0, +// grad.resetOutputGrad, +// frameSize); if (grad.stateWeightGrad) { - CBLAS_GEMM(CblasTrans, - CblasNoTrans, - frameSize, - frameSize, - batchSize, - 1, - value.resetOutputValue, - frameSize, - grad.gateGrad + frameSize * 2, - frameSize * 3, - 1, - grad.stateWeightGrad, - frameSize); +// CBLAS_GEMM(CblasTrans, +// CblasNoTrans, +// frameSize, +// frameSize, +// batchSize, +// 1, +// value.resetOutputValue, +// frameSize, +// grad.gateGrad + frameSize * 2, +// frameSize * 3, +// 1, +// grad.stateWeightGrad, +// frameSize); } } @@ -573,34 +573,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad, frameSize, batchSize, active_gate); if (grad.prevOutGrad && value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasTrans, - batchSize, - frameSize, - frameSize * 2, - 1, - grad.gateGrad, - frameSize * 3, - value.gateWeight, - frameSize * 2, - 1, - grad.prevOutGrad, - frameSize); +// CBLAS_GEMM(CblasNoTrans, +// CblasTrans, +// batchSize, +// frameSize, +// frameSize * 2, +// 1, +// grad.gateGrad, +// frameSize * 3, +// value.gateWeight, +// frameSize * 2, +// 1, +// grad.prevOutGrad, +// frameSize); if (grad.gateWeightGrad) { - CBLAS_GEMM(CblasTrans, - CblasNoTrans, - frameSize, - frameSize * 2, - batchSize, - 1, - value.prevOutValue, - frameSize, - grad.gateGrad, - frameSize * 3, - 1, - grad.gateWeightGrad, - frameSize * 2); +// CBLAS_GEMM(CblasTrans, +// 
CblasNoTrans, +// frameSize, +// frameSize * 2, +// batchSize, +// 1, +// value.prevOutValue, +// frameSize, +// grad.gateGrad, +// frameSize * 3, +// 1, +// grad.gateWeightGrad, +// frameSize * 2); } } } diff --git a/paddle/function/MulOp.cpp b/paddle/function/MulOp.cpp index 91b4b8ed91..25e41edad5 100644 --- a/paddle/function/MulOp.cpp +++ b/paddle/function/MulOp.cpp @@ -13,18 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MulOp.h" -/// todo(tianbing), delete it -#include -#include "paddle/math/MathFunctions.h" +#include "GemmFunctor.h" #include "paddle/math/SIMDFunctions.h" #include "paddle/utils/ThreadLocal.h" -#ifndef PADDLE_TYPE_DOUBLE -#define GEMM paddle::gemm -#else -#define GEMM paddle::gemm -#endif - namespace { inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) { for (unsigned int i = 0; i < len; ++i) { @@ -114,19 +106,20 @@ void MulOp(CpuMatrix& out, real scaleT, bool aTrans, bool bTrans) { - GEMM(aTrans ? CblasTrans : CblasNoTrans, - bTrans ? CblasTrans : CblasNoTrans, - out.getHeight(), - out.getWidth(), - !aTrans ? a.getWidth() : a.getHeight(), - scaleAB, - a.getData(), - a.getStride(), - b.getData(), - b.getStride(), - scaleT, - out.getData(), - out.getStride()); + BlasGemm::compute( + aTrans, + bTrans, + out.getHeight(), + out.getWidth(), + !aTrans ? a.getWidth() : a.getHeight(), + scaleAB, + a.getData(), + a.getStride(), + b.getData(), + b.getStride(), + scaleT, + out.getData(), + out.getStride()); } /// dense matrix (+)= sparse matrix * dense matrix diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index c8ba1074a1..c2f17beeb8 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -84,6 +84,7 @@ LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP) namespace paddle { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template <> void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, @@ -143,6 +144,7 @@ void gemm(const CBLAS_TRANSPOSE transA, C, ldc); } +#endif template <> int getrf(const CBLAS_ORDER order, @@ -182,6 +184,7 @@ int getri(const CBLAS_ORDER order, return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv); } +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template <> void axpy(const int n, const float alpha, const float* x, float* y) { cblas_saxpy(n, alpha, x, 1, y, 1); @@ -201,6 +204,7 @@ template <> double dotProduct(const int n, const double* x, const double* y) { return cblas_ddot(n, x, 1, y, 1); } +#endif #if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKLML) diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 637643838f..9297ae78c2 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -40,7 +40,14 @@ extern "C" { #ifndef LAPACK_FOUND extern "C" { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS #include +#else +typedef enum CBLAS_ORDER { + CblasRowMajor = 101, + CblasColMajor = 102 +} CBLAS_ORDER; +#endif int LAPACKE_sgetrf( int matrix_layout, int m, int n, float* a, int lda, int* ipiv); int LAPACKE_dgetrf( @@ -56,6 +63,7 @@ int LAPACKE_dgetri( namespace paddle { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, @@ -70,6 +78,7 @@ void gemm(const CBLAS_TRANSPOSE transA, const T beta, T* C, const int ldc); +#endif template int getrf(const CBLAS_ORDER Order, @@ -84,10 +93,20 @@ int getri( const CBLAS_ORDER Order, const int N, T* A, const int lda, const int* ipiv); template -void axpy(const int n, const T alpha, const T* x, T* y); +void 
axpy(const int n, const T alpha, const T* x, T* y) { + /// y = y + alpha * x + for (int i = 0; i < n; i++) { + y[i] = y[i] + alpha * x[i]; + } +} template -T dotProduct(const int n, const T* x, const T* y); +T dotProduct(const int n, const T* x, const T* y) { + T result = static_cast(0); + for (int i = 0; i < n; i++) { + result += x[i] * y[i]; + } +} template void vExp(const int n, const T* a, T* r); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 27f7d95b75..fbf3accc9a 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -28,6 +28,7 @@ limitations under the License. */ #include "hl_top_k.h" #include "paddle/utils/Logging.h" +#include "paddle/function/GemmFunctor.h" #include "paddle/utils/ThreadLocal.h" #include "SIMDFunctions.h" @@ -2222,24 +2223,29 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { CHECK(!isTransposed()) << "Not supported"; size_t a_col, b_col, a_row, b_row; - CBLAS_TRANSPOSE a_trans, b_trans; + // CBLAS_TRANSPOSE a_trans, b_trans; + bool a_trans, b_trans; if (!a->isTransposed()) { a_col = a->getWidth(); a_row = a->getHeight(); - a_trans = CblasNoTrans; + // a_trans = CblasNoTrans; + a_trans = false; } else { a_col = a->getHeight(); a_row = a->getWidth(); - a_trans = CblasTrans; + // a_trans = CblasTrans; + a_trans = true; } if (!b->isTransposed()) { b_col = b->getWidth(); b_row = b->getHeight(); - b_trans = CblasNoTrans; + // b_trans = CblasNoTrans; + b_trans = false; } else { b_col = b->getHeight(); b_row = b->getWidth(); - b_trans = CblasTrans; + // b_trans = CblasTrans; + b_trans = true; } CHECK_EQ(a_col, b_row); @@ -2256,7 +2262,7 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { int lda = a->getStride(); int ldb = b->getStride(); int ldc = getStride(); - gemm( + BlasGemm::compute( a_trans, b_trans, M, N, K, scaleAB, A, lda, B, ldb, scaleT, C, ldc); } diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index 593ae28e49..79f5ab12e9 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,9 +2,9 @@ set -xe -mkdir -p /paddle/build_android/$ANDROID_ABI -cd /paddle/build_android/$ANDROID_ABI -rm -rf /paddle/install 2>/dev/null || true +rm -rf /paddle/build_android 2>/dev/null || true +mkdir -p /paddle/build_android +cd /paddle/build_android THIRD_PARTY_PATH=/paddle/third_party_android/$ANDROID_ABI @@ -14,19 +14,25 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_NEON=ON \ -DANDROID_ARM_MODE=ON \ + -DCMAKE_C_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-clang \ + -DCMAKE_CXX_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-clang++ \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ -DCMAKE_INSTALL_PREFIX=/paddle/install \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ + -DUSE_EIGEN_FOR_BLAS=ON \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle -elif [ $ANDROID_ABI == "arm64-v7a" ]; then + -DWITH_STYLE_CHECK=OFF \ + .. 
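A note on the MathFunctions.h hunk above: when PADDLE_USE_EIGEN_FOR_BLAS is defined, the cblas-backed declarations of axpy and dotProduct are replaced by header-only loop implementations, so the Android build no longer needs CBLAS symbols for them. Two caveats about the text as transcribed: the angle-bracketed template parameters appear to have been stripped from these hunks (e.g. "template" with no parameter list, "static_cast(0)"), and the dotProduct body computes result but never returns it. A minimal standalone sketch of the intended fallbacks, restoring the template parameters:

    #include <cassert>

    // Loop fallbacks mirroring the cblas-free path in MathFunctions.h.
    // This is an illustrative sketch, not the Paddle sources themselves.
    template <class T>
    void axpy(const int n, const T alpha, const T* x, T* y) {
      // y = y + alpha * x
      for (int i = 0; i < n; i++) {
        y[i] += alpha * x[i];
      }
    }

    template <class T>
    T dotProduct(const int n, const T* x, const T* y) {
      T result = static_cast<T>(0);
      for (int i = 0; i < n; i++) {
        result += x[i] * y[i];
      }
      return result;  // the hunk above appears to omit this return
    }

    int main() {
      float x[3] = {1.f, 2.f, 3.f};
      float y[3] = {4.f, 5.f, 6.f};
      axpy(3, 2.f, x, y);                   // y becomes {6, 9, 12}
      assert(dotProduct(3, x, y) == 60.f);  // 1*6 + 2*9 + 3*12
      return 0;
    }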
+elif [ $ANDROID_ABI == "arm64-v8a" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM64_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_MODE=ON \ + -DCMAKE_C_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-clang \ + -DCMAKE_CXX_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-clang++ \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ -DCMAKE_INSTALL_PREFIX=/paddle/install \ @@ -34,7 +40,7 @@ elif [ $ANDROID_ABI == "arm64-v7a" ]; then -DCMAKE_BUILD_TYPE=Release \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle + .. elif [ $ANDROID_ABI == "armeabi" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM_STANDALONE_TOOLCHAIN \ @@ -47,10 +53,10 @@ elif [ $ANDROID_ABI == "armeabi" ]; then -DCMAKE_BUILD_TYPE=Release \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle + .. else echo "Invalid ANDROID_ABI: $ANDROID_ABI" fi -make -j `nproc` -make install -j `nproc` +make VERBOSE=1 +make install From f241773c4f1803631bba968bca1d5621a0d3ced5 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 23 Aug 2017 19:43:57 +0800 Subject: [PATCH 10/71] Support to use clang for Android cross-compiling. --- Dockerfile.android | 4 +- cmake/cblas.cmake | 4 + cmake/external/warpctc.cmake | 1 + paddle/cuda/include/hl_cpu_gru.cuh | 166 ++++++++++++------------- paddle/function/MulOp.cpp | 37 +++--- paddle/math/MathFunctions.cpp | 4 + paddle/math/MathFunctions.h | 23 +++- paddle/math/Matrix.cpp | 18 ++- paddle/scripts/docker/build_android.sh | 51 ++++++-- 9 files changed, 181 insertions(+), 127 deletions(-) diff --git a/Dockerfile.android b/Dockerfile.android index aa95abb366..6013215d9d 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -47,8 +47,8 @@ RUN mkdir /opt/android-ndk-tmp && \ wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \ unzip -q android-ndk-r14b-linux-x86_64.zip && \ mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ - ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_ARM_STANDALONE_TOOLCHAIN} && \ - ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm64 --platform=android-21 --install-dir=${ANDROID_ARM64_STANDALONE_TOOLCHAIN} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-23 --install-dir=${ANDROID_ARM_STANDALONE_TOOLCHAIN} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm64 --platform=android-23 --install-dir=${ANDROID_ARM64_STANDALONE_TOOLCHAIN} && \ rm -rf /opt/android-ndk-tmp && \ rm -rf ${ANDROID_NDK_HOME} diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 854066fd1d..ab111eccc0 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -13,6 +13,10 @@ # system paths. # +if(USE_EIGEN_FOR_BLAS) + return() +endif(USE_EIGEN_FOR_BLAS) + set(CBLAS_FOUND OFF) ## Find MKLML First. 
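Before the warpctc and GRU hunks repeat below (patch 10 carries patch 09's changes forward after a rebase), it is worth spelling out what the commented-out CBLAS_GEMM calls in hl_cpu_gru.cuh used to do, since with them disabled the CPU GRU's recurrent projections are simply not applied until, presumably, a BlasGemm-based replacement lands. Reading the dimensions off the first commented call (M = batchSize, N = 2*frameSize, K = frameSize, beta = 1, ldc = 3*frameSize), a naive loop equivalent would look like the following sketch; the function name and layout comments are illustrative, not Paddle API:

    // What the first disabled CBLAS_GEMM in hl_cpu_gru_forward computed:
    //   gateValue[:, 0:2*frameSize] += prevOutValue * gateWeight
    void gruGateProjection(const float* prevOutValue,  // [batchSize, frameSize]
                           const float* gateWeight,    // [frameSize, 2*frameSize]
                           float* gateValue,           // [batchSize, 3*frameSize]
                           int batchSize, int frameSize) {
      for (int i = 0; i < batchSize; ++i) {
        for (int j = 0; j < 2 * frameSize; ++j) {
          float acc = 0.f;
          for (int k = 0; k < frameSize; ++k) {
            acc += prevOutValue[i * frameSize + k] *
                   gateWeight[k * 2 * frameSize + j];
          }
          gateValue[i * 3 * frameSize + j] += acc;  // beta == 1 accumulates
        }
      }
    }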
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 2d7daed9bc..3cc652bed5 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -41,6 +41,7 @@ IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "App ELSE() SET(USE_OMP ON) ENDIF() +SET(USE_OMP OFF FORCE) ExternalProject_Add( extern_warpctc diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/cuda/include/hl_cpu_gru.cuh index c0a37ced2a..732799a28b 100644 --- a/paddle/cuda/include/hl_cpu_gru.cuh +++ b/paddle/cuda/include/hl_cpu_gru.cuh @@ -20,11 +20,11 @@ limitations under the License. */ #include "paddle/math/MathFunctions.h" -#ifndef PADDLE_TYPE_DOUBLE -#define CBLAS_GEMM paddle::gemm -#else -#define CBLAS_GEMM paddle::gemm -#endif +// #ifndef PADDLE_TYPE_DOUBLE +// #define CBLAS_GEMM paddle::gemm +// #else +// #define CBLAS_GEMM paddle::gemm +// #endif template void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, @@ -219,37 +219,37 @@ void hl_cpu_gru_forward(OpResetOutput opResetOutput, hl_activation_mode_t active_node, hl_activation_mode_t active_gate) { if (value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasNoTrans, - batchSize, - 2 * frameSize, - frameSize, - 1, - value.prevOutValue, - frameSize, - value.gateWeight, - frameSize * 2, - 1, - value.gateValue, - frameSize * 3); +// CBLAS_GEMM(CblasNoTrans, +// CblasNoTrans, +// batchSize, +// 2 * frameSize, +// frameSize, +// 1, +// value.prevOutValue, +// frameSize, +// value.gateWeight, +// frameSize * 2, +// 1, +// value.gateValue, +// frameSize * 3); } forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate); if (value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasNoTrans, - batchSize, - frameSize, - frameSize, - 1, - value.resetOutputValue, - frameSize, - value.stateWeight, - frameSize, - 1, - value.gateValue + frameSize * 2, - frameSize * 3); +// CBLAS_GEMM(CblasNoTrans, +// CblasNoTrans, +// batchSize, +// frameSize, +// frameSize, +// 1, +// value.resetOutputValue, +// frameSize, +// value.stateWeight, +// frameSize, +// 1, +// value.gateValue + frameSize * 2, +// frameSize * 3); } forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node); @@ -538,34 +538,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad, frameSize, batchSize, active_node); if (value.prevOutValue && grad.prevOutGrad) { - CBLAS_GEMM(CblasNoTrans, - CblasTrans, - batchSize, - frameSize, - frameSize, - 1, - grad.gateGrad + frameSize * 2, - frameSize * 3, - value.stateWeight, - frameSize, - 0, - grad.resetOutputGrad, - frameSize); +// CBLAS_GEMM(CblasNoTrans, +// CblasTrans, +// batchSize, +// frameSize, +// frameSize, +// 1, +// grad.gateGrad + frameSize * 2, +// frameSize * 3, +// value.stateWeight, +// frameSize, +// 0, +// grad.resetOutputGrad, +// frameSize); if (grad.stateWeightGrad) { - CBLAS_GEMM(CblasTrans, - CblasNoTrans, - frameSize, - frameSize, - batchSize, - 1, - value.resetOutputValue, - frameSize, - grad.gateGrad + frameSize * 2, - frameSize * 3, - 1, - grad.stateWeightGrad, - frameSize); +// CBLAS_GEMM(CblasTrans, +// CblasNoTrans, +// frameSize, +// frameSize, +// batchSize, +// 1, +// value.resetOutputValue, +// frameSize, +// grad.gateGrad + frameSize * 2, +// frameSize * 3, +// 1, +// grad.stateWeightGrad, +// frameSize); } } @@ -573,34 +573,34 @@ void hl_cpu_gru_backward(OpStateGrad opStateGrad, frameSize, batchSize, active_gate); if (grad.prevOutGrad && value.prevOutValue) { - CBLAS_GEMM(CblasNoTrans, - CblasTrans, - batchSize, - frameSize, - 
frameSize * 2, - 1, - grad.gateGrad, - frameSize * 3, - value.gateWeight, - frameSize * 2, - 1, - grad.prevOutGrad, - frameSize); +// CBLAS_GEMM(CblasNoTrans, +// CblasTrans, +// batchSize, +// frameSize, +// frameSize * 2, +// 1, +// grad.gateGrad, +// frameSize * 3, +// value.gateWeight, +// frameSize * 2, +// 1, +// grad.prevOutGrad, +// frameSize); if (grad.gateWeightGrad) { - CBLAS_GEMM(CblasTrans, - CblasNoTrans, - frameSize, - frameSize * 2, - batchSize, - 1, - value.prevOutValue, - frameSize, - grad.gateGrad, - frameSize * 3, - 1, - grad.gateWeightGrad, - frameSize * 2); +// CBLAS_GEMM(CblasTrans, +// CblasNoTrans, +// frameSize, +// frameSize * 2, +// batchSize, +// 1, +// value.prevOutValue, +// frameSize, +// grad.gateGrad, +// frameSize * 3, +// 1, +// grad.gateWeightGrad, +// frameSize * 2); } } } diff --git a/paddle/function/MulOp.cpp b/paddle/function/MulOp.cpp index 91b4b8ed91..25e41edad5 100644 --- a/paddle/function/MulOp.cpp +++ b/paddle/function/MulOp.cpp @@ -13,18 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MulOp.h" -/// todo(tianbing), delete it -#include -#include "paddle/math/MathFunctions.h" +#include "GemmFunctor.h" #include "paddle/math/SIMDFunctions.h" #include "paddle/utils/ThreadLocal.h" -#ifndef PADDLE_TYPE_DOUBLE -#define GEMM paddle::gemm -#else -#define GEMM paddle::gemm -#endif - namespace { inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) { for (unsigned int i = 0; i < len; ++i) { @@ -114,19 +106,20 @@ void MulOp(CpuMatrix& out, real scaleT, bool aTrans, bool bTrans) { - GEMM(aTrans ? CblasTrans : CblasNoTrans, - bTrans ? CblasTrans : CblasNoTrans, - out.getHeight(), - out.getWidth(), - !aTrans ? a.getWidth() : a.getHeight(), - scaleAB, - a.getData(), - a.getStride(), - b.getData(), - b.getStride(), - scaleT, - out.getData(), - out.getStride()); + BlasGemm::compute( + aTrans, + bTrans, + out.getHeight(), + out.getWidth(), + !aTrans ? 
a.getWidth() : a.getHeight(), + scaleAB, + a.getData(), + a.getStride(), + b.getData(), + b.getStride(), + scaleT, + out.getData(), + out.getStride()); } /// dense matrix (+)= sparse matrix * dense matrix diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index c8ba1074a1..c2f17beeb8 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -84,6 +84,7 @@ LAPACK_ROUTINE_EACH(DYNAMIC_LOAD_LAPACK_WRAP) namespace paddle { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template <> void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, @@ -143,6 +144,7 @@ void gemm(const CBLAS_TRANSPOSE transA, C, ldc); } +#endif template <> int getrf(const CBLAS_ORDER order, @@ -182,6 +184,7 @@ int getri(const CBLAS_ORDER order, return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv); } +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template <> void axpy(const int n, const float alpha, const float* x, float* y) { cblas_saxpy(n, alpha, x, 1, y, 1); @@ -201,6 +204,7 @@ template <> double dotProduct(const int n, const double* x, const double* y) { return cblas_ddot(n, x, 1, y, 1); } +#endif #if defined(PADDLE_USE_MKL) || defined(PADDLE_USE_MKLML) diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 637643838f..9297ae78c2 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -40,7 +40,14 @@ extern "C" { #ifndef LAPACK_FOUND extern "C" { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS #include +#else +typedef enum CBLAS_ORDER { + CblasRowMajor = 101, + CblasColMajor = 102 +} CBLAS_ORDER; +#endif int LAPACKE_sgetrf( int matrix_layout, int m, int n, float* a, int lda, int* ipiv); int LAPACKE_dgetrf( @@ -56,6 +63,7 @@ int LAPACKE_dgetri( namespace paddle { +#ifndef PADDLE_USE_EIGEN_FOR_BLAS template void gemm(const CBLAS_TRANSPOSE transA, const CBLAS_TRANSPOSE transB, @@ -70,6 +78,7 @@ void gemm(const CBLAS_TRANSPOSE transA, const T beta, T* C, const int ldc); +#endif template int getrf(const CBLAS_ORDER Order, @@ -84,10 +93,20 @@ int getri( const CBLAS_ORDER Order, const int N, T* A, const int lda, const int* ipiv); template -void axpy(const int n, const T alpha, const T* x, T* y); +void axpy(const int n, const T alpha, const T* x, T* y) { + /// y = y + alpha * x + for (int i = 0; i < n; i++) { + y[i] = y[i] + alpha * x[i]; + } +} template -T dotProduct(const int n, const T* x, const T* y); +T dotProduct(const int n, const T* x, const T* y) { + T result = static_cast(0); + for (int i = 0; i < n; i++) { + result += x[i] * y[i]; + } +} template void vExp(const int n, const T* a, T* r); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 27f7d95b75..fbf3accc9a 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -28,6 +28,7 @@ limitations under the License. 
*/ #include "hl_top_k.h" #include "paddle/utils/Logging.h" +#include "paddle/function/GemmFunctor.h" #include "paddle/utils/ThreadLocal.h" #include "SIMDFunctions.h" @@ -2222,24 +2223,29 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { CHECK(!isTransposed()) << "Not supported"; size_t a_col, b_col, a_row, b_row; - CBLAS_TRANSPOSE a_trans, b_trans; + // CBLAS_TRANSPOSE a_trans, b_trans; + bool a_trans, b_trans; if (!a->isTransposed()) { a_col = a->getWidth(); a_row = a->getHeight(); - a_trans = CblasNoTrans; + // a_trans = CblasNoTrans; + a_trans = false; } else { a_col = a->getHeight(); a_row = a->getWidth(); - a_trans = CblasTrans; + // a_trans = CblasTrans; + a_trans = true; } if (!b->isTransposed()) { b_col = b->getWidth(); b_row = b->getHeight(); - b_trans = CblasNoTrans; + // b_trans = CblasNoTrans; + b_trans = false; } else { b_col = b->getHeight(); b_row = b->getWidth(); - b_trans = CblasTrans; + // b_trans = CblasTrans; + b_trans = true; } CHECK_EQ(a_col, b_row); @@ -2256,7 +2262,7 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { int lda = a->getStride(); int ldb = b->getStride(); int ldc = getStride(); - gemm( + BlasGemm::compute( a_trans, b_trans, M, N, K, scaleAB, A, lda, B, ldb, scaleT, C, ldc); } diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index 593ae28e49..a61c7c40e9 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,11 +2,31 @@ set -xe -mkdir -p /paddle/build_android/$ANDROID_ABI -cd /paddle/build_android/$ANDROID_ABI -rm -rf /paddle/install 2>/dev/null || true +COMPILER=gcc +USE_EIGEN=ON +if [ $COMPILER == clang ]; then + SUFFIX=_clang + C_COMPILER=clang + CXX_COMPILER=clang++ +else + SUFFIX=_gcc + C_COMPILER=gcc + CXX_COMPILER=g++ +fi +if [ $USE_EIGEN == ON ]; then + SUFFIX=${SUFFIX}_eigen +else + SUFFIX=${SUFFIX}_openblas +fi -THIRD_PARTY_PATH=/paddle/third_party_android/$ANDROID_ABI +BUILD_ROOT=/paddle/build_android$SUFFIX +DEST_ROOT=/paddle/install$SUFFIX + +rm -rf $BUILD_ROOT 2>/dev/null || true +mkdir -p $BUILD_ROOT +cd $BUILD_ROOT + +THIRD_PARTY_PATH=/paddle/third_party_android$SUFFIX/$ANDROID_ABI if [ $ANDROID_ABI == "armeabi-v7a" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ @@ -14,27 +34,34 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_NEON=ON \ -DANDROID_ARM_MODE=ON \ + -DCMAKE_C_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-${C_COMPILER} \ + -DCMAKE_CXX_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-${CXX_COMPILER} \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ - -DCMAKE_INSTALL_PREFIX=/paddle/install \ + -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ + -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle -elif [ $ANDROID_ABI == "arm64-v7a" ]; then + -DWITH_STYLE_CHECK=OFF \ + .. 
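One more aside on the Matrix.cpp hunk above: CpuMatrix::mul now passes plain bool transpose flags to BlasGemm::compute instead of CBLAS_TRANSPOSE values, which removes the last CBLAS type from this call path (the old enum assignments are kept commented out alongside for reference). Where a CBLAS backend is still in play, mapping the flags back is a one-liner; a hedged sketch, with toCblasTrans and gemmBool as illustrative helpers rather than Paddle code:

    #include <cblas.h>

    // Map the bool flags used by BlasGemm::compute onto the CBLAS enum,
    // the same ternary MulOp.cpp used inline before this series.
    inline CBLAS_TRANSPOSE toCblasTrans(bool trans) {
      return trans ? CblasTrans : CblasNoTrans;
    }

    // Row-major GEMM taking bool flags, as an example consumer.
    void gemmBool(bool aTrans, bool bTrans, int m, int n, int k, float alpha,
                  const float* A, int lda, const float* B, int ldb, float beta,
                  float* C, int ldc) {
      cblas_sgemm(CblasRowMajor, toCblasTrans(aTrans), toCblasTrans(bTrans),
                  m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
    }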
+elif [ $ANDROID_ABI == "arm64-v8a" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM64_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_MODE=ON \ + -DCMAKE_C_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-${C_COMPILER} \ + -DCMAKE_CXX_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-${CXX_COMPILER} \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ - -DCMAKE_INSTALL_PREFIX=/paddle/install \ + -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ + -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle + .. elif [ $ANDROID_ABI == "armeabi" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM_STANDALONE_TOOLCHAIN \ @@ -47,10 +74,10 @@ elif [ $ANDROID_ABI == "armeabi" ]; then -DCMAKE_BUILD_TYPE=Release \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ - /paddle + .. else echo "Invalid ANDROID_ABI: $ANDROID_ABI" fi -make -j `nproc` -make install -j `nproc` +make VERBOSE=1 -j2 +make install -j2 From 6efbe2ff43be576c64962f94f6fcf453ef0dd8a7 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 30 Aug 2017 12:03:49 +0800 Subject: [PATCH 11/71] Merge im2col functor. --- paddle/operators/math/im2col.cc | 215 ++++++++++++++++++++ paddle/operators/math/im2col.cu | 334 ++++++++++++++++++++++++++++++++ paddle/operators/math/im2col.h | 86 ++++++++ 3 files changed, 635 insertions(+) create mode 100644 paddle/operators/math/im2col.cc create mode 100644 paddle/operators/math/im2col.cu create mode 100644 paddle/operators/math/im2col.h diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc new file mode 100644 index 0000000000..dafb21b335 --- /dev/null +++ b/paddle/operators/math/im2col.cc @@ -0,0 +1,215 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Im2Col.h" + +namespace paddle { + +/* + * imShape = [inputChannels, inputHeight, inputWidth] + * colShape = + * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + */ +template +class Im2ColFunctor { + public: + void operator()(const T* imData, const TensorShape& imShape, T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth) { + int inputChannels = imShape[0]; + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[1]; + int filterWidth = colShape[2]; + int outputHeight = colShape[3]; + int outputWidth = colShape[4]; + int channelsCol = inputChannels * filterHeight * filterWidth; + + for (int c = 0; c < channelsCol; ++c) { + int wOffset = c % filterWidth; + int hOffset = (c / filterWidth) % filterHeight; + int c_im = c / filterWidth / filterHeight; + for (int h = 0; h < outputHeight; ++h) { + for (int w = 0; w < outputWidth; ++w) { + int imRowIdx = h * strideHeight + hOffset; + int imColIdx = w * strideWidth + wOffset; + if ((imRowIdx - paddingHeight) < 0 || + (imRowIdx - paddingHeight) >= inputHeight || + (imColIdx - paddingWidth) < 0 || + (imColIdx - paddingWidth) >= inputWidth) { + colData[(c * outputHeight + h) * outputWidth + w] = T(0); + } else { + imRowIdx += c_im * inputHeight - paddingHeight; + imColIdx -= paddingWidth; + colData[(c * outputHeight + h) * outputWidth + w] = + imData[imRowIdx * inputWidth + imColIdx]; + } + } + } + } + } +}; + +/* + * imShape = [inputChannels, inputHeight, inputWidth] + * colShape = + * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + */ +template +class Col2ImFunctor { + public: + void operator()(T* imData, const TensorShape& imShape, const T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth) { + int inputChannels = imShape[0]; + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[1]; + int filterWidth = colShape[2]; + int outputHeight = colShape[3]; + int outputWidth = colShape[4]; + int channelsCol = inputChannels * filterHeight * filterWidth; + + for (int c = 0; c < channelsCol; ++c) { + int wOffset = c % filterWidth; + int hOffset = (c / filterWidth) % filterHeight; + int c_im = c / filterWidth / filterHeight; + for (int h = 0; h < outputHeight; ++h) { + for (int w = 0; w < outputWidth; ++w) { + int imRowIdx = h * strideHeight + hOffset; + int imColIdx = w * strideWidth + wOffset; + if ((imRowIdx - paddingHeight) >= 0 && + (imRowIdx - paddingHeight) < inputHeight && + (imColIdx - paddingWidth) >= 0 && + (imColIdx - paddingWidth) < inputWidth) { + imRowIdx += c_im * inputHeight - paddingHeight; + imColIdx -= paddingWidth; + imData[imRowIdx * inputWidth + imColIdx] += + colData[(c * outputHeight + h) * outputWidth + w]; + } + } + } + } + } +}; + +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; + +/* + * imShape = [inputChannels, inputHeight, inputWidth] + * colShape = + * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + */ +template +class Im2ColFunctor { + public: + void operator()(const T* imData, const TensorShape& imShape, T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth) { + int inputChannels = imShape[0]; + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[3]; + int filterWidth = 
colShape[4]; + int outputHeight = colShape[0]; + int outputWidth = colShape[1]; + for (int outputH = 0; outputH < outputHeight; ++outputH) { + for (int outputW = 0; outputW < outputWidth; ++outputW) { + for (int channel = 0; channel < inputChannels; ++channel) { + for (int filterH = 0; filterH < filterHeight; ++filterH) { + for (int filterW = 0; filterW < filterWidth; ++filterW) { + int imRowOffset = + outputH * strideHeight + filterH - paddingHeight; + int imColOffset = outputW * strideWidth + filterW - paddingWidth; + int colDataOffset = + (((outputH * outputWidth + outputW) * inputChannels + + channel) * + filterHeight + + filterH) * + filterWidth + + filterW; + if (imRowOffset < 0 || imRowOffset >= inputHeight || + imColOffset < 0 || imColOffset >= inputWidth) { + colData[colDataOffset] = float(0); + } else { + int imDataOffset = + (channel * inputHeight + imRowOffset) * inputWidth + + imColOffset; + colData[colDataOffset] = imData[imDataOffset]; + } + } + } + } + } + } + } +}; + +/* + * imShape = [inputChannels, inputHeight, inputWidth] + * colShape = + * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + */ +template +class Col2ImFunctor { + public: + void operator()(T* imData, const TensorShape& imShape, const T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth) { + int inputChannels = imShape[0]; + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[3]; + int filterWidth = colShape[4]; + int outputHeight = colShape[0]; + int outputWidth = colShape[1]; + for (int outputH = 0; outputH < outputHeight; ++outputH) { + for (int outputW = 0; outputW < outputWidth; ++outputW) { + for (int channel = 0; channel < inputChannels; ++channel) { + for (int filterH = 0; filterH < filterHeight; ++filterH) { + for (int filterW = 0; filterW < filterWidth; ++filterW) { + int imRowOffset = + outputH * strideHeight + filterH - paddingHeight; + int imColOffset = outputW * strideWidth + filterW - paddingWidth; + int colDataOffset = + (((outputH * outputWidth + outputW) * inputChannels + + channel) * + filterHeight + + filterH) * + filterWidth + + filterW; + if (imRowOffset >= 0 && imRowOffset < inputHeight && + imColOffset >= 0 && imColOffset < inputWidth) { + int imDataOffset = + (channel * inputHeight + imRowOffset) * inputWidth + + imColOffset; + imData[imDataOffset] += colData[colDataOffset]; + } + } + } + } + } + } + } +}; + +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; + +} // namespace paddle diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu new file mode 100644 index 0000000000..60bcdf8acc --- /dev/null +++ b/paddle/operators/math/im2col.cu @@ -0,0 +1,334 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Im2Col.h" +#include "hl_device_functions.cuh" + +namespace paddle { + +template +__global__ void im2col(const T* data_im, int numOuts, int height, int width, + int blockH, int blockW, int strideH, int strideW, + int paddingH, int paddingW, int height_col, + int width_col, T* data_col) { + int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + if (index < numOuts) { + int w_out = index % width_col; + index /= width_col; + int h_out = index % height_col; + int channel_in = index / height_col; + int channel_out = channel_in * blockH * blockW; + int h_in = h_out * strideH; + int w_in = w_out * strideW; + + data_col += (channel_out * height_col + h_out) * width_col + w_out; + for (int i = 0; i < blockH; ++i) { + for (int j = 0; j < blockW; ++j) { + int rIdx = int(h_in + i); + int cIdx = int(w_in + j); + if ((rIdx - (int)paddingH) >= (int)height || + (rIdx - (int)paddingH) < 0 || + (cIdx - (int)paddingW) >= (int)width || + (cIdx - (int)paddingW) < 0) { + *data_col = 0; + } else { + rIdx = rIdx + channel_in * height - paddingH; + cIdx = cIdx - paddingW; + *data_col = data_im[rIdx * width + cIdx]; + } + data_col += height_col * width_col; + } + } + } +} + +/* + * imShape = [inputChannels, inputHeight, inputWidth] + * colShape = + * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + */ +template +class Im2ColFunctor { + public: + void operator()(const T* imData, const TensorShape& imShape, T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth) { + int inputChannels = imShape[0]; + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[1]; + int filterWidth = colShape[2]; + int outputHeight = colShape[3]; + int outputWidth = colShape[4]; + + int numKernels = inputChannels * outputHeight * outputWidth; + int blocks = (numKernels + 1024 - 1) / 1024; + int blockX = 512; + int blockY = (blocks + 512 - 1) / 512; + dim3 threads(1024, 1); + dim3 grid(blockX, blockY); + im2col<<>>( + imData, numKernels, inputHeight, inputWidth, filterHeight, filterWidth, + strideHeight, strideWidth, paddingHeight, paddingWidth, outputHeight, + outputWidth, colData); + CHECK_SYNC("Im2ColFunctor GPU failed"); + } +}; + +template +__global__ void col2im(size_t n, const T* data_col, size_t height, size_t width, + size_t channels, size_t blockH, size_t blockW, + size_t strideH, size_t strideW, size_t paddingH, + size_t paddingW, size_t height_col, size_t width_col, + T* data_im) { + size_t index = + (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + if (index < n) { + T val = 0; + int w = int(index % width); + int h = int((index / width) % height); + int c = int(index / (width * height)); + if ((w - (int)paddingW) >= 0 && + (w - (int)paddingW) < (width - 2 * paddingW) && + (h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) { + // compute the start and end of the output + int w_col_start = + (w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1; + int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col)); + int h_col_start = + (h < (int)blockH) ? 
0 : (h - (int)blockH) / (int)strideH + 1; + int h_col_end = min(int(h / strideH + 1), int(height_col)); + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + // the col location: [c * width * height + h_out, w_out] + int c_col = int(c * blockH * blockW) + + (h - h_col * (int)strideH) * (int)blockW + + (w - w_col * (int)strideW); + val += data_col[(c_col * height_col + h_col) * width_col + w_col]; + } + } + h -= paddingH; + w -= paddingW; + data_im[c * ((width - 2 * paddingW) * (height - 2 * paddingH)) + + h * (width - 2 * paddingW) + w] += val; + } + } +} + +/* + * imShape = [inputChannels, inputHeight, inputWidth] + * colShape = + * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + */ +template +class Col2ImFunctor { + public: + void operator()(T* imData, const TensorShape& imShape, const T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth) { + int inputChannels = imShape[0]; + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[1]; + int filterWidth = colShape[2]; + int outputHeight = colShape[3]; + int outputWidth = colShape[4]; + + size_t numKernels = inputChannels * (inputHeight + 2 * paddingHeight) * + (inputWidth + 2 * paddingWidth); + + size_t blocks = (numKernels + 1024 - 1) / 1024; + size_t blockX = 512; + size_t blockY = (blocks + 512 - 1) / 512; + dim3 threads(1024, 1); + dim3 grid(blockX, blockY); + + // To avoid involving atomic operations, we will launch one kernel per + // bottom dimension, and then in the kernel add up the top dimensions. + col2im<<>>( + numKernels, colData, inputHeight + 2 * paddingHeight, + inputWidth + 2 * paddingWidth, inputChannels, filterHeight, filterWidth, + strideHeight, strideWidth, paddingHeight, paddingWidth, outputHeight, + outputWidth, imData); + CHECK_SYNC("Col2ImFunctor GPU failed"); + } +}; + +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; + +template +__global__ void im2colOCF(const T* imData, T* colData, int inputChannels, + int inputHeight, int inputWidth, int filterHeight, + int filterWidth, int strideHeight, int strideWidth, + int paddingHeight, int paddingWidth, int outputHeight, + int outputWidth) { + int swId = blockIdx.x; + int shId = blockIdx.y; + for (int channelId = threadIdx.z; channelId < inputChannels; + channelId += blockDim.z) { + for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { + for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { + int widthOffset = idx + swId * strideWidth - paddingWidth; + int heightOffset = idy + shId * strideHeight - paddingHeight; + int imOffset = widthOffset + heightOffset * inputWidth + + channelId * inputHeight * inputWidth; + + int colOffset = idx + idy * filterWidth + + channelId * filterHeight * filterWidth + + (shId * outputWidth + swId) * + (inputChannels * filterHeight * filterWidth); + + if (heightOffset >= inputHeight || heightOffset < 0 || + widthOffset >= inputWidth || widthOffset < 0) { + colData[colOffset] = T(0); + } else { + colData[colOffset] = imData[imOffset]; + } + } + } + } +} + +/* + * imShape = [inputChannels, inputHeight, inputWidth] + * colShape = + * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + */ +template +class Im2ColFunctor { + public: + void operator()(const T* imData, const TensorShape& imShape, T* colData, + const TensorShape& colShape, 
int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth) { + int inputChannels = imShape[0]; + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[3]; + int filterWidth = colShape[4]; + int outputHeight = colShape[0]; + int outputWidth = colShape[1]; + + int blockDimX = 0; + int blockDimY = 0; + if (filterHeight <= 4 && filterWidth <= 4) { + blockDimX = 4; + blockDimY = 4; + } else if (filterHeight <= 8 && filterWidth <= 8) { + blockDimX = 8; + blockDimY = 8; + } else if (filterHeight <= 16 && filterWidth <= 16) { + blockDimX = 16; + blockDimY = 16; + } else { + blockDimX = 32; + blockDimY = 32; + } + + int blockDimZ = 1024 / blockDimX / blockDimY; + dim3 threads(blockDimX, blockDimY, std::min(blockDimZ, inputChannels)); + dim3 grid(outputWidth, outputHeight); + im2colOCF<<>>( + imData, colData, inputChannels, inputHeight, inputWidth, filterHeight, + filterWidth, strideHeight, strideWidth, paddingHeight, paddingWidth, + outputHeight, outputWidth); + CHECK_SYNC("Im2ColFunctor GPU failed"); + } +}; + +template +__global__ void col2imOCF(T* imData, const T* colData, int inputChannels, + int inputHeight, int inputWidth, int filterHeight, + int filterWidth, int strideHeight, int strideWidth, + int paddingHeight, int paddingWidth, int outputHeight, + int outputWidth) { + int swId = blockIdx.x; + int shId = blockIdx.y; + for (int channelId = threadIdx.z; channelId < inputChannels; + channelId += blockDim.z) { + for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { + for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { + int widthOffset = idx + swId * strideWidth - paddingWidth; + int heightOffset = idy + shId * strideHeight - paddingHeight; + int imOffset = widthOffset + heightOffset * inputWidth + + channelId * inputHeight * inputWidth; + + int colOffset = idx + idy * filterWidth + + channelId * filterHeight * filterWidth + + (shId * outputWidth + swId) * + (inputChannels * filterHeight * filterWidth); + + if (heightOffset >= 0 && heightOffset < inputHeight && + widthOffset >= 0 && widthOffset < inputWidth) { + paddle::paddleAtomicAdd(imData + imOffset, colData[colOffset]); + } + } + } + } +} + +/* + * imShape = [inputChannels, inputHeight, inputWidth] + * colShape = + * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + */ +template +class Col2ImFunctor { + public: + void operator()(T* imData, const TensorShape& imShape, const T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth) { + int inputChannels = imShape[0]; + int inputHeight = imShape[1]; + int inputWidth = imShape[2]; + int filterHeight = colShape[3]; + int filterWidth = colShape[4]; + int outputHeight = colShape[0]; + int outputWidth = colShape[1]; + + int blockDimX = 0; + int blockDimY = 0; + if (filterHeight <= 4 && filterWidth <= 4) { + blockDimX = 4; + blockDimY = 4; + } else if (filterHeight <= 8 && filterWidth <= 8) { + blockDimX = 8; + blockDimY = 8; + } else if (filterHeight <= 16 && filterWidth <= 16) { + blockDimX = 16; + blockDimY = 16; + } else { + blockDimX = 32; + blockDimY = 32; + } + + int blockDimZ = 1024 / blockDimX / blockDimY; + dim3 threads(blockDimX, blockDimY, std::min(blockDimZ, inputChannels)); + dim3 grid(outputWidth, outputHeight); + col2imOCF<<>>( + imData, colData, inputChannels, inputHeight, inputWidth, filterHeight, + filterWidth, strideHeight, strideWidth, paddingHeight, paddingWidth, + outputHeight, outputWidth); + 
CHECK_SYNC("Col2ImFunctor GPU failed"); + } +}; + +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; + +} // namespace paddle diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h new file mode 100644 index 0000000000..4568ca2fd1 --- /dev/null +++ b/paddle/operators/math/im2col.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "TensorShape.h" +#include "TensorType.h" + +namespace paddle { + +/* The storage format of the coldata in the Im2ColFunctor and Col2ImFunctor. */ +enum ColFormat { kCFO = 0, kOCF = 1 }; + +/* + * \brief Converts the image data of three dimensions(CHW) into a colData of + * five dimensions in the Im2ColFunctor calculation, + * And in the Col2ImFunctor calculation, it is reversed. + * + * \param imData Image data. + * \param imShape The shape of imData, + * [inputChannels, inputHeight, inputWidth]. + * \param colData Column data. + * \param colShape The shape of colData. + * + * If the template argument Format is kCFO, the shape of colData is: + * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + * So, it is easy to reshape into a convolution matrix for convolution + * calculation based on matrix multiplication. + * The shape of convolution matrix is [height, width], where the height is equal + * inputChannels * filterHeight * filterWidth, and the width is equal + * outputHeight * outputWidth. + * + * Reshape: + * shape of colData shape of convolution matrix + * [inputChannels, + * filterHeight, + * filterWidth, ======> [height, width] + * outputHeight, + * outputWidth] + * + * If the template argument Format is kOCF, the shape of colData is: + * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + * So, it is easy to reshape into a sequence matrix for rnn calculation. + * The shape of sequence matrix is [seqLength, stepSize], where the seqLength + * is equal outputHeight * outputWidth, and the stepSize is equal + * inputChannels * filterHeight * filterWidth. + * + * Reshape: + * shape of colData shape of sequence matrix + * [outputHeight, + * outputWidth, + * inputChannels, ======> [seqLength, stepSize] + * filterHeight, + * filterWidth] + * + * \note The caller needs to ensure that imShape.inputChannels is equal to + * colShape.inputChannels. 
+ */ +template +class Im2ColFunctor { + public: + void operator()(const T* imData, const TensorShape& imShape, T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth); +}; + +template +class Col2ImFunctor { + public: + void operator()(T* imData, const TensorShape& imShape, const T* colData, + const TensorShape& colShape, int strideHeight, + int strideWidth, int paddingHeight, int paddingWidth); +}; + +} // namespace paddle From f7be9cb97aa4b90c0ccd6f954a71d3caada4dac7 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 30 Aug 2017 13:55:58 +0800 Subject: [PATCH 12/71] Refine the cpu code. --- paddle/operators/math/CMakeLists.txt | 4 +- paddle/operators/math/im2col.cc | 319 +++++++++++++++------------ paddle/operators/math/im2col.h | 20 +- 3 files changed, 186 insertions(+), 157 deletions(-) diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index ed51d416ed..f31281ebac 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -1,8 +1,8 @@ if(WITH_GPU) - nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context) + nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc DEPS cblas device_context) else() - cc_library(math_function SRCS math_function.cc DEPS cblas device_context) + cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context) endif() nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc index dafb21b335..8124e322cb 100644 --- a/paddle/operators/math/im2col.cc +++ b/paddle/operators/math/im2col.cc @@ -12,48 +12,54 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "Im2Col.h" +#include "paddle/operators/math/im2col.h" namespace paddle { /* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + * im = [input_channels, input_height, input_width] + * col = + * [input_channels, filter_height, filter_width, output_height, output_width] */ template -class Im2ColFunctor { +class Im2ColFunctor { public: - void operator()(const T* imData, const TensorShape& imShape, T* colData, - const TensorShape& colShape, int strideHeight, - int strideWidth, int paddingHeight, int paddingWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputHeight = colShape[3]; - int outputWidth = colShape[4]; - int channelsCol = inputChannels * filterHeight * filterWidth; - - for (int c = 0; c < channelsCol; ++c) { - int wOffset = c % filterWidth; - int hOffset = (c / filterWidth) % filterHeight; - int c_im = c / filterWidth / filterHeight; - for (int h = 0; h < outputHeight; ++h) { - for (int w = 0; w < outputWidth; ++w) { - int imRowIdx = h * strideHeight + hOffset; - int imColIdx = w * strideWidth + wOffset; - if ((imRowIdx - paddingHeight) < 0 || - (imRowIdx - paddingHeight) >= inputHeight || - (imColIdx - paddingWidth) < 0 || - (imColIdx - paddingWidth) >= inputWidth) { - colData[(c * outputHeight + h) * outputWidth + w] = T(0); + void operator()(const framework::Tensor& im, framework::Tensor& col, + int stride_height, int stride_width, int padding_height, + int padding_width) { + PADDLE_ENFORCE(im.dims().size() == 3); + PADDLE_ENFORCE(col.dims().size() == 5); + + int input_channels = im.dims()[0]; + int input_height = im.dims()[1]; + int input_width = im.dims()[2]; + int filter_height = col.dims()[1]; + int filter_width = col.dims()[2]; + int output_height = col.dims()[3]; + int output_width = col.dims()[4]; + int channels_col = input_channels * filter_height * filter_width; + + const T* im_data = im.data(); + T* col_data = col.data(); + + for (int c = 0; c < channels_col; ++c) { + int w_offset = c % filter_width; + int h_offset = (c / filter_width) % filter_height; + int c_im = c / filter_width / filter_height; + for (int h = 0; h < output_height; ++h) { + for (int w = 0; w < output_width; ++w) { + int im_row_idx = h * stride_height + h_offset; + int im_col_idx = w * stride_width + w_offset; + if ((im_row_idx - padding_height) < 0 || + (im_row_idx - padding_height) >= input_height || + (im_col_idx - padding_width) < 0 || + (im_col_idx - padding_width) >= input_width) { + col_data[(c * output_height + h) * output_width + w] = T(0); } else { - imRowIdx += c_im * inputHeight - paddingHeight; - imColIdx -= paddingWidth; - colData[(c * outputHeight + h) * outputWidth + w] = - imData[imRowIdx * inputWidth + imColIdx]; + im_row_idx += c_im * input_height - padding_height; + im_col_idx -= padding_width; + col_data[(c * output_height + h) * output_width + w] = + im_data[im_row_idx * input_width + im_col_idx]; } } } @@ -62,41 +68,46 @@ class Im2ColFunctor { }; /* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + * im = [input_channels, input_height, input_width] + * col = + * [input_channels, filter_height, filter_width, output_height, output_width] */ template -class Col2ImFunctor { +class Col2ImFunctor { public: - void operator()(T* imData, const TensorShape& 
imShape, const T* colData, - const TensorShape& colShape, int strideHeight, - int strideWidth, int paddingHeight, int paddingWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputHeight = colShape[3]; - int outputWidth = colShape[4]; - int channelsCol = inputChannels * filterHeight * filterWidth; - - for (int c = 0; c < channelsCol; ++c) { - int wOffset = c % filterWidth; - int hOffset = (c / filterWidth) % filterHeight; - int c_im = c / filterWidth / filterHeight; - for (int h = 0; h < outputHeight; ++h) { - for (int w = 0; w < outputWidth; ++w) { - int imRowIdx = h * strideHeight + hOffset; - int imColIdx = w * strideWidth + wOffset; - if ((imRowIdx - paddingHeight) >= 0 && - (imRowIdx - paddingHeight) < inputHeight && - (imColIdx - paddingWidth) >= 0 && - (imColIdx - paddingWidth) < inputWidth) { - imRowIdx += c_im * inputHeight - paddingHeight; - imColIdx -= paddingWidth; - imData[imRowIdx * inputWidth + imColIdx] += - colData[(c * outputHeight + h) * outputWidth + w]; + void operator()(framework::Tensor& im, const framework::Tensor& col, + int stride_height, int stride_width, int padding_height, + int padding_width) { + PADDLE_ENFORCE(im.dims().size() == 3); + PADDLE_ENFORCE(col.dims().size() == 5); + int input_channels = im.dims()[0]; + int input_height = im.dims()[1]; + int input_width = im.dims()[2]; + int filter_height = col.dims()[1]; + int filter_width = col.dims()[2]; + int output_height = col.dims()[3]; + int output_width = col.dims()[4]; + int channels_col = input_channels * filter_height * filter_width; + + T* im_data = im.data(); + const T* col_data = col.data(); + + for (int c = 0; c < channels_col; ++c) { + int w_offset = c % filter_width; + int h_offset = (c / filter_width) % filter_height; + int c_im = c / filter_width / filter_height; + for (int h = 0; h < output_height; ++h) { + for (int w = 0; w < output_width; ++w) { + int im_row_idx = h * stride_height + h_offset; + int im_col_idx = w * stride_width + w_offset; + if ((im_row_idx - padding_height) >= 0 && + (im_row_idx - padding_height) < input_height && + (im_col_idx - padding_width) >= 0 && + (im_col_idx - padding_width) < input_width) { + im_row_idx += c_im * input_height - padding_height; + im_col_idx -= padding_width; + im_data[im_row_idx * input_width + im_col_idx] += + col_data[(c * output_height + h) * output_width + w]; } } } @@ -104,52 +115,61 @@ class Col2ImFunctor { } }; -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; /* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + * im = [input_channels, input_height, input_width] + * col = + * [output_height, output_width, input_channels, filter_height, filter_width] */ template -class Im2ColFunctor { +class Im2ColFunctor { public: - void operator()(const T* imData, const TensorShape& imShape, T* colData, - const TensorShape& colShape, int strideHeight, - int strideWidth, int paddingHeight, int paddingWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[3]; - int filterWidth = colShape[4]; - int outputHeight = colShape[0]; - int outputWidth = colShape[1]; - for 
(int outputH = 0; outputH < outputHeight; ++outputH) { - for (int outputW = 0; outputW < outputWidth; ++outputW) { - for (int channel = 0; channel < inputChannels; ++channel) { - for (int filterH = 0; filterH < filterHeight; ++filterH) { - for (int filterW = 0; filterW < filterWidth; ++filterW) { - int imRowOffset = - outputH * strideHeight + filterH - paddingHeight; - int imColOffset = outputW * strideWidth + filterW - paddingWidth; - int colDataOffset = - (((outputH * outputWidth + outputW) * inputChannels + - channel) * - filterHeight + - filterH) * - filterWidth + - filterW; - if (imRowOffset < 0 || imRowOffset >= inputHeight || - imColOffset < 0 || imColOffset >= inputWidth) { - colData[colDataOffset] = float(0); + void operator()(const framework::Tensor& im, framework::Tensor& col, + int stride_height, int stride_width, int padding_height, + int padding_width) { + PADDLE_ENFORCE(im.dims().size() == 3); + PADDLE_ENFORCE(col.dims().size() == 5); + int input_channels = im.dims()[0]; + int input_height = im.dims()[1]; + int input_width = im.dims()[2]; + int filter_height = col.dims()[3]; + int filter_width = col.dims()[4]; + int output_height = col.dims()[0]; + int output_width = col.dims()[1]; + + const T* im_data = im.data(); + T* col_data = col.data(); + + for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { + for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { + for (int channel = 0; channel < input_channels; ++channel) { + for (int filter_row_idx = 0; filter_row_idx < filter_height; + ++filter_row_idx) { + for (int filter_col_idx = 0; filter_col_idx < filter_width; + ++filter_col_idx) { + int im_row_offset = + col_row_idx * stride_height + filter_row_idx - padding_height; + int im_col_offset = + col_col_idx * stride_width + filter_col_idx - padding_width; + int col_offset = (((col_row_idx * output_width + col_col_idx) * + input_channels + + channel) * + filter_height + + filter_row_idx) * + filter_width + + filter_col_idx; + if (im_row_offset < 0 || im_row_offset >= input_height || + im_col_offset < 0 || im_col_offset >= input_width) { + col_data[col_offset] = T(0); } else { - int imDataOffset = - (channel * inputHeight + imRowOffset) * inputWidth + - imColOffset; - colData[colDataOffset] = imData[imDataOffset]; + int im_offset = + (channel * input_height + im_row_offset) * input_width + + im_col_offset; + col_data[col_offset] = im_data[im_offset]; } } } @@ -160,44 +180,53 @@ class Im2ColFunctor { }; /* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + * im = [input_channels, input_height, input_width] + * col = + * [output_height, output_width, input_channels, filter_height, filter_width] */ template -class Col2ImFunctor { +class Col2ImFunctor { public: - void operator()(T* imData, const TensorShape& imShape, const T* colData, - const TensorShape& colShape, int strideHeight, - int strideWidth, int paddingHeight, int paddingWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[3]; - int filterWidth = colShape[4]; - int outputHeight = colShape[0]; - int outputWidth = colShape[1]; - for (int outputH = 0; outputH < outputHeight; ++outputH) { - for (int outputW = 0; outputW < outputWidth; ++outputW) { - for (int channel = 0; channel < inputChannels; ++channel) { - for (int filterH = 0; filterH < filterHeight; ++filterH) { - for (int filterW = 0; filterW < filterWidth; 
++filterW) { - int imRowOffset = - outputH * strideHeight + filterH - paddingHeight; - int imColOffset = outputW * strideWidth + filterW - paddingWidth; - int colDataOffset = - (((outputH * outputWidth + outputW) * inputChannels + - channel) * - filterHeight + - filterH) * - filterWidth + - filterW; - if (imRowOffset >= 0 && imRowOffset < inputHeight && - imColOffset >= 0 && imColOffset < inputWidth) { - int imDataOffset = - (channel * inputHeight + imRowOffset) * inputWidth + - imColOffset; - imData[imDataOffset] += colData[colDataOffset]; + void operator()(framework::Tensor& im, const framework::Tensor& col, + int stride_height, int stride_width, int padding_height, + int padding_width) { + PADDLE_ENFORCE(im.dims().size() == 3); + PADDLE_ENFORCE(col.dims().size() == 5); + int input_channels = im.dims()[0]; + int input_height = im.dims()[1]; + int input_width = im.dims()[2]; + int filter_height = col.dims()[3]; + int filter_width = col.dims()[4]; + int output_height = col.dims()[0]; + int output_width = col.dims()[1]; + + T* im_data = im.data(); + const T* col_data = col.data(); + + for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { + for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { + for (int channel = 0; channel < input_channels; ++channel) { + for (int filter_row_idx = 0; filter_row_idx < filter_height; + ++filter_row_idx) { + for (int filter_col_idx = 0; filter_col_idx < filter_width; + ++filter_col_idx) { + int im_row_offset = + col_row_idx * stride_height + filter_row_idx - padding_height; + int im_col_offset = + col_col_idx * stride_width + filter_col_idx - padding_width; + int col_offset = (((col_row_idx * output_width + col_col_idx) * + input_channels + + channel) * + filter_height + + filter_row_idx) * + filter_width + + filter_col_idx; + if (im_row_offset >= 0 && im_row_offset < input_height && + im_col_offset >= 0 && im_col_offset < input_width) { + int im_offset = + (channel * input_height + im_row_offset) * input_width + + im_col_offset; + im_data[im_offset] += col_data[col_offset]; } } } @@ -207,9 +236,9 @@ class Col2ImFunctor { } }; -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; } // namespace paddle diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index 4568ca2fd1..f2f982b687 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include "TensorShape.h" -#include "TensorType.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" namespace paddle { @@ -67,20 +67,20 @@ enum ColFormat { kCFO = 0, kOCF = 1 }; * \note The caller needs to ensure that imShape.inputChannels is equal to * colShape.inputChannels. 
*/
-template <ColFormat Format, DeviceType Device, class T>
+template <ColFormat Format, typename Place, typename T>
 class Im2ColFunctor {
 public:
-  void operator()(const T* imData, const TensorShape& imShape, T* colData,
-                  const TensorShape& colShape, int strideHeight,
-                  int strideWidth, int paddingHeight, int paddingWidth);
+  void operator()(const framework::Tensor& im, framework::Tensor& col,
+                  int stride_height, int stride_width, int padding_height,
+                  int padding_width);
 };

-template <ColFormat Format, DeviceType Device, class T>
+template <ColFormat Format, typename Place, typename T>
 class Col2ImFunctor {
 public:
-  void operator()(T* imData, const TensorShape& imShape, const T* colData,
-                  const TensorShape& colShape, int strideHeight,
-                  int strideWidth, int paddingHeight, int paddingWidth);
+  void operator()(framework::Tensor& im, const framework::Tensor& col,
+                  int stride_height, int stride_width, int padding_height,
+                  int padding_width);
 };

 }  // namespace paddle

From e967645c15a1264855bac7cbd88dd6a9ed5301ad Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Wed, 30 Aug 2017 14:31:28 +0800
Subject: [PATCH 13/71] Refine the gpu code.

---
 paddle/operators/math/CMakeLists.txt |   3 +-
 paddle/operators/math/im2col.cu      | 452 ++++++++++++++-------------
 2 files changed, 236 insertions(+), 219 deletions(-)

diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index f31281ebac..7a41e294a8 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -1,6 +1,7 @@
 if(WITH_GPU)
-    nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc DEPS cblas device_context)
+    nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc
+  im2col.cu DEPS cblas device_context)
 else()
     cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context)
 endif()

diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu
index 60bcdf8acc..875989af58 100644
--- a/paddle/operators/math/im2col.cu
+++ b/paddle/operators/math/im2col.cu
@@ -12,86 +12,89 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/ -#include "Im2Col.h" -#include "hl_device_functions.cuh" +#include "paddle/operators/math/im2col.h" +#include "paddle/platform/cuda_helper.h" namespace paddle { template -__global__ void im2col(const T* data_im, int numOuts, int height, int width, - int blockH, int blockW, int strideH, int strideW, - int paddingH, int paddingW, int height_col, - int width_col, T* data_col) { +__global__ void im2col(const T* data_im, int num_outs, int height, int width, + int filter_height, int filter_width, int stride_height, + int stride_width, int padding_height, int padding_width, + int output_height, int output_width, T* data_col) { int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; - if (index < numOuts) { - int w_out = index % width_col; - index /= width_col; - int h_out = index % height_col; - int channel_in = index / height_col; - int channel_out = channel_in * blockH * blockW; - int h_in = h_out * strideH; - int w_in = w_out * strideW; + if (index < num_outs) { + int w_out = index % output_width; + index /= output_width; + int h_out = index % output_height; + int channel_in = index / output_height; + int channel_out = channel_in * filter_height * filter_width; + int h_in = h_out * stride_height; + int w_in = w_out * stride_width; - data_col += (channel_out * height_col + h_out) * width_col + w_out; - for (int i = 0; i < blockH; ++i) { - for (int j = 0; j < blockW; ++j) { + data_col += (channel_out * output_height + h_out) * output_width + w_out; + for (int i = 0; i < filter_height; ++i) { + for (int j = 0; j < filter_width; ++j) { int rIdx = int(h_in + i); int cIdx = int(w_in + j); - if ((rIdx - (int)paddingH) >= (int)height || - (rIdx - (int)paddingH) < 0 || - (cIdx - (int)paddingW) >= (int)width || - (cIdx - (int)paddingW) < 0) { + if ((rIdx - (int)padding_height) >= (int)height || + (rIdx - (int)padding_height) < 0 || + (cIdx - (int)padding_width) >= (int)width || + (cIdx - (int)padding_width) < 0) { *data_col = 0; } else { - rIdx = rIdx + channel_in * height - paddingH; - cIdx = cIdx - paddingW; + rIdx = rIdx + channel_in * height - padding_height; + cIdx = cIdx - padding_width; *data_col = data_im[rIdx * width + cIdx]; } - data_col += height_col * width_col; + data_col += output_height * output_width; } } } } /* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + * im = [input_channels, input_height, input_width] + * col = + * [input_channels, filter_height, filter_width, output_height, output_width] */ template -class Im2ColFunctor { +class Im2ColFunctor { public: - void operator()(const T* imData, const TensorShape& imShape, T* colData, - const TensorShape& colShape, int strideHeight, - int strideWidth, int paddingHeight, int paddingWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputHeight = colShape[3]; - int outputWidth = colShape[4]; + void operator()(const framework::Tensor& im, framework::Tensor& col, + int stride_height, int stride_width, int padding_height, + int padding_width) { + PADDLE_ENFORCE(im.dims().size() == 3); + PADDLE_ENFORCE(col.dims().size() == 5); - int numKernels = inputChannels * outputHeight * outputWidth; - int blocks = (numKernels + 1024 - 1) / 1024; - int blockX = 512; - int blockY = (blocks + 512 - 1) / 512; + int input_channels = im.dims()[0]; + int input_height = im.dims()[1]; + int input_width = 
im.dims()[2]; + int filter_height = col.dims()[1]; + int filter_width = col.dims()[2]; + int output_height = col.dims()[3]; + int output_width = col.dims()[4]; + + int num_outputs = input_channels * output_height * output_width; + int blocks = (num_outputs + 1024 - 1) / 1024; + int block_x = 512; + int block_y = (blocks + 512 - 1) / 512; dim3 threads(1024, 1); - dim3 grid(blockX, blockY); - im2col<<>>( - imData, numKernels, inputHeight, inputWidth, filterHeight, filterWidth, - strideHeight, strideWidth, paddingHeight, paddingWidth, outputHeight, - outputWidth, colData); - CHECK_SYNC("Im2ColFunctor GPU failed"); + dim3 grid(block_x, block_y); + im2col<<>>( + im.data(), num_outputs, input_height, input_width, filter_height, + filter_width, stride_height, stride_width, padding_height, + padding_width, output_height, output_width, col.data()); } }; template __global__ void col2im(size_t n, const T* data_col, size_t height, size_t width, - size_t channels, size_t blockH, size_t blockW, - size_t strideH, size_t strideW, size_t paddingH, - size_t paddingW, size_t height_col, size_t width_col, - T* data_im) { + size_t channels, size_t filter_height, + size_t filter_width, size_t stride_height, + size_t stride_width, size_t padding_height, + size_t padding_width, size_t output_height, + size_t output_width, T* data_im) { size_t index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; if (index < n) { @@ -99,104 +102,112 @@ __global__ void col2im(size_t n, const T* data_col, size_t height, size_t width, int w = int(index % width); int h = int((index / width) % height); int c = int(index / (width * height)); - if ((w - (int)paddingW) >= 0 && - (w - (int)paddingW) < (width - 2 * paddingW) && - (h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) { + if ((w - (int)padding_width) >= 0 && + (w - (int)padding_width) < (width - 2 * padding_width) && + (h - (int)padding_height) >= 0 && + (h - padding_height) < (height - 2 * padding_height)) { // compute the start and end of the output - int w_col_start = - (w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1; - int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col)); - int h_col_start = - (h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1; - int h_col_end = min(int(h / strideH + 1), int(height_col)); + int w_col_start = (w < (int)filter_width) + ? 0 + : (w - int(filter_width)) / (int)stride_width + 1; + int w_col_end = + min((int)(w / (int)stride_width + 1), (int)(output_width)); + int h_col_start = (h < (int)filter_height) + ? 
0 + : (h - (int)filter_height) / (int)stride_height + 1; + int h_col_end = min(int(h / stride_height + 1), int(output_height)); for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { // the col location: [c * width * height + h_out, w_out] - int c_col = int(c * blockH * blockW) + - (h - h_col * (int)strideH) * (int)blockW + - (w - w_col * (int)strideW); - val += data_col[(c_col * height_col + h_col) * width_col + w_col]; + int c_col = int(c * filter_height * filter_width) + + (h - h_col * (int)stride_height) * (int)filter_width + + (w - w_col * (int)stride_width); + val += + data_col[(c_col * output_height + h_col) * output_width + w_col]; } } - h -= paddingH; - w -= paddingW; - data_im[c * ((width - 2 * paddingW) * (height - 2 * paddingH)) + - h * (width - 2 * paddingW) + w] += val; + h -= padding_height; + w -= padding_width; + data_im[c * ((width - 2 * padding_width) * + (height - 2 * padding_height)) + + h * (width - 2 * padding_width) + w] += val; } } } /* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + * im = [input_channels, input_height, input_width] + * col = + * [input_channels, filter_height, filter_width, output_height, output_width] */ template -class Col2ImFunctor { +class Col2ImFunctor { public: - void operator()(T* imData, const TensorShape& imShape, const T* colData, - const TensorShape& colShape, int strideHeight, - int strideWidth, int paddingHeight, int paddingWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[1]; - int filterWidth = colShape[2]; - int outputHeight = colShape[3]; - int outputWidth = colShape[4]; + void operator()(framework::Tensor& im, const framework::Tensor& col, + int stride_height, int stride_width, int padding_height, + int padding_width) { + PADDLE_ENFORCE(im.dims().size() == 3); + PADDLE_ENFORCE(col.dims().size() == 5); + + int input_channels = im.dims()[0]; + int input_height = im.dims()[1]; + int input_width = im.dims()[2]; + int filter_height = col.dims()[1]; + int filter_width = col.dims()[2]; + int output_height = col.dims()[3]; + int output_width = col.dims()[4]; - size_t numKernels = inputChannels * (inputHeight + 2 * paddingHeight) * - (inputWidth + 2 * paddingWidth); + size_t num_kernels = input_channels * (input_height + 2 * padding_height) * + (input_width + 2 * padding_width); - size_t blocks = (numKernels + 1024 - 1) / 1024; - size_t blockX = 512; - size_t blockY = (blocks + 512 - 1) / 512; + size_t blocks = (num_kernels + 1024 - 1) / 1024; + size_t block_x = 512; + size_t block_y = (blocks + 512 - 1) / 512; dim3 threads(1024, 1); - dim3 grid(blockX, blockY); + dim3 grid(block_x, block_y); // To avoid involving atomic operations, we will launch one kernel per // bottom dimension, and then in the kernel add up the top dimensions. 
- col2im<<>>( - numKernels, colData, inputHeight + 2 * paddingHeight, - inputWidth + 2 * paddingWidth, inputChannels, filterHeight, filterWidth, - strideHeight, strideWidth, paddingHeight, paddingWidth, outputHeight, - outputWidth, imData); - CHECK_SYNC("Col2ImFunctor GPU failed"); + col2im<<>>( + num_kernels, col.data(), input_height + 2 * padding_height, + input_width + 2 * padding_width, input_channels, filter_height, + filter_width, stride_height, stride_width, padding_height, + padding_width, output_height, output_width, im.data()); } }; -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; template -__global__ void im2colOCF(const T* imData, T* colData, int inputChannels, - int inputHeight, int inputWidth, int filterHeight, - int filterWidth, int strideHeight, int strideWidth, - int paddingHeight, int paddingWidth, int outputHeight, - int outputWidth) { - int swId = blockIdx.x; - int shId = blockIdx.y; - for (int channelId = threadIdx.z; channelId < inputChannels; - channelId += blockDim.z) { - for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { - for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { - int widthOffset = idx + swId * strideWidth - paddingWidth; - int heightOffset = idy + shId * strideHeight - paddingHeight; - int imOffset = widthOffset + heightOffset * inputWidth + - channelId * inputHeight * inputWidth; +__global__ void im2colOCF(const T* im_data, T* col_data, int input_channels, + int input_height, int input_width, int filter_height, + int filter_width, int stride_height, int stride_width, + int padding_height, int padding_width, + int output_height, int output_width) { + int swid = blockIdx.x; + int shid = blockIdx.y; + for (int channelid = threadIdx.z; channelid < input_channels; + channelid += blockDim.z) { + for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) { + for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) { + int width_offset = idx + swid * stride_width - padding_width; + int height_offset = idy + shid * stride_height - padding_height; + int im_offset = width_offset + height_offset * input_width + + channelid * input_height * input_width; - int colOffset = idx + idy * filterWidth + - channelId * filterHeight * filterWidth + - (shId * outputWidth + swId) * - (inputChannels * filterHeight * filterWidth); + int col_offset = idx + idy * filter_width + + channelid * filter_height * filter_width + + (shid * output_width + swid) * + (input_channels * filter_height * filter_width); - if (heightOffset >= inputHeight || heightOffset < 0 || - widthOffset >= inputWidth || widthOffset < 0) { - colData[colOffset] = T(0); + if (height_offset >= input_height || height_offset < 0 || + width_offset >= input_width || width_offset < 0) { + col_data[col_offset] = T(0); } else { - colData[colOffset] = imData[imOffset]; + col_data[col_offset] = im_data[im_offset]; } } } @@ -204,76 +215,79 @@ __global__ void im2colOCF(const T* imData, T* colData, int inputChannels, } /* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + * im = [input_channels, input_height, input_width] + * col = + * [output_height, output_width, input_channels, filter_height, filter_width] */ template -class Im2ColFunctor { +class Im2ColFunctor { public: - void 
operator()(const T* imData, const TensorShape& imShape, T* colData, - const TensorShape& colShape, int strideHeight, - int strideWidth, int paddingHeight, int paddingWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[3]; - int filterWidth = colShape[4]; - int outputHeight = colShape[0]; - int outputWidth = colShape[1]; + void operator()(const framework::Tensor& im, framework::Tensor& col, + int stride_height, int stride_width, int padding_height, + int padding_width) { + PADDLE_ENFORCE(im.dims().size() == 3); + PADDLE_ENFORCE(col.dims().size() == 5); + int input_channels = im.dims()[0]; + int input_height = im.dims()[1]; + int input_width = im.dims()[2]; + int filter_height = col.dims()[3]; + int filter_width = col.dims()[4]; + int output_height = col.dims()[0]; + int output_width = col.dims()[1]; - int blockDimX = 0; - int blockDimY = 0; - if (filterHeight <= 4 && filterWidth <= 4) { - blockDimX = 4; - blockDimY = 4; - } else if (filterHeight <= 8 && filterWidth <= 8) { - blockDimX = 8; - blockDimY = 8; - } else if (filterHeight <= 16 && filterWidth <= 16) { - blockDimX = 16; - blockDimY = 16; + int block_dim_x = 0; + int block_dim_y = 0; + if (filter_height <= 4 && filter_width <= 4) { + block_dim_x = 4; + block_dim_y = 4; + } else if (filter_height <= 8 && filter_width <= 8) { + block_dim_x = 8; + block_dim_y = 8; + } else if (filter_height <= 16 && filter_width <= 16) { + block_dim_x = 16; + block_dim_y = 16; } else { - blockDimX = 32; - blockDimY = 32; + block_dim_x = 32; + block_dim_y = 32; } - int blockDimZ = 1024 / blockDimX / blockDimY; - dim3 threads(blockDimX, blockDimY, std::min(blockDimZ, inputChannels)); - dim3 grid(outputWidth, outputHeight); - im2colOCF<<>>( - imData, colData, inputChannels, inputHeight, inputWidth, filterHeight, - filterWidth, strideHeight, strideWidth, paddingHeight, paddingWidth, - outputHeight, outputWidth); - CHECK_SYNC("Im2ColFunctor GPU failed"); + int block_dim_z = 1024 / block_dim_x / block_dim_y; + dim3 threads(block_dim_x, block_dim_y, + std::min(block_dim_z, input_channels)); + dim3 grid(output_width, output_height); + im2colOCF<<>>( + im.data(), col.data(), input_channels, input_height, input_width, + filter_height, filter_width, stride_height, stride_width, + padding_height, padding_width, output_height, output_width); } }; template -__global__ void col2imOCF(T* imData, const T* colData, int inputChannels, - int inputHeight, int inputWidth, int filterHeight, - int filterWidth, int strideHeight, int strideWidth, - int paddingHeight, int paddingWidth, int outputHeight, - int outputWidth) { - int swId = blockIdx.x; - int shId = blockIdx.y; - for (int channelId = threadIdx.z; channelId < inputChannels; - channelId += blockDim.z) { - for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { - for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { - int widthOffset = idx + swId * strideWidth - paddingWidth; - int heightOffset = idy + shId * strideHeight - paddingHeight; - int imOffset = widthOffset + heightOffset * inputWidth + - channelId * inputHeight * inputWidth; +__global__ void col2imOCF(T* im_data, const T* col_data, int input_channels, + int input_height, int input_width, int filter_height, + int filter_width, int stride_height, int stride_width, + int padding_height, int padding_width, + int output_height, int output_width) { + int swid = blockIdx.x; + int shid = blockIdx.y; + for (int channelid = threadIdx.z; channelid < 
input_channels; + channelid += blockDim.z) { + for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) { + for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) { + int width_offset = idx + swid * stride_width - padding_width; + int height_offset = idy + shid * stride_height - padding_height; + int im_offset = width_offset + height_offset * input_width + + channelid * input_height * input_width; - int colOffset = idx + idy * filterWidth + - channelId * filterHeight * filterWidth + - (shId * outputWidth + swId) * - (inputChannels * filterHeight * filterWidth); + int col_offset = idx + idy * filter_width + + channelid * filter_height * filter_width + + (shid * output_width + swid) * + (input_channels * filter_height * filter_width); - if (heightOffset >= 0 && heightOffset < inputHeight && - widthOffset >= 0 && widthOffset < inputWidth) { - paddle::paddleAtomicAdd(imData + imOffset, colData[colOffset]); + if (height_offset >= 0 && height_offset < input_height && + width_offset >= 0 && width_offset < input_width) { + paddle::platform::CudaAtomicAdd(im_data + im_offset, + col_data[col_offset]); } } } @@ -281,54 +295,56 @@ __global__ void col2imOCF(T* imData, const T* colData, int inputChannels, } /* - * imShape = [inputChannels, inputHeight, inputWidth] - * colShape = - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + * im = [input_channels, input_height, input_width] + * col = + * [output_height, output_width, input_channels, filter_height, filter_width] */ template -class Col2ImFunctor { +class Col2ImFunctor { public: - void operator()(T* imData, const TensorShape& imShape, const T* colData, - const TensorShape& colShape, int strideHeight, - int strideWidth, int paddingHeight, int paddingWidth) { - int inputChannels = imShape[0]; - int inputHeight = imShape[1]; - int inputWidth = imShape[2]; - int filterHeight = colShape[3]; - int filterWidth = colShape[4]; - int outputHeight = colShape[0]; - int outputWidth = colShape[1]; + void operator()(framework::Tensor& im, const framework::Tensor& col, + int stride_height, int stride_width, int padding_height, + int padding_width) { + PADDLE_ENFORCE(im.dims().size() == 3); + PADDLE_ENFORCE(col.dims().size() == 5); + int input_channels = im.dims()[0]; + int input_height = im.dims()[1]; + int input_width = im.dims()[2]; + int filter_height = col.dims()[3]; + int filter_width = col.dims()[4]; + int output_height = col.dims()[0]; + int output_width = col.dims()[1]; - int blockDimX = 0; - int blockDimY = 0; - if (filterHeight <= 4 && filterWidth <= 4) { - blockDimX = 4; - blockDimY = 4; - } else if (filterHeight <= 8 && filterWidth <= 8) { - blockDimX = 8; - blockDimY = 8; - } else if (filterHeight <= 16 && filterWidth <= 16) { - blockDimX = 16; - blockDimY = 16; + int block_dim_x = 0; + int block_dim_y = 0; + if (filter_height <= 4 && filter_width <= 4) { + block_dim_x = 4; + block_dim_y = 4; + } else if (filter_height <= 8 && filter_width <= 8) { + block_dim_x = 8; + block_dim_y = 8; + } else if (filter_height <= 16 && filter_width <= 16) { + block_dim_x = 16; + block_dim_y = 16; } else { - blockDimX = 32; - blockDimY = 32; + block_dim_x = 32; + block_dim_y = 32; } - int blockDimZ = 1024 / blockDimX / blockDimY; - dim3 threads(blockDimX, blockDimY, std::min(blockDimZ, inputChannels)); - dim3 grid(outputWidth, outputHeight); - col2imOCF<<>>( - imData, colData, inputChannels, inputHeight, inputWidth, filterHeight, - filterWidth, strideHeight, strideWidth, paddingHeight, paddingWidth, - outputHeight, 
outputWidth); - CHECK_SYNC("Col2ImFunctor GPU failed"); + int block_dim_z = 1024 / block_dim_x / block_dim_y; + dim3 threads(block_dim_x, block_dim_y, + std::min(block_dim_z, input_channels)); + dim3 grid(output_width, output_height); + col2imOCF<<>>( + im.data(), col.data(), input_channels, input_height, input_width, + filter_height, filter_width, stride_height, stride_width, + padding_height, padding_width, output_height, output_width); } }; -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; } // namespace paddle From 2d707e32c83d92a857b7e5359aae9415f8464d11 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 30 Aug 2017 14:39:32 +0800 Subject: [PATCH 14/71] Refine the comments. --- paddle/operators/math/im2col.h | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index f2f982b687..da51bc69a2 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -29,40 +29,40 @@ enum ColFormat { kCFO = 0, kOCF = 1 }; * * \param imData Image data. * \param imShape The shape of imData, - * [inputChannels, inputHeight, inputWidth]. + * [input_channels, input_height, input_width]. * \param colData Column data. * \param colShape The shape of colData. * * If the template argument Format is kCFO, the shape of colData is: - * [inputChannels, filterHeight, filterWidth, outputHeight, outputWidth] + * [input_channels, filter_height, filter_width, output_height, output_width] * So, it is easy to reshape into a convolution matrix for convolution * calculation based on matrix multiplication. * The shape of convolution matrix is [height, width], where the height is equal - * inputChannels * filterHeight * filterWidth, and the width is equal - * outputHeight * outputWidth. + * input_channels * filter_height * filter_width, and the width is equal + * output_height * output_width. * * Reshape: * shape of colData shape of convolution matrix - * [inputChannels, - * filterHeight, - * filterWidth, ======> [height, width] - * outputHeight, - * outputWidth] + * [input_channels, + * filter_height, + * filter_width, ======> [height, width] + * output_height, + * output_width] * * If the template argument Format is kOCF, the shape of colData is: - * [outputHeight, outputWidth, inputChannels, filterHeight, filterWidth] + * [output_height, output_width, input_channels, filter_height, filter_width] * So, it is easy to reshape into a sequence matrix for rnn calculation. - * The shape of sequence matrix is [seqLength, stepSize], where the seqLength - * is equal outputHeight * outputWidth, and the stepSize is equal - * inputChannels * filterHeight * filterWidth. + * The shape of sequence matrix is [seq_length, step_size], where the seq_length + * is equal output_height * output_width, and the step_size is equal + * input_channels * filter_height * filter_width. * * Reshape: * shape of colData shape of sequence matrix - * [outputHeight, - * outputWidth, - * inputChannels, ======> [seqLength, stepSize] - * filterHeight, - * filterWidth] + * [output_height, + * output_width, + * input_channels, ======> [seqLength, stepSize] + * filter_height, + * filter_width] * * \note The caller needs to ensure that imShape.inputChannels is equal to * colShape.inputChannels. 
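The kCFO layout that the refined comments above describe can be pinned down with a tiny standalone example. The sketch below is illustrative only; the function name, shapes, and the square stride/padding simplification are invented for this note, and it is not the Paddle kernel itself. It fills col[c][kh][kw][oh][ow] = im[c][oh*stride + kh - pad][ow*stride + kw - pad], writing zeros wherever the image index falls in the padding region, so that col reshaped to [input_channels * filter_height * filter_width, output_height * output_width] is directly usable as a GEMM operand.

#include <vector>

// Illustrative kCFO im2col (assumes square stride and padding for brevity).
std::vector<float> im2col_cfo(const std::vector<float>& im, int channels,
                              int height, int width, int filter_h,
                              int filter_w, int stride, int pad) {
  int out_h = (height + 2 * pad - filter_h) / stride + 1;
  int out_w = (width + 2 * pad - filter_w) / stride + 1;
  std::vector<float> col(channels * filter_h * filter_w * out_h * out_w, 0.f);
  for (int c = 0; c < channels; ++c)
    for (int kh = 0; kh < filter_h; ++kh)
      for (int kw = 0; kw < filter_w; ++kw)
        for (int oh = 0; oh < out_h; ++oh)
          for (int ow = 0; ow < out_w; ++ow) {
            int ih = oh * stride + kh - pad;  // image row for this output row
            int iw = ow * stride + kw - pad;  // image col for this output col
            if (ih >= 0 && ih < height && iw >= 0 && iw < width)
              col[(((c * filter_h + kh) * filter_w + kw) * out_h + oh) *
                      out_w + ow] = im[(c * height + ih) * width + iw];
          }
  return col;
}

For kOCF the only change is the flattened index, (((oh * out_w + ow) * channels + c) * filter_h + kh) * filter_w + kw, which groups one sliding window per row and so matches the sequence-matrix reshape described in the comments above.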
From a4df3f5bd8917b2cb510b23dc63bc97a20108f23 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 30 Aug 2017 22:21:53 +0800 Subject: [PATCH 15/71] Finish framework of squared_l2_distance_op. --- paddle/operators/CMakeLists.txt | 2 + paddle/operators/squared_l2_distance_op.cc | 82 ++++++++++++++++++ paddle/operators/squared_l2_distance_op.cu | 25 ++++++ paddle/operators/squared_l2_distance_op.h | 84 +++++++++++++++++++ paddle/pybind/CMakeLists.txt | 3 +- paddle/pybind/pybind.cc | 1 + .../paddle/v2/framework/tests/CMakeLists.txt | 1 + .../paddle/v2/framework/tests/op_test_util.py | 10 +-- .../tests/test_squared_l2_distance_op.py | 25 ++++++ 9 files changed, 227 insertions(+), 6 deletions(-) create mode 100644 paddle/operators/squared_l2_distance_op.cc create mode 100644 paddle/operators/squared_l2_distance_op.cu create mode 100644 paddle/operators/squared_l2_distance_op.h create mode 100644 python/paddle/v2/framework/tests/test_squared_l2_distance_op.py diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f0fd12f1b5..1c32d1df4a 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -73,3 +73,5 @@ op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu) op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) + +op_library(squared_l2_distance_op SRCS squared_l2_distance_op.cc squared_l2_distance_op.cu) diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc new file mode 100644 index 0000000000..9fc498d5a5 --- /dev/null +++ b/paddle/operators/squared_l2_distance_op.cc @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#include "paddle/operators/squared_l2_distance_op.h"
+
+namespace paddle {
+namespace operators {
+
+class SquaredL2DistanceOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
+                            "Input of SquaredL2DistanceOp "
+                            "must be initialized.");
+    PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims(),
+                      ctx.Input<Tensor>("Y")->dims(),
+                      "Dimensions of SquaredL2DistanceOp's two inputs "
+                      "must be the same.");
+    framework::DDim dims = ctx.Input<Tensor>("X")->dims();
+    ctx.Output<Tensor>("sub_result")->Resize(dims);
+    ctx.Output<Tensor>("Out")->Resize(framework::make_ddim({dims[0], 1}));
+  }
+};
+
+class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SquaredL2DistanceOpMaker(framework::OpProto *proto,
+                           framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "Input value.");
+    AddInput("Y", "Target value.");
+    AddOutput("sub_result",
+              "Buffering subtraction result which "
+              "will be reused in backward.")
+        .AsIntermediate();
+    AddOutput("Out", "Squared l2 distance between input and target.");
+    AddComment(R"DOC(
+    SquaredL2DistanceOp will calculate the squared L2 distances for
+    input and target. The number of distance values equals the
+    first dimension of the input.
+    )DOC");
+  }
+};
+
+class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    ctx.Output<Tensor>(framework::GradVarName("X"))
+        ->Resize(ctx.Input<Tensor>("X")->dims());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(squared_l2_distance, ops::SquaredL2DistanceOp,
+            ops::SquaredL2DistanceOpMaker, squared_l2_distance_grad,
+            ops::SquaredL2DistanceGradOp);
+REGISTER_OP_CPU_KERNEL(
+    squared_l2_distance,
+    ops::SquaredL2DistanceKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    squared_l2_distance_grad,
+    ops::SquaredL2DistanceGradKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/squared_l2_distance_op.cu b/paddle/operators/squared_l2_distance_op.cu
new file mode 100644
index 0000000000..3fe62f1a9c
--- /dev/null
+++ b/paddle/operators/squared_l2_distance_op.cu
@@ -0,0 +1,25 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
*/
+
+#define EIGEN_USE_GPU
+
+#include "paddle/operators/squared_l2_distance_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(
+    squared_l2_distance,
+    ops::SquaredL2DistanceKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    squared_l2_distance_grad,
+    ops::SquaredL2DistanceGradKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h
new file mode 100644
index 0000000000..b350fd0117
--- /dev/null
+++ b/paddle/operators/squared_l2_distance_op.h
@@ -0,0 +1,84 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
+
+template <typename Place, typename T>
+class SquaredL2DistanceKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* input0 = context.Input<Tensor>("X");
+    auto* input1 = context.Input<Tensor>("Y");
+    auto* output0 = context.Output<Tensor>("sub_result");
+    auto* output1 = context.Output<Tensor>("Out");
+
+    output0->mutable_data<T>(context.GetPlace());
+    output1->mutable_data<T>(context.GetPlace());
+
+    auto X = EigenMatrix<T>::From(*input0);
+    auto Y = EigenMatrix<T>::From(*input1);
+    auto subResult = EigenMatrix<T>::From(*output0);
+    auto Z = EigenMatrix<T>::From(*output1);
+
+    auto place = context.GetEigenDevice<Place>();
+    // buffer the subtraction result
+    subResult.device(place) = X - Y;
+    const auto& inDims = X.dimensions();
+    const auto& subResMat = subResult.reshape(Eigen::array<int, 2>(
+        {static_cast<int>(inDims[0]), static_cast<int>(X.size() / inDims[0])}));
+    Z.device(place) = subResMat.pow(2).sum(Eigen::array<int, 1>({1}));
+  }
+};
+
+template <typename Place, typename T>
+class SquaredL2DistanceGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* input0 = context.Input<Tensor>("sub_result");
+    auto* OG = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* IG = context.Output<Tensor>(framework::GradVarName("X"));
+
+    IG->mutable_data<T>(context.GetPlace());
+
+    auto subResult = EigenMatrix<T>::From(*input0);
+    auto outGrad = EigenMatrix<T>::From(*OG);
+    auto inGrad = EigenMatrix<T>::From(*IG);
+
+    const auto& subResDims = subResult.dimensions();
+    int firstDim = static_cast<int>(subResDims[0]);
+    int cols = subResult.size() / firstDim;
+    const auto subResMat =
+        subResult.reshape(Eigen::array<int, 2>({firstDim, cols}));
+    // create a matrix view for input gradient tensor
+    auto inGradMat = inGrad.reshape(Eigen::array<int, 2>({firstDim, cols}));
+    inGradMat.device(context.GetEigenDevice<Place>()) =
+        2 * (outGrad.broadcast(Eigen::array<int, 2>({1, cols}))) * subResMat;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt
index 37e186a408..df8c2b37cf 100644
--- a/paddle/pybind/CMakeLists.txt
+++ b/paddle/pybind/CMakeLists.txt
@@ -18,5 +18,6 @@ 
cc_library(paddle_pybind SHARED fill_zeros_like_op lookup_table_op scale_op - minus_op) + minus_op + squared_l2_distance_op) endif(WITH_PYTHON) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 3bc150ccb7..69a5f98a43 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -48,6 +48,7 @@ USE_OP_ITSELF(identity); USE_OP(minus); USE_CPU_ONLY_OP(gather); USE_CPU_ONLY_OP(scatter); +USE_OP(squared_l2_distance); namespace paddle { namespace framework { diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 661ebd8964..06ff1f4a0c 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -32,3 +32,4 @@ py_test(test_gradient_checker SRCS test_gradient_checker.py) py_test(test_lookup_table SRCS test_lookup_table.py) py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py) py_test(mnist SRCS mnist.py) +py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py) diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index 3bc05a0fec..370f27eaf6 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -6,13 +6,13 @@ from paddle.v2.framework.op import Operator class OpTestMeta(type): """ Operator Test ClassMeta. - - It injects `test_all` method into user's OperatorTest class, to make Python + + It injects `test_all` method into user's OperatorTest class, to make Python unittest module run that method. - + The `test_all` read what value is stored in `self`. It use self's values to create and run a operator, and check whether that op is OK or not. - + See `test_add_two_op` for example usage. """ @@ -66,7 +66,7 @@ class OpTestMeta(type): self.assertTrue( numpy.allclose( actual, expect, atol=1e-05), - "output name: " + out_name + "has diff") + "output name: " + out_name + " has diff") obj.test_all = test_all return obj diff --git a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py new file mode 100644 index 0000000000..eeddb5a3bf --- /dev/null +++ b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py @@ -0,0 +1,25 @@ +import unittest +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op +import numpy as np + + +class TestSquaredL2DistanceOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = 'squared_l2_distance' + self.inputs = { + 'X': np.random.uniform(0.1, 1., (2, 3)).astype('float32'), + 'Y': np.random.uniform(0.1, 1., (2, 3)).astype('float32') + } + subRes = self.inputs['X'] - self.inputs['Y'] + output = subRes * subRes + self.outputs = { + 'sub_result': subRes, + 'Out': np.expand_dims(output.sum(1), 1) + } + + +if __name__ == '__main__': + unittest.main() From f7e75a03cf03d8b71ab9be2800c7ed8058866c02 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 31 Aug 2017 19:57:22 +0800 Subject: [PATCH 16/71] Refine the neon depthwise convolution code(separate the Function and kernel). 
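With the split, a call site reduces to picking a DepthwiseConvKernel specialization from the new header. A minimal sketch follows; the wrapper name and the assumption of an already padded input are invented for illustration, and the real NeonDepthwiseConvFunction additionally pads the input and dispatches on filter size and stride:

#include "NeonDepthwiseConv.h"

// Hypothetical call site for the 3x3, stride-1 micro-kernel that this
// patch moves into the header. The input is assumed to be padded already.
inline void runDepthwise3x3s1(const float* input, const float* filter,
                              int inH, int inW, int outC, int outH, int outW,
                              int filterMultiplier, float* output) {
  paddle::neon::DepthwiseConvKernel<3, 1>::run(
      input, filter, inH, inW, outC, outH, outW, filterMultiplier, output);
}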
--- paddle/function/neon/NeonDepthwiseConv.cpp | 454 +------------------ paddle/function/neon/NeonDepthwiseConv.h | 480 +++++++++++++++++++++ 2 files changed, 481 insertions(+), 453 deletions(-) create mode 100644 paddle/function/neon/NeonDepthwiseConv.h diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index f09e98587d..7e5f752a0b 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "neon_util.h" +#include "NeonDepthwiseConv.h" #include "paddle/function/ConvOp.h" #include "paddle/function/Im2Col.h" @@ -22,458 +22,6 @@ namespace neon { #if defined(__ARM_NEON__) || defined(__ARM_NEON) -template -struct DepthwiseConvKernel {}; - -inline float32_t conv3x3(float32x4_t r0, - float32x4_t r1, - float32x4_t r2, - float32x4_t k0, - float32x4_t k1, - float32x4_t k2) { - float32x4_t tmp; - tmp = vmulq_f32(r0, k0); - tmp = vmlaq_f32(tmp, r1, k1); - tmp = vmlaq_f32(tmp, r2, k2); - return vaddvq_f32(tmp); -} - -inline float32_t conv4x4(float32x4_t r0, - float32x4_t r1, - float32x4_t r2, - float32x4_t r3, - float32x4_t k0, - float32x4_t k1, - float32x4_t k2, - float32x4_t k3) { - float32x4_t tmp; - tmp = vmulq_f32(r0, k0); - tmp = vmlaq_f32(tmp, r1, k1); - tmp = vmlaq_f32(tmp, r2, k2); - tmp = vmlaq_f32(tmp, r3, k3); - return vaddvq_f32(tmp); -} - -/** - * Each step calculates four elements of the output. - * First step: - * R0[0, 1, 2, 3...] * K[0][0] - * R0[1, 2, 3, 4...] * K[0][1] - * R0[2, 3, 4, 5...] * K[0][2] - * R1[0, 1, 2, 3...] * K[1][0] - * R1[1, 2, 3, 4...] * K[1][1] - * R1[2, 3, 4, 5...] * K[1][2] - * R2[0, 1, 2, 3...] * K[2][0] - * R2[1, 2, 3, 4...] * K[2][1] - * + R2[2, 3, 4, 5...] 
* K[2][2] - * ------------------------------ - * Output[0, 1, 2, 3] - */ -template <> -struct DepthwiseConvKernel<3, 1> { - static void run(const float* inputData, - const float* filterData, - int inputHeight, - int inputWidth, - int outputChannels, - int outputHeight, - int outputWidth, - int filterMultiplier, - float* outputData) { - const int steps = outputWidth >> 2; - const int remain = outputWidth & 3; - for (int c = 0; c < outputChannels; c++, filterData += 9) { - // Load the filters - float32x4_t k[3]; - k[0] = vld1q_f32(filterData); - k[1] = vld1q_f32(filterData + 3); - k[2] = vld1q_f32(filterData + 6); - k[0] = vsetq_lane_f32(0.f, k[0], 3); - k[1] = vsetq_lane_f32(0.f, k[1], 3); - k[2] = vsetq_lane_f32(0.f, k[2], 3); - - const float* r0 = - inputData + (c / filterMultiplier) * (inputHeight * inputWidth); - const float* r1 = r0 + inputWidth; - const float* r2 = r0 + inputWidth * 2; - float32x4_t input[3][3]; - for (int h = 0; h < outputHeight; h++) { - for (int s = 0; s < steps; s++) { - // Load the inputs - float32x4_t tmp; - input[0][0] = vld1q_f32(r0); - tmp = vld1q_f32(r0 + 4); - input[0][1] = vextq_f32(input[0][0], tmp, 1); - input[0][2] = vextq_f32(input[0][0], tmp, 2); - input[1][0] = vld1q_f32(r1); - tmp = vld1q_f32(r1 + 4); - input[1][1] = vextq_f32(input[1][0], tmp, 1); - input[1][2] = vextq_f32(input[1][0], tmp, 2); - input[2][0] = vld1q_f32(r2); - tmp = vld1q_f32(r2 + 4); - input[2][1] = vextq_f32(input[2][0], tmp, 1); - input[2][2] = vextq_f32(input[2][0], tmp, 2); - - float32x4_t tmp1 = vdupq_n_f32(0.f); - float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp1 = vaddq_f32(tmp1, tmp2); - - vst1q_f32(outputData, tmp1); - r0 += 4; - r1 += 4; - r2 += 4; - outputData += 4; - } - - for (int r = 0; r < remain; r++) { - float32x4_t i0 = vld1q_f32(r0); - float32x4_t i1 = vld1q_f32(r1); - float32x4_t i2 = vld1q_f32(r2); - *outputData = conv3x3(i0, i1, i2, k[0], k[1], k[2]); - r0++; - r1++; - r2++; - outputData++; - } - - r0 += 2; - r1 += 2; - r2 += 2; - } - } - } -}; - -/** - * Each step calculates four elements of the output. - * First step: - * R0[0, 2, 4, 6...] * K[0][0] - * R0[1, 3, 5, 7...] * K[0][1] - * R0[2, 4, 6, 8...] * K[0][2] - * R1[0, 2, 4, 6...] * K[1][0] - * R1[1, 3, 5, 7...] * K[1][1] - * R1[2, 4, 6, 8...] * K[1][2] - * R2[0, 2, 4, 6...] * K[2][0] - * R2[1, 3, 5, 7...] * K[2][1] - * R2[2, 4, 6, 8...] 
* K[2][2] - * ------------------------------ - * Output[0, 1, 2, 3] - */ -template <> -struct DepthwiseConvKernel<3, 2> { - static void run(const float* inputData, - const float* filterData, - int inputHeight, - int inputWidth, - int outputChannels, - int outputHeight, - int outputWidth, - int filterMultiplier, - float* outputData) { - const int steps = outputWidth >> 2; - const int remain = outputWidth & 3; - for (int c = 0; c < outputChannels; c++, filterData += 9) { - // Load the filters - float32x4_t k[3]; - k[0] = vld1q_f32(filterData); - k[1] = vld1q_f32(filterData + 3); - k[2] = vld1q_f32(filterData + 6); - k[0] = vsetq_lane_f32(0.f, k[0], 3); - k[1] = vsetq_lane_f32(0.f, k[1], 3); - k[2] = vsetq_lane_f32(0.f, k[2], 3); - - const float* start = - inputData + (c / filterMultiplier) * (inputHeight * inputWidth); - float32x4_t input[3][3]; - for (int h = 0; h < outputHeight; h++) { - const float* r0 = start + 2 * h * inputWidth; - const float* r1 = start + (2 * h + 1) * inputWidth; - const float* r2 = start + (2 * h + 2) * inputWidth; - for (int s = 0; s < steps; s++) { - // Load the inputs - float32x4_t data1; - float32x4x2_t data2; - - data2 = vld2q_f32(r0); - input[0][0] = data2.val[0]; - input[0][1] = data2.val[1]; - data1 = vld1q_f32(r0 + 8); - input[0][2] = vextq_f32(data2.val[0], data1, 1); - - data2 = vld2q_f32(r1); - input[1][0] = data2.val[0]; - input[1][1] = data2.val[1]; - data1 = vld1q_f32(r1 + 8); - input[1][2] = vextq_f32(data2.val[0], data1, 1); - - data2 = vld2q_f32(r2); - input[2][0] = data2.val[0]; - input[2][1] = data2.val[1]; - data1 = vld1q_f32(r2 + 8); - input[2][2] = vextq_f32(data2.val[0], data1, 1); - - float32x4_t tmp1 = vdupq_n_f32(0.f); - float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp1 = vaddq_f32(tmp1, tmp2); - - vst1q_f32(outputData, tmp1); - r0 += 8; - r1 += 8; - r2 += 8; - outputData += 4; - } - - for (int r = 0; r < remain; r++) { - float32x4_t i0 = vld1q_f32(r0); - float32x4_t i1 = vld1q_f32(r1); - float32x4_t i2 = vld1q_f32(r2); - *outputData = conv3x3(i0, i1, i2, k[0], k[1], k[2]); - r0 += 2; - r1 += 2; - r2 += 2; - outputData++; - } - } - } - } -}; - -/** - * Each step calculates four elements of the output. 
- */ -template <> -struct DepthwiseConvKernel<4, 1> { - static void run(const float* inputData, - const float* filterData, - int inputHeight, - int inputWidth, - int outputChannels, - int outputHeight, - int outputWidth, - int filterMultiplier, - float* outputData) { - const int steps = outputWidth >> 2; - const int remain = outputWidth & 3; - for (int c = 0; c < outputChannels; c++, filterData += 16) { - // Load the filters - float32x4_t k[4]; - k[0] = vld1q_f32(filterData); - k[1] = vld1q_f32(filterData + 4); - k[2] = vld1q_f32(filterData + 8); - k[3] = vld1q_f32(filterData + 12); - - const float* r0 = - inputData + (c / filterMultiplier) * (inputHeight * inputWidth); - const float* r1 = r0 + inputWidth; - const float* r2 = r0 + inputWidth * 2; - const float* r3 = r0 + inputWidth * 3; - float32x4_t input[4][4]; - for (int h = 0; h < outputHeight; h++) { - for (int s = 0; s < steps; s++) { - // Load the inputs - float32x4_t tmp; - input[0][0] = vld1q_f32(r0); - tmp = vld1q_f32(r0 + 4); - input[0][1] = vextq_f32(input[0][0], tmp, 1); - input[0][2] = vextq_f32(input[0][0], tmp, 2); - input[0][3] = vextq_f32(input[0][0], tmp, 3); - - input[1][0] = vld1q_f32(r1); - tmp = vld1q_f32(r1 + 4); - input[1][1] = vextq_f32(input[1][0], tmp, 1); - input[1][2] = vextq_f32(input[1][0], tmp, 2); - input[1][3] = vextq_f32(input[1][0], tmp, 3); - - input[2][0] = vld1q_f32(r2); - tmp = vld1q_f32(r2 + 4); - input[2][1] = vextq_f32(input[2][0], tmp, 1); - input[2][2] = vextq_f32(input[2][0], tmp, 2); - input[2][3] = vextq_f32(input[2][0], tmp, 3); - - input[3][0] = vld1q_f32(r3); - tmp = vld1q_f32(r3 + 4); - input[3][1] = vextq_f32(input[3][0], tmp, 1); - input[3][2] = vextq_f32(input[3][0], tmp, 2); - input[3][3] = vextq_f32(input[3][0], tmp, 3); - - float32x4_t tmp1 = vdupq_n_f32(0.f); - float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); - tmp1 = vaddq_f32(tmp1, tmp2); - - vst1q_f32(outputData, tmp1); - r0 += 4; - r1 += 4; - r2 += 4; - r3 += 4; - outputData += 4; - } - - for (int r = 0; r < remain; r++) { - float32x4_t i0 = vld1q_f32(r0); - float32x4_t i1 = vld1q_f32(r1); - float32x4_t i2 = vld1q_f32(r2); - float32x4_t i3 = vld1q_f32(r3); - *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); - r0++; - r1++; - r2++; - r3++; - outputData++; - } - - r0 += 3; - r1 += 3; - r2 += 3; - r3 += 3; - } - } - } -}; - -/** - * Each step calculates four elements of the output. 
- */ -template <> -struct DepthwiseConvKernel<4, 2> { - static void run(const float* inputData, - const float* filterData, - int inputHeight, - int inputWidth, - int outputChannels, - int outputHeight, - int outputWidth, - int filterMultiplier, - float* outputData) { - const int steps = outputWidth >> 2; - const int remain = outputWidth & 3; - for (int c = 0; c < outputChannels; c++, filterData += 16) { - // Load the filters - float32x4_t k[4]; - k[0] = vld1q_f32(filterData); - k[1] = vld1q_f32(filterData + 4); - k[2] = vld1q_f32(filterData + 8); - k[3] = vld1q_f32(filterData + 12); - - const float* start = - inputData + (c / filterMultiplier) * (inputHeight * inputWidth); - float32x4_t input[4][4]; - for (int h = 0; h < outputHeight; h++) { - const float* r0 = start + 2 * h * inputWidth; - const float* r1 = start + (2 * h + 1) * inputWidth; - const float* r2 = start + (2 * h + 2) * inputWidth; - const float* r3 = start + (2 * h + 3) * inputWidth; - for (int s = 0; s < steps; s++) { - // Load the inputs - float32x4x2_t data1; - float32x4x2_t data2; - - data1 = vld2q_f32(r0); - data2 = vld2q_f32(r0 + 8); - input[0][0] = data1.val[0]; - input[0][1] = data1.val[1]; - input[0][2] = vextq_f32(data1.val[0], data2.val[0], 1); - input[0][3] = vextq_f32(data1.val[1], data2.val[1], 1); - - data1 = vld2q_f32(r1); - data2 = vld2q_f32(r1 + 8); - input[1][0] = data1.val[0]; - input[1][1] = data1.val[1]; - input[1][2] = vextq_f32(data1.val[0], data2.val[0], 1); - input[1][3] = vextq_f32(data1.val[1], data2.val[1], 1); - - data1 = vld2q_f32(r2); - data2 = vld2q_f32(r2 + 8); - input[2][0] = data1.val[0]; - input[2][1] = data1.val[1]; - input[2][2] = vextq_f32(data1.val[0], data2.val[0], 1); - input[2][3] = vextq_f32(data1.val[1], data2.val[1], 1); - - data1 = vld2q_f32(r3); - data2 = vld2q_f32(r3 + 8); - input[3][0] = data1.val[0]; - input[3][1] = data1.val[1]; - input[3][2] = vextq_f32(data1.val[0], data2.val[0], 1); - input[3][3] = vextq_f32(data1.val[1], data2.val[1], 1); - - float32x4_t tmp1 = vdupq_n_f32(0.f); - float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); - tmp1 = vaddq_f32(tmp1, tmp2); - - vst1q_f32(outputData, tmp1); - r0 += 8; - r1 += 8; - r2 += 8; - r3 += 8; - outputData += 4; - } - - for (int r = 0; r < remain; r++) { - float32x4_t i0 = vld1q_f32(r0); - float32x4_t i1 = vld1q_f32(r1); - float32x4_t i2 = vld1q_f32(r2); - float32x4_t i3 = vld1q_f32(r3); - *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); - r0 += 2; - r1 += 2; - r2 += 2; - r3 += 2; - outputData++; - } - } - } - } -}; - template class NeonDepthwiseConvFunction : public ConvFunctionBase { public: diff --git a/paddle/function/neon/NeonDepthwiseConv.h 
b/paddle/function/neon/NeonDepthwiseConv.h new file mode 100644 index 0000000000..cb1abe1f32 --- /dev/null +++ b/paddle/function/neon/NeonDepthwiseConv.h @@ -0,0 +1,480 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "neon_util.h" + +namespace paddle { + +namespace neon { + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + +template +struct DepthwiseConvKernel {}; + +inline float32_t conv3x3(float32x4_t r0, + float32x4_t r1, + float32x4_t r2, + float32x4_t k0, + float32x4_t k1, + float32x4_t k2) { + float32x4_t tmp; + tmp = vmulq_f32(r0, k0); + tmp = vmlaq_f32(tmp, r1, k1); + tmp = vmlaq_f32(tmp, r2, k2); + return vaddvq_f32(tmp); +} + +inline float32_t conv4x4(float32x4_t r0, + float32x4_t r1, + float32x4_t r2, + float32x4_t r3, + float32x4_t k0, + float32x4_t k1, + float32x4_t k2, + float32x4_t k3) { + float32x4_t tmp; + tmp = vmulq_f32(r0, k0); + tmp = vmlaq_f32(tmp, r1, k1); + tmp = vmlaq_f32(tmp, r2, k2); + tmp = vmlaq_f32(tmp, r3, k3); + return vaddvq_f32(tmp); +} + +/** + * Each step calculates four elements of the output. + * First step: + * R0[0, 1, 2, 3...] * K[0][0] + * R0[1, 2, 3, 4...] * K[0][1] + * R0[2, 3, 4, 5...] * K[0][2] + * R1[0, 1, 2, 3...] * K[1][0] + * R1[1, 2, 3, 4...] * K[1][1] + * R1[2, 3, 4, 5...] * K[1][2] + * R2[0, 1, 2, 3...] * K[2][0] + * R2[1, 2, 3, 4...] * K[2][1] + * + R2[2, 3, 4, 5...] 
* K[2][2] + * ------------------------------ + * Output[0, 1, 2, 3] + */ +template <> +struct DepthwiseConvKernel<3, 1> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 9) { + // Load the filters + float32x4_t k[3]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 3); + k[2] = vld1q_f32(filterData + 6); + k[0] = vsetq_lane_f32(0.f, k[0], 3); + k[1] = vsetq_lane_f32(0.f, k[1], 3); + k[2] = vsetq_lane_f32(0.f, k[2], 3); + + const float* r0 = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + const float* r1 = r0 + inputWidth; + const float* r2 = r0 + inputWidth * 2; + float32x4_t input[3][3]; + for (int h = 0; h < outputHeight; h++) { + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t tmp; + input[0][0] = vld1q_f32(r0); + tmp = vld1q_f32(r0 + 4); + input[0][1] = vextq_f32(input[0][0], tmp, 1); + input[0][2] = vextq_f32(input[0][0], tmp, 2); + input[1][0] = vld1q_f32(r1); + tmp = vld1q_f32(r1 + 4); + input[1][1] = vextq_f32(input[1][0], tmp, 1); + input[1][2] = vextq_f32(input[1][0], tmp, 2); + input[2][0] = vld1q_f32(r2); + tmp = vld1q_f32(r2 + 4); + input[2][1] = vextq_f32(input[2][0], tmp, 1); + input[2][2] = vextq_f32(input[2][0], tmp, 2); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 4; + r1 += 4; + r2 += 4; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + *outputData = conv3x3(i0, i1, i2, k[0], k[1], k[2]); + r0++; + r1++; + r2++; + outputData++; + } + + r0 += 2; + r1 += 2; + r2 += 2; + } + } + } +}; + +/** + * Each step calculates four elements of the output. + * First step: + * R0[0, 2, 4, 6...] * K[0][0] + * R0[1, 3, 5, 7...] * K[0][1] + * R0[2, 4, 6, 8...] * K[0][2] + * R1[0, 2, 4, 6...] * K[1][0] + * R1[1, 3, 5, 7...] * K[1][1] + * R1[2, 4, 6, 8...] * K[1][2] + * R2[0, 2, 4, 6...] * K[2][0] + * R2[1, 3, 5, 7...] * K[2][1] + * R2[2, 4, 6, 8...] 
* K[2][2] + * ------------------------------ + * Output[0, 1, 2, 3] + */ +template <> +struct DepthwiseConvKernel<3, 2> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 9) { + // Load the filters + float32x4_t k[3]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 3); + k[2] = vld1q_f32(filterData + 6); + k[0] = vsetq_lane_f32(0.f, k[0], 3); + k[1] = vsetq_lane_f32(0.f, k[1], 3); + k[2] = vsetq_lane_f32(0.f, k[2], 3); + + const float* start = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + float32x4_t input[3][3]; + for (int h = 0; h < outputHeight; h++) { + const float* r0 = start + 2 * h * inputWidth; + const float* r1 = start + (2 * h + 1) * inputWidth; + const float* r2 = start + (2 * h + 2) * inputWidth; + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t data1; + float32x4x2_t data2; + + data2 = vld2q_f32(r0); + input[0][0] = data2.val[0]; + input[0][1] = data2.val[1]; + data1 = vld1q_f32(r0 + 8); + input[0][2] = vextq_f32(data2.val[0], data1, 1); + + data2 = vld2q_f32(r1); + input[1][0] = data2.val[0]; + input[1][1] = data2.val[1]; + data1 = vld1q_f32(r1 + 8); + input[1][2] = vextq_f32(data2.val[0], data1, 1); + + data2 = vld2q_f32(r2); + input[2][0] = data2.val[0]; + input[2][1] = data2.val[1]; + data1 = vld1q_f32(r2 + 8); + input[2][2] = vextq_f32(data2.val[0], data1, 1); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 8; + r1 += 8; + r2 += 8; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + *outputData = conv3x3(i0, i1, i2, k[0], k[1], k[2]); + r0 += 2; + r1 += 2; + r2 += 2; + outputData++; + } + } + } + } +}; + +/** + * Each step calculates four elements of the output. 
+ */ +template <> +struct DepthwiseConvKernel<4, 1> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 16) { + // Load the filters + float32x4_t k[4]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 4); + k[2] = vld1q_f32(filterData + 8); + k[3] = vld1q_f32(filterData + 12); + + const float* r0 = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + const float* r1 = r0 + inputWidth; + const float* r2 = r0 + inputWidth * 2; + const float* r3 = r0 + inputWidth * 3; + float32x4_t input[4][4]; + for (int h = 0; h < outputHeight; h++) { + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4_t tmp; + input[0][0] = vld1q_f32(r0); + tmp = vld1q_f32(r0 + 4); + input[0][1] = vextq_f32(input[0][0], tmp, 1); + input[0][2] = vextq_f32(input[0][0], tmp, 2); + input[0][3] = vextq_f32(input[0][0], tmp, 3); + + input[1][0] = vld1q_f32(r1); + tmp = vld1q_f32(r1 + 4); + input[1][1] = vextq_f32(input[1][0], tmp, 1); + input[1][2] = vextq_f32(input[1][0], tmp, 2); + input[1][3] = vextq_f32(input[1][0], tmp, 3); + + input[2][0] = vld1q_f32(r2); + tmp = vld1q_f32(r2 + 4); + input[2][1] = vextq_f32(input[2][0], tmp, 1); + input[2][2] = vextq_f32(input[2][0], tmp, 2); + input[2][3] = vextq_f32(input[2][0], tmp, 3); + + input[3][0] = vld1q_f32(r3); + tmp = vld1q_f32(r3 + 4); + input[3][1] = vextq_f32(input[3][0], tmp, 1); + input[3][2] = vextq_f32(input[3][0], tmp, 2); + input[3][3] = vextq_f32(input[3][0], tmp, 3); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 4; + r1 += 4; + r2 += 4; + r3 += 4; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + float32x4_t i3 = vld1q_f32(r3); + *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); + r0++; + r1++; + r2++; + r3++; + outputData++; + } + + r0 += 3; + r1 += 3; + r2 += 3; + r3 += 3; + } + } + } +}; + +/** + * Each step calculates four elements of the output. 
+ */ +template <> +struct DepthwiseConvKernel<4, 2> { + static void run(const float* inputData, + const float* filterData, + int inputHeight, + int inputWidth, + int outputChannels, + int outputHeight, + int outputWidth, + int filterMultiplier, + float* outputData) { + const int steps = outputWidth >> 2; + const int remain = outputWidth & 3; + for (int c = 0; c < outputChannels; c++, filterData += 16) { + // Load the filters + float32x4_t k[4]; + k[0] = vld1q_f32(filterData); + k[1] = vld1q_f32(filterData + 4); + k[2] = vld1q_f32(filterData + 8); + k[3] = vld1q_f32(filterData + 12); + + const float* start = + inputData + (c / filterMultiplier) * (inputHeight * inputWidth); + float32x4_t input[4][4]; + for (int h = 0; h < outputHeight; h++) { + const float* r0 = start + 2 * h * inputWidth; + const float* r1 = start + (2 * h + 1) * inputWidth; + const float* r2 = start + (2 * h + 2) * inputWidth; + const float* r3 = start + (2 * h + 3) * inputWidth; + for (int s = 0; s < steps; s++) { + // Load the inputs + float32x4x2_t data1; + float32x4x2_t data2; + + data1 = vld2q_f32(r0); + data2 = vld2q_f32(r0 + 8); + input[0][0] = data1.val[0]; + input[0][1] = data1.val[1]; + input[0][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[0][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r1); + data2 = vld2q_f32(r1 + 8); + input[1][0] = data1.val[0]; + input[1][1] = data1.val[1]; + input[1][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[1][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r2); + data2 = vld2q_f32(r2 + 8); + input[2][0] = data1.val[0]; + input[2][1] = data1.val[1]; + input[2][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[2][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + data1 = vld2q_f32(r3); + data2 = vld2q_f32(r3 + 8); + input[3][0] = data1.val[0]; + input[3][1] = data1.val[1]; + input[3][2] = vextq_f32(data1.val[0], data2.val[0], 1); + input[3][3] = vextq_f32(data1.val[1], data2.val[1], 1); + + float32x4_t tmp1 = vdupq_n_f32(0.f); + float32x4_t tmp2 = vdupq_n_f32(0.f); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); + tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); + tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vaddq_f32(tmp1, tmp2); + + vst1q_f32(outputData, tmp1); + r0 += 8; + r1 += 8; + r2 += 8; + r3 += 8; + outputData += 4; + } + + for (int r = 0; r < remain; r++) { + float32x4_t i0 = vld1q_f32(r0); + float32x4_t i1 = vld1q_f32(r1); + float32x4_t i2 = vld1q_f32(r2); + float32x4_t i3 = vld1q_f32(r3); + *outputData = conv4x4(i0, i1, i2, i3, k[0], k[1], k[2], k[3]); + r0 += 2; + r1 += 2; + r2 += 2; + r3 += 2; + outputData++; + } + } + } + } +}; + +#endif + +} // namespace neon +} // namespace paddle From f8b885f27f19474124d46002d6572c239910eefd Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 31 Aug 
2017 20:15:48 +0800 Subject: [PATCH 17/71] Using EigenTensor to reshape tensor. --- paddle/operators/squared_l2_distance_op.cc | 64 ++++++++--- paddle/operators/squared_l2_distance_op.h | 128 ++++++++++++++++++--- 2 files changed, 157 insertions(+), 35 deletions(-) diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index 9fc498d5a5..3049f0f8ba 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -22,36 +22,52 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext &ctx) const override { + void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input of SquaredL2DistanceOp " "must be initialized."); - PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), - ctx.Input("Y")->dims(), - "Dimensions of SquaredL2DistanceOp's two inputs " - "must be same.") - framework::DDim dims = ctx.Input("X")->dims(); - ctx.Output("sub_result")->Resize(dims); - ctx.Output("Out")->Resize(framework::make_ddim({dims[0], 1})); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Target of SquaredL2DistanceOp " + "must be initialized."); + + auto* X = ctx.Input("X"); + auto xDims = X->dims(); + auto* Y = ctx.Input("Y"); + auto yDims = Y->dims(); + + PADDLE_ENFORCE_EQ(framework::arity(xDims), framework::arity(yDims), + "Tensor rank of both SquaredL2DistanceOp's " + "inputs must be same."); + int rank = framework::arity(xDims); + PADDLE_ENFORCE(rank >= 2 || rank <= 6, "Tensor rank should be in [2, 6]."); + PADDLE_ENFORCE(yDims[0] == 1 || yDims[0] == xDims[0], + "First dimension of target must be equal to input " + "or to 1."); + + ctx.Output("sub_result")->Resize(xDims); + ctx.Output("Out")->Resize({xDims[0], 1}); } }; class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker { public: - SquaredL2DistanceOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) + SquaredL2DistanceOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input value."); - AddInput("Y", "Target value."); + AddInput("X", "Input of SquaredL2DistanceOp."); + AddInput("Y", "Target of SquaredL2DistanceOp."); AddOutput("sub_result", "Buffering substraction result which " "will be reused in backward.") .AsIntermediate(); AddOutput("Out", "Squared l2 distance between input and target."); AddComment(R"DOC( - SquaredL2DistanceOp will cacluate the squared L2 distances for + SquaredL2DistanceOp will cacluate the squared L2 distance for input and target. Number of distance value equals to the - first dimension of input. + first dimension of input. First dimension of target could be equal to + input or to 1. If the first dimension of target is 1, SquaredL2DistanceOp + will broadcast the first dimension to the first dimension of input. + You can decide whether calculate the gradient of target. 
)DOC"); } }; @@ -61,9 +77,23 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output(framework::GradVarName("X")) - ->Resize(ctx.Input("X")->dims()); + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Gradient of Out should not be null"); + // check out grad dimensions + auto outDims = ctx.Input(framework::GradVarName("Out"))->dims(); + auto xDims = ctx.Input("X")->dims(); + auto yDims = ctx.Input("Y")->dims(); + PADDLE_ENFORCE_EQ(outDims[0], xDims[0], + "First dimension of output gradient and " + "input value must be equal."); + PADDLE_ENFORCE_EQ(outDims[1], 1, + "Second dimension of output gradient " + "must be 1."); + auto* xGrad = ctx.Output(framework::GradVarName("X")); + auto* yGrad = ctx.Output(framework::GradVarName("Y")); + if (xGrad != nullptr) xGrad->Resize(xDims); + if (yGrad != nullptr) yGrad->Resize(yDims); } }; diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h index b350fd0117..e95364c706 100644 --- a/paddle/operators/squared_l2_distance_op.h +++ b/paddle/operators/squared_l2_distance_op.h @@ -20,17 +20,44 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; -template -using EigenMatrix = framework::EigenMatrix; +using EigenTensor = framework::EigenTensor; template -using EigenVector = framework::EigenVector; +using EigenMatrix = framework::EigenMatrix; template class SquaredL2DistanceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { + auto* input0 = context.Input("X"); + const int rank = framework::arity(input0->dims()); + switch (rank) { + case 2: + Operate<2>(context); + break; + case 3: + Operate<3>(context); + break; + case 4: + Operate<4>(context); + break; + case 5: + Operate<5>(context); + break; + case 6: + Operate<6>(context); + break; + default: + // already asserted in SquaredL2DistanceOpMaker + break; + } + } + + private: + template + void Operate(const framework::ExecutionContext& context) const { auto* input0 = context.Input("X"); auto* input1 = context.Input("Y"); auto* output0 = context.Output("sub_result"); @@ -39,17 +66,28 @@ class SquaredL2DistanceKernel : public framework::OpKernel { output0->mutable_data(context.GetPlace()); output1->mutable_data(context.GetPlace()); - auto X = EigenMatrix::From(*input0); - auto Y = EigenMatrix::From(*input1); - auto subResult = EigenMatrix::From(*output0); + auto X = EigenTensor::From(*input0); + auto Y = EigenTensor::From(*input1); + auto subResult = EigenTensor::From(*output0); auto Z = EigenMatrix::From(*output1); + auto xDims = X.dimensions(); + auto yDims = Y.dimensions(); + auto place = context.GetEigenDevice(); + // buffer the substraction result - subResult.device(place) = X - Y; - const auto& inDims = X.dimensions(); + if (yDims[0] == 1 && xDims[0] != yDims[0]) { + auto yBroadcastDims = yDims; + yBroadcastDims[0] = xDims[0]; + subResult.device(place) = X - Y.broadcast(yBroadcastDims); + } else { + subResult.device(place) = X - Y; + } + + // create matrix view for substraction result const auto& subResMat = subResult.reshape(Eigen::array( - {static_cast(inDims[0]), static_cast(X.size() / inDims[0])})); + {static_cast(xDims[0]), static_cast(X.size() / xDims[0])})); Z.device(place) = 
subResMat.pow(2).sum(Eigen::array({1})); } }; @@ -59,24 +97,78 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* input0 = context.Input("sub_result"); - auto* OG = context.Input(framework::GradVarName("Out")); - auto* IG = context.Output(framework::GradVarName("X")); + const int rank = framework::arity(input0->dims()); + switch (rank) { + case 2: + Operate<2>(context); + break; + case 3: + Operate<3>(context); + break; + case 4: + Operate<4>(context); + break; + case 5: + Operate<5>(context); + break; + case 6: + Operate<6>(context); + break; + default: + // already asserted in SquaredL2DistanceOpMaker + break; + } + } - IG->mutable_data(context.GetPlace()); + private: + template + void Operate(const framework::ExecutionContext& context) const { + auto* input0 = context.Input("sub_result"); + auto* OG = context.Input(framework::GradVarName("Out")); + auto* XG = context.Output(framework::GradVarName("X")); + auto* YG = context.Output(framework::GradVarName("Y")); - auto subResult = EigenMatrix::From(*input0); + auto subResult = EigenTensor::From(*input0); auto outGrad = EigenMatrix::From(*OG); - auto inGrad = EigenMatrix::From(*IG); - const auto& subResDims = subResult.dimensions(); + auto subResDims = subResult.dimensions(); int firstDim = static_cast(subResDims[0]); int cols = subResult.size() / firstDim; const auto subResMat = subResult.reshape(Eigen::array({firstDim, cols})); - // create a matrix view for input gradient tensor - auto inGradMat = inGrad.reshape(Eigen::array({firstDim, cols})); - inGradMat.device(context.GetEigenDevice()) = + + // calculate gradient + auto gradMat = 2 * (outGrad.broadcast(Eigen::array({1, cols}))) * subResMat; + + // propagate back to input + auto eigenPlace = context.GetEigenDevice(); + if (XG != nullptr) { + XG->mutable_data(context.GetPlace()); + auto xGrad = EigenTensor::From(*XG); + // dimensions are same with subResult + auto xGradMat = xGrad.reshape(Eigen::array({firstDim, cols})); + xGradMat.device(eigenPlace) = gradMat; + } + if (YG != nullptr) { + YG->mutable_data(context.GetPlace()); + auto yGrad = EigenTensor::From(*YG); + auto dimsYGrad = yGrad.dimensions(); + auto yGradMat = yGrad.reshape(Eigen::array( + {static_cast(dimsYGrad[0]), + static_cast(yGrad.size() / dimsYGrad[0])})); + + PADDLE_ENFORCE(dimsYGrad[0] <= firstDim, + "First dimension of gradient must be greater or " + "equal than first dimension of target"); + + if (dimsYGrad[0] == firstDim) { + yGradMat.device(eigenPlace) = -1 * gradMat; + } else { + yGradMat.device(eigenPlace) = + -1 * (gradMat.sum(Eigen::array({0}))); + } + } } }; From 4b6b7251c10371ceceb84c55ebc587715591c436 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 31 Aug 2017 21:29:49 +0800 Subject: [PATCH 18/71] Refine NeonDepthwiseConv. --- paddle/function/neon/NeonDepthwiseConv.cpp | 35 +++++++++++----------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 7e5f752a0b..3d502f5d6d 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { -namespace neon { - #if defined(__ARM_NEON__) || defined(__ARM_NEON) template @@ -45,16 +43,16 @@ public: const TensorShape& filter = inputs[1].shape(); const TensorShape& output = outputs[0].shape(); - size_t batchSize = input[0]; - size_t inputChannels = input[1]; - size_t inputHeight = input[2]; - size_t inputWidth = input[3]; - size_t filterHeight = getFilterHeight(filter); - size_t filterWidth = getFilterWidth(filter); - size_t outputChannels = output[1]; - size_t outputHeight = output[2]; - size_t outputWidth = output[3]; - size_t filterMultiplier = outputChannels / groups_; + int batchSize = input[0]; + int inputChannels = input[1]; + int inputHeight = input[2]; + int inputWidth = input[3]; + int filterHeight = getFilterHeight(filter); + int filterWidth = getFilterWidth(filter); + int outputChannels = output[1]; + int outputHeight = output[2]; + int outputWidth = output[3]; + int filterMultiplier = outputChannels / groups_; CHECK_EQ(inputChannels, groups_); // only support strideH() == strideW() and filterHeight == filterWidth. @@ -90,18 +88,18 @@ public: DepthWiseConv; if (filterWidth == 3 && strideW() == 1) { - DepthWiseConv = DepthwiseConvKernel<3, 1>::run; + DepthWiseConv = neon::DepthwiseConvKernel<3, 1>::run; } else if (filterWidth == 3 && strideW() == 2) { - DepthWiseConv = DepthwiseConvKernel<3, 2>::run; + DepthWiseConv = neon::DepthwiseConvKernel<3, 2>::run; } else if (filterWidth == 4 && strideW() == 1) { - DepthWiseConv = DepthwiseConvKernel<4, 1>::run; + DepthWiseConv = neon::DepthwiseConvKernel<4, 1>::run; } else if (filterWidth == 4 && strideW() == 2) { - DepthWiseConv = DepthwiseConvKernel<4, 2>::run; + DepthWiseConv = neon::DepthwiseConvKernel<4, 2>::run; } else { LOG(FATAL) << "Not supported"; } - for (size_t i = 0; i < batchSize; i++) { + for (int i = 0; i < batchSize; i++) { DepthWiseConv(inputPadding, filterData, inputHeight, @@ -117,9 +115,10 @@ public: } }; +#ifndef PADDLE_TYPE_DOUBLE REGISTER_TYPED_FUNC(NeonDepthwiseConv, CPU, NeonDepthwiseConvFunction); +#endif #endif -} // namespace neon } // namespace paddle From 40d47fae95b04a26d0fa47eb46a83871f30f229f Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 31 Aug 2017 21:36:09 +0800 Subject: [PATCH 19/71] [Refine code]Move class Padding into the NeonDepthwiseConv.h. 
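Keeping Padding next to the kernels that consume it removes the last piece of NEON-specific code from Im2Col.h. A minimal usage sketch for reviewers (not part of the patch; paddingExample and the sizes are illustrative, and the call matches the signature as of this commit; a later patch in this series renames the parameters):

    // Zero-pad a 1-channel 4x5 float image by one element on each side,
    // the same way NeonDepthwiseConvFunction prepares its input buffer.
    #include <vector>
    #include "paddle/function/neon/NeonDepthwiseConv.h"

    void paddingExample() {
      const int channels = 1, inH = 4, inW = 5, padH = 1, padW = 1;
      std::vector<float> src(channels * inH * inW, 1.f);
      // The caller allocates the padded buffer:
      // channels * (inH + 2 * padH) * (inW + 2 * padW) floats.
      std::vector<float> dst(channels * (inH + 2 * padH) * (inW + 2 * padW));
      paddle::neon::Padding<float>::run(
          src.data(), dst.data(), channels, inH, inW, padH, padW);
      // dst now holds src framed by a border of zeros; the float
      // specialization copies interior rows four lanes at a time
      // with vld1q_f32/vst1q_f32.
    }

Note that the sketch only compiles on an ARM build, since the whole header body sits behind the __ARM_NEON guard.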
--- paddle/function/Im2Col.h | 91 --------------------- paddle/function/neon/NeonDepthwiseConv.cpp | 15 ++-- paddle/function/neon/NeonDepthwiseConv.h | 92 ++++++++++++++++++++++ 3 files changed, 99 insertions(+), 99 deletions(-) diff --git a/paddle/function/Im2Col.h b/paddle/function/Im2Col.h index 9b91e223a6..1e0cff436f 100644 --- a/paddle/function/Im2Col.h +++ b/paddle/function/Im2Col.h @@ -94,95 +94,4 @@ public: int paddingWidth); }; -template -struct Padding { - static void run(const T* src, - T* dest, - int channels, - int inputHeight, - int inputWidth, - int paddingHeight, - int paddingWidth) { - const int destWidth = inputWidth + 2 * paddingWidth; - for (int c = 0; c < channels; c++) { - if (paddingHeight > 0) { - memset(dest, 0, destWidth * paddingHeight * sizeof(T)); - dest += destWidth * paddingHeight; - } - - for (int i = 0; i < inputHeight; i++) { - // padding head - for (int j = 0; j < paddingWidth; j++) { - *dest++ = T(0); - } - - memcpy(dest, src, inputWidth * sizeof(T)); - dest += inputWidth; - src += inputWidth; - - // padding tail - for (int j = 0; j < paddingWidth; j++) { - *dest++ = T(0); - } - } - - if (paddingHeight > 0) { - memset(dest, 0, destWidth * paddingHeight * sizeof(T)); - dest += destWidth * paddingHeight; - } - } - } -}; - -#if defined(__ARM_NEON__) || defined(__ARM_NEON) -template <> -struct Padding { - static void run(const float* src, - float* dest, - int channels, - int inputHeight, - int inputWidth, - int paddingHeight, - int paddingWidth) { - const int destWidth = inputWidth + 2 * paddingWidth; - for (int c = 0; c < channels; c++) { - if (paddingHeight > 0) { - memset(dest, 0, destWidth * paddingHeight * sizeof(float)); - dest += destWidth * paddingHeight; - } - - for (int i = 0; i < inputHeight; i++) { - // padding head - for (int j = 0; j < paddingWidth; j++) { - *dest++ = float(0); - } - - int step = inputWidth >> 2; - int remain = inputWidth & 3; - for (int s = 0; s < step; s++) { - float32x4_t s0 = vld1q_f32(src); - vst1q_f32(dest, s0); - src += 4; - dest += 4; - } - for (int r = 0; r < remain; r++) { - *dest++ = *src++; - } - - // padding tail - for (int j = 0; j < paddingWidth; j++) { - *dest++ = float(0); - } - } - - if (paddingHeight > 0) { - memset(dest, 0, destWidth * paddingHeight * sizeof(float)); - dest += destWidth * paddingHeight; - } - } - } -}; - -#endif - } // namespace paddle diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index 3d502f5d6d..bd9a56a8a5 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -14,7 +14,6 @@ limitations under the License. */ #include "NeonDepthwiseConv.h" #include "paddle/function/ConvOp.h" -#include "paddle/function/Im2Col.h" namespace paddle { @@ -70,13 +69,13 @@ public: (inputWidth + 2 * paddingW()); resizeBuffer(newSize); inputPadding = reinterpret_cast(memory_->getBuf()); - Padding::run(inputData, - inputPadding, - batchSize * inputChannels, - inputHeight, - inputWidth, - paddingH(), - paddingW()); + neon::Padding::run(inputData, + inputPadding, + batchSize * inputChannels, + inputHeight, + inputWidth, + paddingH(), + paddingW()); // height and width of padding data inputHeight += 2 * paddingH(); diff --git a/paddle/function/neon/NeonDepthwiseConv.h b/paddle/function/neon/NeonDepthwiseConv.h index cb1abe1f32..3ceaa65ddb 100644 --- a/paddle/function/neon/NeonDepthwiseConv.h +++ b/paddle/function/neon/NeonDepthwiseConv.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once +#include #include "neon_util.h" namespace paddle { @@ -474,6 +475,97 @@ struct DepthwiseConvKernel<4, 2> { } }; +template +struct Padding { + static void run(const T* src, + T* dest, + int channels, + int inputHeight, + int inputWidth, + int paddingHeight, + int paddingWidth) { + const int destWidth = inputWidth + 2 * paddingWidth; + for (int c = 0; c < channels; c++) { + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(T)); + dest += destWidth * paddingHeight; + } + + for (int i = 0; i < inputHeight; i++) { + // padding head + for (int j = 0; j < paddingWidth; j++) { + *dest++ = T(0); + } + + memcpy(dest, src, inputWidth * sizeof(T)); + dest += inputWidth; + src += inputWidth; + + // padding tail + for (int j = 0; j < paddingWidth; j++) { + *dest++ = T(0); + } + } + + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(T)); + dest += destWidth * paddingHeight; + } + } + } +}; + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +template <> +struct Padding { + static void run(const float* src, + float* dest, + int channels, + int inputHeight, + int inputWidth, + int paddingHeight, + int paddingWidth) { + const int destWidth = inputWidth + 2 * paddingWidth; + for (int c = 0; c < channels; c++) { + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(float)); + dest += destWidth * paddingHeight; + } + + for (int i = 0; i < inputHeight; i++) { + // padding head + for (int j = 0; j < paddingWidth; j++) { + *dest++ = float(0); + } + + int step = inputWidth >> 2; + int remain = inputWidth & 3; + for (int s = 0; s < step; s++) { + float32x4_t s0 = vld1q_f32(src); + vst1q_f32(dest, s0); + src += 4; + dest += 4; + } + for (int r = 0; r < remain; r++) { + *dest++ = *src++; + } + + // padding tail + for (int j = 0; j < paddingWidth; j++) { + *dest++ = float(0); + } + } + + if (paddingHeight > 0) { + memset(dest, 0, destWidth * paddingHeight * sizeof(float)); + dest += destWidth * paddingHeight; + } + } + } +}; + +#endif + #endif } // namespace neon From 840104c99a59f3f970c71eea27382c09e0de6a28 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 31 Aug 2017 21:59:35 +0800 Subject: [PATCH 20/71] Add NeonDepthwiseConvTransposeFunction. 
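The transposed convolution is computed with the existing forward kernels: the input is first expanded into a zero-padded buffer sized so that a stride-1 valid convolution over it lands exactly on the deconvolution output, which is why the dispatch below always selects the stride-1 DepthwiseConvKernel variants. A standalone size check (a sketch, not part of the patch; the function names are illustrative):

    #include <cassert>

    // Transposed-conv output size for kernel k, stride s, padding p.
    int deconvOut(int in, int k, int s, int p) {
      return (in - 1) * s + k - 2 * p;
    }

    // Padded-buffer size used by this patch.
    int paddedIn(int in, int k, int s, int p) {
      return (in - 1) * s + 2 * k - 1 - 2 * p;
    }

    int main() {
      const int ins[] = {5, 7, 16};
      for (int in : ins) {
        // A stride-1 valid conv shrinks the padded buffer by k - 1,
        // giving exactly the deconv output size.
        assert(paddedIn(in, 3, 2, 1) - 3 + 1 == deconvOut(in, 3, 2, 1));
        assert(paddedIn(in, 4, 1, 0) - 4 + 1 == deconvOut(in, 4, 1, 0));
      }
      return 0;
    }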
--- paddle/function/neon/NeonDepthwiseConv.cpp | 19 ++- paddle/function/neon/NeonDepthwiseConv.h | 62 ++++----- .../neon/NeonDepthwiseConvTranspose.cpp | 124 ++++++++++++++++++ 3 files changed, 164 insertions(+), 41 deletions(-) create mode 100644 paddle/function/neon/NeonDepthwiseConvTranspose.cpp diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index bd9a56a8a5..18126152ea 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -64,9 +64,10 @@ public: // padding the input float* inputPadding = inputData; + int padInputHeight = inputHeight + 2 * paddingH(); + int padInputWidth = inputWidth + 2 * paddingW(); if (paddingH() > 0 || paddingW() > 0) { - int newSize = batchSize * inputChannels * (inputHeight + 2 * paddingH()) * - (inputWidth + 2 * paddingW()); + int newSize = batchSize * inputChannels * padInputHeight * padInputWidth; resizeBuffer(newSize); inputPadding = reinterpret_cast(memory_->getBuf()); neon::Padding::run(inputData, @@ -74,12 +75,8 @@ public: batchSize * inputChannels, inputHeight, inputWidth, - paddingH(), - paddingW()); - - // height and width of padding data - inputHeight += 2 * paddingH(); - inputWidth += 2 * paddingW(); + padInputHeight, + padInputWidth); } std::function { template struct Padding { - static void run(const T* src, - T* dest, + static void run(const T* input, + T* inputPadding, int channels, int inputHeight, int inputWidth, - int paddingHeight, - int paddingWidth) { - const int destWidth = inputWidth + 2 * paddingWidth; + int padInputHeight, + int padInputWidth) { + const int paddingHeight = (padInputHeight - inputHeight) / 2; + const int paddingWidth = (padInputWidth - inputWidth) / 2; for (int c = 0; c < channels; c++) { if (paddingHeight > 0) { - memset(dest, 0, destWidth * paddingHeight * sizeof(T)); - dest += destWidth * paddingHeight; + memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(T)); + inputPadding += padInputWidth * paddingHeight; } for (int i = 0; i < inputHeight; i++) { // padding head for (int j = 0; j < paddingWidth; j++) { - *dest++ = T(0); + *inputPadding++ = T(0); } - memcpy(dest, src, inputWidth * sizeof(T)); - dest += inputWidth; - src += inputWidth; + memcpy(inputPadding, input, inputWidth * sizeof(T)); + inputPadding += inputWidth; + input += inputWidth; // padding tail for (int j = 0; j < paddingWidth; j++) { - *dest++ = T(0); + *inputPadding++ = T(0); } } if (paddingHeight > 0) { - memset(dest, 0, destWidth * paddingHeight * sizeof(T)); - dest += destWidth * paddingHeight; + memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(T)); + inputPadding += padInputWidth * paddingHeight; } } } @@ -518,47 +519,48 @@ struct Padding { #if defined(__ARM_NEON__) || defined(__ARM_NEON) template <> struct Padding { - static void run(const float* src, - float* dest, + static void run(const float* input, + float* inputPadding, int channels, int inputHeight, int inputWidth, - int paddingHeight, - int paddingWidth) { - const int destWidth = inputWidth + 2 * paddingWidth; + int padInputHeight, + int padInputWidth) { + const int paddingHeight = (padInputHeight - inputHeight) / 2; + const int paddingWidth = (padInputWidth - inputWidth) / 2; for (int c = 0; c < channels; c++) { if (paddingHeight > 0) { - memset(dest, 0, destWidth * paddingHeight * sizeof(float)); - dest += destWidth * paddingHeight; + memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(float)); + inputPadding += padInputWidth * paddingHeight; } 
for (int i = 0; i < inputHeight; i++) { // padding head for (int j = 0; j < paddingWidth; j++) { - *dest++ = float(0); + *inputPadding++ = float(0); } int step = inputWidth >> 2; int remain = inputWidth & 3; for (int s = 0; s < step; s++) { - float32x4_t s0 = vld1q_f32(src); - vst1q_f32(dest, s0); - src += 4; - dest += 4; + float32x4_t s0 = vld1q_f32(input); + vst1q_f32(inputPadding, s0); + input += 4; + inputPadding += 4; } for (int r = 0; r < remain; r++) { - *dest++ = *src++; + *inputPadding++ = *input++; } // padding tail for (int j = 0; j < paddingWidth; j++) { - *dest++ = float(0); + *inputPadding++ = float(0); } } if (paddingHeight > 0) { - memset(dest, 0, destWidth * paddingHeight * sizeof(float)); - dest += destWidth * paddingHeight; + memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(float)); + inputPadding += padInputWidth * paddingHeight; } } } diff --git a/paddle/function/neon/NeonDepthwiseConvTranspose.cpp b/paddle/function/neon/NeonDepthwiseConvTranspose.cpp new file mode 100644 index 0000000000..03d571ecfe --- /dev/null +++ b/paddle/function/neon/NeonDepthwiseConvTranspose.cpp @@ -0,0 +1,124 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "NeonDepthwiseConv.h" +#include "paddle/function/ConvOp.h" + +namespace paddle { + +#if defined(__ARM_NEON__) || defined(__ARM_NEON) + +template +class NeonDepthwiseConvTransposeFunction : public ConvFunctionBase { +public: + void init(const FuncConfig& config) override { + ConvFunctionBase::init(config); + } + + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + checkShape(input, filter, output); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + check(inputs, outputs); + + const TensorShape& input = inputs[0].shape(); + const TensorShape& filter = inputs[1].shape(); + const TensorShape& output = outputs[0].shape(); + + int batchSize = input[0]; + int inputChannels = input[1]; + int inputHeight = input[2]; + int inputWidth = input[3]; + int filterHeight = getFilterHeight(filter); + int filterWidth = getFilterWidth(filter); + int outputChannels = output[1]; + int outputHeight = output[2]; + int outputWidth = output[3]; + int filterMultiplier = outputChannels / groups_; + CHECK_EQ(inputChannels, groups_); + + // only support strideH() == strideW() and filterHeight == filterWidth. 
+ CHECK_EQ(strideH(), strideW()); + CHECK_EQ(paddingH(), paddingW()); + CHECK_EQ(filterHeight, filterWidth); + + float* inputData = inputs[0].data(); + float* filterData = inputs[1].data(); + float* outputData = outputs[0].data(); + + // padding the input, input -> inputPadding + float* inputPadding = inputData; + int padInputHeight = + (inputHeight - 1) * strideH() + 2 * filterHeight - 1 - 2 * paddingH(); + int padInputWidth = + (inputWidth - 1) * strideW() + 2 * filterWidth - 1 - 2 * paddingW(); + + if (padInputHeight > inputHeight || padInputWidth > inputWidth) { + int newSize = batchSize * inputChannels * padInputHeight * padInputWidth; + resizeBuffer(newSize); + inputPadding = reinterpret_cast(memory_->getBuf()); + neon::Padding::run(inputData, + inputPadding, + batchSize * inputChannels, + inputHeight, + inputWidth, + padInputHeight, + padInputWidth); + } + + std::function + DepthWiseConv; + + if (filterWidth == 3) { + DepthWiseConv = neon::DepthwiseConvKernel<3, 1>::run; + } else if (filterWidth == 4) { + DepthWiseConv = neon::DepthwiseConvKernel<4, 1>::run; + } else { + LOG(FATAL) << "Not supported"; + } + + for (int i = 0; i < batchSize; i++) { + DepthWiseConv(inputPadding, + filterData, + padInputHeight, + padInputWidth, + outputChannels, + outputHeight, + outputWidth, + filterMultiplier, + outputData); + inputPadding += inputChannels * padInputHeight * padInputWidth; + outputData += outputChannels * outputHeight * outputWidth; + } + } +}; + +#ifndef PADDLE_TYPE_DOUBLE + +REGISTER_TYPED_FUNC(NeonDepthwiseConvTranspose, + CPU, + NeonDepthwiseConvTransposeFunction); + +#endif + +#endif + +} // namespace paddle From 6bef079660f689a1b9c061e31c8273de353f98da Mon Sep 17 00:00:00 2001 From: yangyaming Date: Thu, 31 Aug 2017 22:31:34 +0800 Subject: [PATCH 21/71] Follow coding style and move reshaping operation to paddle tensor. 
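Viewing every input as a (batch, cols) matrix at the point where the Eigen expression is built removes the rank-dispatch switch entirely; a single EigenMatrix code path now serves ranks 2 through 6. The idea in isolation, shown with a plain Eigen::Map rather than framework::EigenMatrix (a self-contained sketch of the flattening, not the operator code):

    #include <cassert>
    #include <vector>
    #include <Eigen/Core>

    int main() {
      // A "rank-4" tensor of shape (2, 3, 4, 5), stored contiguously in
      // row-major order like a paddle framework Tensor.
      const int dims[4] = {2, 3, 4, 5};
      const int rows = dims[0];
      const int cols = dims[1] * dims[2] * dims[3];
      std::vector<float> data(rows * cols, 1.f);

      // Flatten every dimension except the first into the column axis.
      using RowMajorMat = Eigen::Matrix<float, Eigen::Dynamic,
                                        Eigen::Dynamic, Eigen::RowMajor>;
      Eigen::Map<RowMajorMat> mat(data.data(), rows, cols);
      assert(mat.rows() == 2 && mat.cols() == 60);

      // Row-wise reductions on this view implement "sum over everything
      // but the batch dimension", which is what the distance kernel needs.
      assert(mat.row(0).sum() == 60.f);
      return 0;
    }

In the operator the same view is produced by EigenMatrix::From with an explicit framework::make_ddim({dims[0], cols}), as the hunks below show.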
--- paddle/operators/squared_l2_distance_op.cc | 47 ++--- paddle/operators/squared_l2_distance_op.h | 170 ++++++------------ .../tests/test_squared_l2_distance_op.py | 10 ++ 3 files changed, 92 insertions(+), 135 deletions(-) diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index 3049f0f8ba..b19c274dcc 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -30,22 +30,27 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { "Target of SquaredL2DistanceOp " "must be initialized."); - auto* X = ctx.Input("X"); - auto xDims = X->dims(); - auto* Y = ctx.Input("Y"); - auto yDims = Y->dims(); + auto* x = ctx.Input("X"); + auto x_dims = x->dims(); + auto* y = ctx.Input("Y"); + auto y_dims = y->dims(); - PADDLE_ENFORCE_EQ(framework::arity(xDims), framework::arity(yDims), + PADDLE_ENFORCE_EQ(framework::arity(x_dims), framework::arity(y_dims), "Tensor rank of both SquaredL2DistanceOp's " "inputs must be same."); - int rank = framework::arity(xDims); - PADDLE_ENFORCE(rank >= 2 || rank <= 6, "Tensor rank should be in [2, 6]."); - PADDLE_ENFORCE(yDims[0] == 1 || yDims[0] == xDims[0], + + int rank = framework::arity(x_dims); + PADDLE_ENFORCE(rank >= 2, "Tensor rank should be at least equal to 2."); + PADDLE_ENFORCE_EQ(framework::product(x_dims) / x_dims[0], + framework::product(y_dims) / y_dims[0], + "Product of dimensions expcet the first dimension of " + "input and target must be equal."); + PADDLE_ENFORCE(y_dims[0] == 1 || y_dims[0] == x_dims[0], "First dimension of target must be equal to input " "or to 1."); - ctx.Output("sub_result")->Resize(xDims); - ctx.Output("Out")->Resize({xDims[0], 1}); + ctx.Output("sub_result")->Resize(x_dims); + ctx.Output("Out")->Resize({x_dims[0], 1}); } }; @@ -66,8 +71,8 @@ class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker { input and target. Number of distance value equals to the first dimension of input. First dimension of target could be equal to input or to 1. If the first dimension of target is 1, SquaredL2DistanceOp - will broadcast the first dimension to the first dimension of input. - You can decide whether calculate the gradient of target. + will broadcast target's first dimension to input's first dimension. + You can decide whether calculate the gradient of input and target. 
)DOC"); } }; @@ -81,19 +86,19 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Gradient of Out should not be null"); // check out grad dimensions - auto outDims = ctx.Input(framework::GradVarName("Out"))->dims(); - auto xDims = ctx.Input("X")->dims(); - auto yDims = ctx.Input("Y")->dims(); - PADDLE_ENFORCE_EQ(outDims[0], xDims[0], + auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); + auto x_dims = ctx.Input("X")->dims(); + auto y_dims = ctx.Input("Y")->dims(); + PADDLE_ENFORCE_EQ(out_dims[0], x_dims[0], "First dimension of output gradient and " "input value must be equal."); - PADDLE_ENFORCE_EQ(outDims[1], 1, + PADDLE_ENFORCE_EQ(out_dims[1], 1, "Second dimension of output gradient " "must be 1."); - auto* xGrad = ctx.Output(framework::GradVarName("X")); - auto* yGrad = ctx.Output(framework::GradVarName("Y")); - if (xGrad != nullptr) xGrad->Resize(xDims); - if (yGrad != nullptr) yGrad->Resize(yDims); + auto* x_grad = ctx.Output(framework::GradVarName("X")); + auto* y_grad = ctx.Output(framework::GradVarName("Y")); + if (x_grad != nullptr) x_grad->Resize(x_dims); + if (y_grad != nullptr) y_grad->Resize(y_dims); } }; diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h index e95364c706..ec8c34ddf8 100644 --- a/paddle/operators/squared_l2_distance_op.h +++ b/paddle/operators/squared_l2_distance_op.h @@ -20,9 +20,6 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; -template -using EigenTensor = framework::EigenTensor; template using EigenMatrix = framework::EigenMatrix; @@ -31,64 +28,39 @@ template class SquaredL2DistanceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input0 = context.Input("X"); - const int rank = framework::arity(input0->dims()); - switch (rank) { - case 2: - Operate<2>(context); - break; - case 3: - Operate<3>(context); - break; - case 4: - Operate<4>(context); - break; - case 5: - Operate<5>(context); - break; - case 6: - Operate<6>(context); - break; - default: - // already asserted in SquaredL2DistanceOpMaker - break; - } - } - - private: - template - void Operate(const framework::ExecutionContext& context) const { - auto* input0 = context.Input("X"); - auto* input1 = context.Input("Y"); - auto* output0 = context.Output("sub_result"); - auto* output1 = context.Output("Out"); - - output0->mutable_data(context.GetPlace()); - output1->mutable_data(context.GetPlace()); - - auto X = EigenTensor::From(*input0); - auto Y = EigenTensor::From(*input1); - auto subResult = EigenTensor::From(*output0); - auto Z = EigenMatrix::From(*output1); - - auto xDims = X.dimensions(); - auto yDims = Y.dimensions(); + auto* in0 = context.Input("X"); + auto* in1 = context.Input("Y"); + auto* out0 = context.Output("sub_result"); + auto* out1 = context.Output("Out"); + + auto in0_dims = in0->dims(); + auto in1_dims = in1->dims(); + + int cols = framework::product(in0_dims) / in0_dims[0]; + // reduce dimensions except the first + auto x = + EigenMatrix::From(*in0, framework::make_ddim({in0_dims[0], cols})); + auto y = + EigenMatrix::From(*in1, framework::make_ddim({in1_dims[0], cols})); + + out0->mutable_data(context.GetPlace()); + out1->mutable_data(context.GetPlace()); + auto sub_result = EigenMatrix::From(*out0); + auto z = EigenMatrix::From(*out1); auto place = context.GetEigenDevice(); - + auto x_dims = x.dimensions(); + auto y_dims = 
y.dimensions(); // buffer the substraction result - if (yDims[0] == 1 && xDims[0] != yDims[0]) { - auto yBroadcastDims = yDims; - yBroadcastDims[0] = xDims[0]; - subResult.device(place) = X - Y.broadcast(yBroadcastDims); + if (y_dims[0] == 1 && x_dims[0] > y_dims[0]) { + auto y_broadcast_dims = y_dims; + y_broadcast_dims[0] = x_dims[0]; + sub_result.device(place) = x - y.broadcast(y_broadcast_dims); } else { - subResult.device(place) = X - Y; + sub_result.device(place) = x - y; } - // create matrix view for substraction result - const auto& subResMat = subResult.reshape(Eigen::array( - {static_cast(xDims[0]), static_cast(X.size() / xDims[0])})); - Z.device(place) = subResMat.pow(2).sum(Eigen::array({1})); + z.device(place) = sub_result.pow(2).sum(Eigen::array({1})); } }; @@ -96,77 +68,47 @@ template class SquaredL2DistanceGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input0 = context.Input("sub_result"); - const int rank = framework::arity(input0->dims()); - switch (rank) { - case 2: - Operate<2>(context); - break; - case 3: - Operate<3>(context); - break; - case 4: - Operate<4>(context); - break; - case 5: - Operate<5>(context); - break; - case 6: - Operate<6>(context); - break; - default: - // already asserted in SquaredL2DistanceOpMaker - break; - } - } + auto* in0 = context.Input("sub_result"); + auto* in1 = context.Input(framework::GradVarName("Out")); + auto* x_g = context.Output(framework::GradVarName("X")); + auto* y_g = context.Output(framework::GradVarName("Y")); - private: - template - void Operate(const framework::ExecutionContext& context) const { - auto* input0 = context.Input("sub_result"); - auto* OG = context.Input(framework::GradVarName("Out")); - auto* XG = context.Output(framework::GradVarName("X")); - auto* YG = context.Output(framework::GradVarName("Y")); + auto sub_result = EigenMatrix::From(*in0); + auto out_grad = EigenMatrix::From(*in1); - auto subResult = EigenTensor::From(*input0); - auto outGrad = EigenMatrix::From(*OG); - - auto subResDims = subResult.dimensions(); - int firstDim = static_cast(subResDims[0]); - int cols = subResult.size() / firstDim; - const auto subResMat = - subResult.reshape(Eigen::array({firstDim, cols})); + auto x_dims = x_g->dims(); + auto y_dims = y_g->dims(); + int cols = framework::product(x_dims) / x_dims[0]; // calculate gradient - auto gradMat = - 2 * (outGrad.broadcast(Eigen::array({1, cols}))) * subResMat; + auto grad_mat = + 2 * (out_grad.broadcast(Eigen::array({1, cols}))) * sub_result; // propagate back to input - auto eigenPlace = context.GetEigenDevice(); - if (XG != nullptr) { - XG->mutable_data(context.GetPlace()); - auto xGrad = EigenTensor::From(*XG); + auto eigen_place = context.GetEigenDevice(); + if (x_g != nullptr) { + x_g->mutable_data(context.GetPlace()); + // eigen matrix + auto x_grad = + EigenMatrix::From(*x_g, framework::make_ddim({x_dims[0], cols})); // dimensions are same with subResult - auto xGradMat = xGrad.reshape(Eigen::array({firstDim, cols})); - xGradMat.device(eigenPlace) = gradMat; + x_grad.device(eigen_place) = grad_mat; } - if (YG != nullptr) { - YG->mutable_data(context.GetPlace()); - auto yGrad = EigenTensor::From(*YG); - auto dimsYGrad = yGrad.dimensions(); - auto yGradMat = yGrad.reshape(Eigen::array( - {static_cast(dimsYGrad[0]), - static_cast(yGrad.size() / dimsYGrad[0])})); - - PADDLE_ENFORCE(dimsYGrad[0] <= firstDim, + + if (y_g != nullptr) { + y_g->mutable_data(context.GetPlace()); + auto y_grad = + 
EigenMatrix::From(*y_g, framework::make_ddim({y_dims[0], cols})); + + PADDLE_ENFORCE(sub_result.dimensions()[0] >= y_dims[0], "First dimension of gradient must be greater or " "equal than first dimension of target"); - if (dimsYGrad[0] == firstDim) { - yGradMat.device(eigenPlace) = -1 * gradMat; + if (sub_result.dimensions()[0] == y_dims[0]) { + y_grad.device(eigen_place) = -1 * grad_mat; } else { - yGradMat.device(eigenPlace) = - -1 * (gradMat.sum(Eigen::array({0}))); + y_grad.device(eigen_place) = + -1 * (grad_mat.sum(Eigen::array({0}))); } } } diff --git a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py index eeddb5a3bf..51c95b286a 100644 --- a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py +++ b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py @@ -21,5 +21,15 @@ class TestSquaredL2DistanceOp(unittest.TestCase): } +class TestSquaredL2DistanceGradOp(GradientChecker): + def test_squared_l2_distance(self): + op = create_op("squared_l2_distance") + inputs = { + 'X': np.random.uniform(0.1, 1., (2, 3)).astype('float32'), + 'Y': np.random.uniform(0.1, 1., (2, 3)).astype('float32') + } + self.check_grad(op, inputs, set(["X", "Y"]), "Out") + + if __name__ == '__main__': unittest.main() From 90bf4f60aea012a3eeb819fe4655069d66dbe6e6 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 31 Aug 2017 23:59:58 +0800 Subject: [PATCH 22/71] Add stride support 2 for NeonDepthwiseConvTranspose. --- paddle/function/neon/NeonDepthwiseConv.h | 57 +++++++++++++++++++ .../neon/NeonDepthwiseConvTranspose.cpp | 26 ++++++--- 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.h b/paddle/function/neon/NeonDepthwiseConv.h index 30f0158c61..aefeea78ba 100644 --- a/paddle/function/neon/NeonDepthwiseConv.h +++ b/paddle/function/neon/NeonDepthwiseConv.h @@ -566,6 +566,63 @@ struct Padding { } }; +// for stride is 2 +struct StridePadding { + static void run(const float* input, + float* inputPadding, + int channels, + int inputHeight, + int inputWidth, + int padInputHeight, + int padInputWidth) { + const int paddingHeight = (padInputHeight - (inputHeight * 2 - 1)) / 2; + const int paddingWidth = (padInputWidth - (inputWidth * 2 - 1)) / 2; + for (int c = 0; c < channels; c++) { + if (paddingHeight > 0) { + memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(float)); + inputPadding += padInputWidth * paddingHeight; + } + + for (int i = 0; i < inputHeight; i++) { + // padding head + for (int j = 0; j < paddingWidth; j++) { + *inputPadding++ = float(0); + } + + int step = inputWidth >> 2; + int remain = inputWidth & 3; + float32x4_t s1 = vdupq_n_f32(0.f); + for (int s = 0; s < step; s++) { + float32x4_t s0 = vld1q_f32(input); + float32x4x2_t v = {s0, s1}; + vst2q_f32(inputPadding, v); + input += 4; + inputPadding += 8; + } + for (int r = 0; r < remain; r++) { + *inputPadding++ = *input++; + *inputPadding++ = float(0); + } + inputPadding--; + + // padding tail + for (int j = 0; j < paddingWidth; j++) { + *inputPadding++ = float(0); + } + if (i != inputHeight - 1) { + memset(inputPadding, 0, padInputWidth * sizeof(float)); + inputPadding += padInputWidth; + } + } + + if (paddingHeight > 0) { + memset(inputPadding, 0, padInputWidth * paddingHeight * sizeof(float)); + inputPadding += padInputWidth * paddingHeight; + } + } + } +}; + #endif #endif diff --git a/paddle/function/neon/NeonDepthwiseConvTranspose.cpp 
b/paddle/function/neon/NeonDepthwiseConvTranspose.cpp index 03d571ecfe..49ca4bc8a0 100644 --- a/paddle/function/neon/NeonDepthwiseConvTranspose.cpp +++ b/paddle/function/neon/NeonDepthwiseConvTranspose.cpp @@ -74,13 +74,25 @@ public: int newSize = batchSize * inputChannels * padInputHeight * padInputWidth; resizeBuffer(newSize); inputPadding = reinterpret_cast(memory_->getBuf()); - neon::Padding::run(inputData, - inputPadding, - batchSize * inputChannels, - inputHeight, - inputWidth, - padInputHeight, - padInputWidth); + if (strideH() == 1) { + neon::Padding::run(inputData, + inputPadding, + batchSize * inputChannels, + inputHeight, + inputWidth, + padInputHeight, + padInputWidth); + } else if (strideH() == 2) { + neon::StridePadding::run(inputData, + inputPadding, + batchSize * inputChannels, + inputHeight, + inputWidth, + padInputHeight, + padInputWidth); + } else { + LOG(FATAL) << "Not supported"; + } } std::function Date: Fri, 1 Sep 2017 16:01:53 +0800 Subject: [PATCH 23/71] Use template to deliver const argument instead, to remove the compiling error "argument to __builtin_neon_vgetq_lane_f32 must be a constant integer". --- paddle/function/neon/NeonDepthwiseConv.cpp | 100 ++++++++++----------- paddle/function/neon/neon_util.h | 4 +- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp index f09e98587d..14e5198e1b 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/function/neon/NeonDepthwiseConv.cpp @@ -116,15 +116,15 @@ struct DepthwiseConvKernel<3, 1> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<0>(tmp2, input[1][0], k[1]); + tmp1 = vmlaq_laneq_f32<1>(tmp1, input[1][1], k[1]); + tmp2 = vmlaq_laneq_f32<2>(tmp2, input[1][2], k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -223,15 +223,15 @@ struct DepthwiseConvKernel<3, 2> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<0>(tmp2, input[1][0], k[1]); + tmp1 = 
vmlaq_laneq_f32<1>(tmp1, input[1][1], k[1]); + tmp2 = vmlaq_laneq_f32<2>(tmp2, input[1][2], k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -316,22 +316,22 @@ struct DepthwiseConvKernel<4, 1> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[0][3], k[0]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[1][0], k[1]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[1][1], k[1]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[1][2], k[1]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[1][3], k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[2][3], k[2]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[3][0], k[3]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[3][1], k[3]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[3][2], k[3]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[3][3], k[3]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); @@ -431,22 +431,22 @@ struct DepthwiseConvKernel<4, 2> { float32x4_t tmp1 = vdupq_n_f32(0.f); float32x4_t tmp2 = vdupq_n_f32(0.f); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1); - tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2); - tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[0][3], k[0]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[1][0], k[1]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[1][1], 
k[1]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[1][2], k[1]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[1][3], k[1]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[2][3], k[2]); + tmp1 = vmlaq_laneq_f32<0>(tmp1, input[3][0], k[3]); + tmp2 = vmlaq_laneq_f32<1>(tmp2, input[3][1], k[3]); + tmp1 = vmlaq_laneq_f32<2>(tmp1, input[3][2], k[3]); + tmp2 = vmlaq_laneq_f32<3>(tmp2, input[3][3], k[3]); tmp1 = vaddq_f32(tmp1, tmp2); vst1q_f32(outputData, tmp1); diff --git a/paddle/function/neon/neon_util.h b/paddle/function/neon/neon_util.h index 56b3febe2d..dbe017170b 100644 --- a/paddle/function/neon/neon_util.h +++ b/paddle/function/neon/neon_util.h @@ -33,10 +33,10 @@ inline float32_t vaddvq_f32(float32x4_t a) { return vget_lane_f32(vpadd_f32(v, v), 0); } +template inline float32x4_t vmlaq_laneq_f32(float32x4_t a, float32x4_t b, - float32x4_t v, - const int lane) { + float32x4_t v) { return vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane)); } #endif From 8b15ac82fa831f95493c2bd218b93655db0d739e Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 1 Sep 2017 17:50:01 +0800 Subject: [PATCH 24/71] Move the definition of hl_cpu_gru_forward and hl_cpu_gru_backward to function/GruFunctor.h. --- paddle/cuda/include/hl_cpu_gru.cuh | 134 --------------------- paddle/function/GruFunctor.h | 160 +++++++++++++++++++++++++ paddle/gserver/layers/GruCompute.cpp | 32 ++--- paddle/scripts/docker/build_android.sh | 25 +--- 4 files changed, 181 insertions(+), 170 deletions(-) create mode 100644 paddle/function/GruFunctor.h diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/cuda/include/hl_cpu_gru.cuh index 732799a28b..347b038598 100644 --- a/paddle/cuda/include/hl_cpu_gru.cuh +++ b/paddle/cuda/include/hl_cpu_gru.cuh @@ -18,14 +18,6 @@ limitations under the License. 
*/ #ifndef __NVCC__ -#include "paddle/math/MathFunctions.h" - -// #ifndef PADDLE_TYPE_DOUBLE -// #define CBLAS_GEMM paddle::gemm -// #else -// #define CBLAS_GEMM paddle::gemm -// #endif - template void hl_naive_gru_forward_reset_output(OpResetOutput opResetOutput, real *gateValue, @@ -210,51 +202,6 @@ inline void forward_final_output(OpFinalOutput opFinalOutput, } } -template -void hl_cpu_gru_forward(OpResetOutput opResetOutput, - OpFinalOutput opFinalOutput, - hl_gru_value value, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - if (value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasNoTrans, -// batchSize, -// 2 * frameSize, -// frameSize, -// 1, -// value.prevOutValue, -// frameSize, -// value.gateWeight, -// frameSize * 2, -// 1, -// value.gateValue, -// frameSize * 3); - } - - forward_reset_output(opResetOutput, value, frameSize, batchSize, active_gate); - - if (value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasNoTrans, -// batchSize, -// frameSize, -// frameSize, -// 1, -// value.resetOutputValue, -// frameSize, -// value.stateWeight, -// frameSize, -// 1, -// value.gateValue + frameSize * 2, -// frameSize * 3); - } - - forward_final_output(opFinalOutput, value, frameSize, batchSize, active_node); -} - template void hl_naive_gru_backward_state_grad(OpStateGrad opStateGrad, real *gateValue, @@ -524,87 +471,6 @@ inline void backward_reset_grad(OpResetGrad opResetGrad, } } } - -template -void hl_cpu_gru_backward(OpStateGrad opStateGrad, - OpResetGrad opResetGrad, - hl_gru_value value, - hl_gru_grad grad, - int frameSize, - int batchSize, - hl_activation_mode_t active_node, - hl_activation_mode_t active_gate) { - backward_state_grad(opStateGrad, value, grad, - frameSize, batchSize, active_node); - - if (value.prevOutValue && grad.prevOutGrad) { -// CBLAS_GEMM(CblasNoTrans, -// CblasTrans, -// batchSize, -// frameSize, -// frameSize, -// 1, -// grad.gateGrad + frameSize * 2, -// frameSize * 3, -// value.stateWeight, -// frameSize, -// 0, -// grad.resetOutputGrad, -// frameSize); - - if (grad.stateWeightGrad) { -// CBLAS_GEMM(CblasTrans, -// CblasNoTrans, -// frameSize, -// frameSize, -// batchSize, -// 1, -// value.resetOutputValue, -// frameSize, -// grad.gateGrad + frameSize * 2, -// frameSize * 3, -// 1, -// grad.stateWeightGrad, -// frameSize); - } - } - - backward_reset_grad(opResetGrad, value, grad, - frameSize, batchSize, active_gate); - - if (grad.prevOutGrad && value.prevOutValue) { -// CBLAS_GEMM(CblasNoTrans, -// CblasTrans, -// batchSize, -// frameSize, -// frameSize * 2, -// 1, -// grad.gateGrad, -// frameSize * 3, -// value.gateWeight, -// frameSize * 2, -// 1, -// grad.prevOutGrad, -// frameSize); - - if (grad.gateWeightGrad) { -// CBLAS_GEMM(CblasTrans, -// CblasNoTrans, -// frameSize, -// frameSize * 2, -// batchSize, -// 1, -// value.prevOutValue, -// frameSize, -// grad.gateGrad, -// frameSize * 3, -// 1, -// grad.gateWeightGrad, -// frameSize * 2); - } - } -} - #endif #endif // HL_CPU_GRU_CUH_ diff --git a/paddle/function/GruFunctor.h b/paddle/function/GruFunctor.h new file mode 100644 index 0000000000..11f6174dbd --- /dev/null +++ b/paddle/function/GruFunctor.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "GemmFunctor.h" +#include "GruFunctor.h" +#include "hl_cpu_gru.cuh" + +namespace paddle { + +template +struct GruFunctor { + template + static void compute(OpResetOutput opResetOutput, + OpFinalOutput opFinalOutput, + hl_gru_value value, + int frameSize, + int batchSize, + hl_activation_mode_t active_node, + hl_activation_mode_t active_gate) { +#ifndef __NVCC__ + if (value.prevOutValue) { + BlasGemm::compute(false, + false, + batchSize, + 2 * frameSize, + frameSize, + 1, + value.prevOutValue, + frameSize, + value.gateWeight, + frameSize * 2, + 1, + value.gateValue, + frameSize * 3); + } + + forward_reset_output( + opResetOutput, value, frameSize, batchSize, active_gate); + + if (value.prevOutValue) { + BlasGemm::compute(false, + false, + batchSize, + frameSize, + frameSize, + 1, + value.resetOutputValue, + frameSize, + value.stateWeight, + frameSize, + 1, + value.gateValue + frameSize * 2, + frameSize * 3); + } + + forward_final_output( + opFinalOutput, value, frameSize, batchSize, active_node); +#endif + } +}; + +template +struct GruGradFunctor { + template + static void compute(OpStateGrad opStateGrad, + OpResetGrad opResetGrad, + hl_gru_value value, + hl_gru_grad grad, + int frameSize, + int batchSize, + hl_activation_mode_t active_node, + hl_activation_mode_t active_gate) { +#ifndef __NVCC__ + backward_state_grad( + opStateGrad, value, grad, frameSize, batchSize, active_node); + + if (value.prevOutValue && grad.prevOutGrad) { + BlasGemm::compute(false, + true, + batchSize, + frameSize, + frameSize, + 1, + grad.gateGrad + frameSize * 2, + frameSize * 3, + value.stateWeight, + frameSize, + 0, + grad.resetOutputGrad, + frameSize); + + if (grad.stateWeightGrad) { + BlasGemm::compute(true, + false, + frameSize, + frameSize, + batchSize, + 1, + value.resetOutputValue, + frameSize, + grad.gateGrad + frameSize * 2, + frameSize * 3, + 1, + grad.stateWeightGrad, + frameSize); + } + } + + backward_reset_grad( + opResetGrad, value, grad, frameSize, batchSize, active_gate); + + if (grad.prevOutGrad && value.prevOutValue) { + BlasGemm::compute(false, + true, + batchSize, + frameSize, + frameSize * 2, + 1, + grad.gateGrad, + frameSize * 3, + value.gateWeight, + frameSize * 2, + 1, + grad.prevOutGrad, + frameSize); + + if (grad.gateWeightGrad) { + BlasGemm::compute(true, + false, + frameSize, + frameSize * 2, + batchSize, + 1, + value.prevOutValue, + frameSize, + grad.gateGrad, + frameSize * 3, + 1, + grad.gateWeightGrad, + frameSize * 2); + } + } +#endif + } +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/GruCompute.cpp b/paddle/gserver/layers/GruCompute.cpp index 06907768e9..148516391c 100644 --- a/paddle/gserver/layers/GruCompute.cpp +++ b/paddle/gserver/layers/GruCompute.cpp @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "GruCompute.h" #include "hl_recurrent_apply.cuh" +#include "paddle/function/GruFunctor.h" #include "paddle/utils/Util.h" namespace paddle { @@ -25,13 +26,13 @@ void GruCompute::init(LayerConfig &config) { template <> void GruCompute::forward<0>(hl_gru_value value, int frameSize, int batchSize) { - hl_cpu_gru_forward(hppl::forward::gru_resetOutput(), - hppl::forward::gru_finalOutput(), - value, - frameSize, - batchSize, - activeNode_, - activeGate_); + GruFunctor::compute(hppl::forward::gru_resetOutput(), + hppl::forward::gru_finalOutput(), + value, + frameSize, + batchSize, + activeNode_, + activeGate_); } template <> @@ -39,14 +40,15 @@ void GruCompute::backward<0>(hl_gru_value value, hl_gru_grad grad, int frameSize, int batchSize) { - hl_cpu_gru_backward(hppl::backward::gru_stateGrad(), - hppl::backward::gru_resetGrad(), - value, - grad, - frameSize, - batchSize, - activeNode_, - activeGate_); + GruGradFunctor::compute( + hppl::backward::gru_stateGrad(), + hppl::backward::gru_resetGrad(), + value, + grad, + frameSize, + batchSize, + activeNode_, + activeGate_); } } // namespace paddle diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index a61c7c40e9..34e31f1394 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -2,25 +2,8 @@ set -xe -COMPILER=gcc -USE_EIGEN=ON -if [ $COMPILER == clang ]; then - SUFFIX=_clang - C_COMPILER=clang - CXX_COMPILER=clang++ -else - SUFFIX=_gcc - C_COMPILER=gcc - CXX_COMPILER=g++ -fi -if [ $USE_EIGEN == ON ]; then - SUFFIX=${SUFFIX}_eigen -else - SUFFIX=${SUFFIX}_openblas -fi - -BUILD_ROOT=/paddle/build_android$SUFFIX -DEST_ROOT=/paddle/install$SUFFIX +BUILD_ROOT=/paddle/build_android +DEST_ROOT=/paddle/install rm -rf $BUILD_ROOT 2>/dev/null || true mkdir -p $BUILD_ROOT @@ -41,7 +24,7 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ - -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ + -DUSE_EIGEN_FOR_BLAS=ON \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ -DWITH_STYLE_CHECK=OFF \ @@ -58,7 +41,7 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ - -DUSE_EIGEN_FOR_BLAS=${USE_EIGEN} \ + -DUSE_EIGEN_FOR_BLAS=OFF \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ .. From 7939705384751b7fbbcf6d9c334363b8f7fbd763 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Fri, 1 Sep 2017 18:34:18 +0800 Subject: [PATCH 25/71] Add the missing return statement. 
--- paddle/math/MathFunctions.h | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/math/MathFunctions.h b/paddle/math/MathFunctions.h index 9297ae78c2..e8ea6e37ac 100644 --- a/paddle/math/MathFunctions.h +++ b/paddle/math/MathFunctions.h @@ -106,6 +106,7 @@ T dotProduct(const int n, const T* x, const T* y) { for (int i = 0; i < n; i++) { result += x[i] * y[i]; } + return result; } template From 9d2909be2abcb9b8728f7c4de7437c07a1254b8b Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 1 Sep 2017 19:27:52 +0800 Subject: [PATCH 26/71] rename add_op to add_two_op --- paddle/framework/CMakeLists.txt | 2 +- paddle/operators/{add_op.cc => add_two_op.cc} | 2 +- paddle/operators/{add_op.cu => add_two_op.cu} | 2 +- paddle/operators/{add_op.h => add_two_op.h} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename paddle/operators/{add_op.cc => add_two_op.cc} (98%) rename paddle/operators/{add_op.cu => add_two_op.cu} (95%) rename paddle/operators/{add_op.h => add_two_op.h} (100%) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index c0838d9b75..e138517b6b 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -25,7 +25,7 @@ cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator) cc_library(op_registry SRCS op_registry.cc DEPS grad_op_builder) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) -cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op) +cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_two_op) py_proto_compile(framework_py_proto SRCS framework.proto) # Generate an empty __init__.py to make framework_py_proto as a valid python module. diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_two_op.cc similarity index 98% rename from paddle/operators/add_op.cc rename to paddle/operators/add_two_op.cc index 8ab748ed71..bc99e306e0 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_two_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/add_op.h" +#include "paddle/operators/add_two_op.h" namespace paddle { namespace operators { diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_two_op.cu similarity index 95% rename from paddle/operators/add_op.cu rename to paddle/operators/add_two_op.cu index cec5f558cb..acc03b2c8b 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_two_op.cu @@ -14,7 +14,7 @@ #define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" -#include "paddle/operators/add_op.h" +#include "paddle/operators/add_two_op.h" namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(add_two, diff --git a/paddle/operators/add_op.h b/paddle/operators/add_two_op.h similarity index 100% rename from paddle/operators/add_op.h rename to paddle/operators/add_two_op.h From c33ddc74c1062af7585b6d923acbbcc6299335a5 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 1 Sep 2017 23:00:15 +0800 Subject: [PATCH 27/71] Fix some bugs, add more unittests. 
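Shape inference previously resized `sub_result` to the raw input dims, which breaks once the inputs have rank greater than 2; it now flattens the buffer to a `{batch, numel / batch}` matrix so the row-wise reduction and the broadcast of a batch-1 `Y` both stay 2-D, and `pow(2)` is replaced by an explicit elementwise product. A plain-loop sketch of the semantics the kernel implements (a reference only, not the Eigen code):

```cpp
#include <cstddef>

// X viewed as [n, d] with d = numel / n; Y as [n, d], or [1, d] when broadcast.
// Out[i] = sum_j (X[i][j] - Y[b][j])^2, matching the new unittests.
void squared_l2_distance_ref(const float* x, const float* y, float* out,
                             std::size_t n, std::size_t d, bool broadcast_y) {
  for (std::size_t i = 0; i < n; ++i) {
    const float* yi = broadcast_y ? y : y + i * d;  // reuse row 0 when Y has batch 1
    float sum = 0.f;
    for (std::size_t j = 0; j < d; ++j) {
      float diff = x[i * d + j] - yi[j];  // what the kernel keeps in sub_result
      sum += diff * diff;
    }
    out[i] = sum;  // Out has shape [n, 1]
  }
}
```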
--- paddle/operators/squared_l2_distance_op.cc | 8 ++- paddle/operators/squared_l2_distance_op.h | 19 ++--- .../tests/test_squared_l2_distance_op.py | 72 ++++++++++++++++--- 3 files changed, 79 insertions(+), 20 deletions(-) diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index b19c274dcc..694b00e493 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -49,7 +49,9 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { "First dimension of target must be equal to input " "or to 1."); - ctx.Output("sub_result")->Resize(x_dims); + ctx.Output("sub_result") + ->Resize({static_cast(x_dims[0]), + static_cast(framework::product(x_dims) / x_dims[0])}); ctx.Output("Out")->Resize({x_dims[0], 1}); } }; @@ -97,8 +99,8 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { "must be 1."); auto* x_grad = ctx.Output(framework::GradVarName("X")); auto* y_grad = ctx.Output(framework::GradVarName("Y")); - if (x_grad != nullptr) x_grad->Resize(x_dims); - if (y_grad != nullptr) y_grad->Resize(y_dims); + if (x_grad) x_grad->Resize(x_dims); + if (y_grad) y_grad->Resize(y_dims); } }; diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h index ec8c34ddf8..97907768f7 100644 --- a/paddle/operators/squared_l2_distance_op.h +++ b/paddle/operators/squared_l2_distance_op.h @@ -53,14 +53,16 @@ class SquaredL2DistanceKernel : public framework::OpKernel { auto y_dims = y.dimensions(); // buffer the substraction result if (y_dims[0] == 1 && x_dims[0] > y_dims[0]) { - auto y_broadcast_dims = y_dims; - y_broadcast_dims[0] = x_dims[0]; - sub_result.device(place) = x - y.broadcast(y_broadcast_dims); + sub_result.device(place) = + x - + y.broadcast(Eigen::array({static_cast(x_dims[0]), 1})); } else { sub_result.device(place) = x - y; } - - z.device(place) = sub_result.pow(2).sum(Eigen::array({1})); + auto sub_res_pow2 = sub_result * sub_result; + z.device(place) = + sub_res_pow2.sum(Eigen::array({1})) + .reshape(Eigen::array({static_cast(x_dims[0]), 1})); } }; @@ -86,7 +88,7 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { // propagate back to input auto eigen_place = context.GetEigenDevice(); - if (x_g != nullptr) { + if (x_g) { x_g->mutable_data(context.GetPlace()); // eigen matrix auto x_grad = @@ -95,7 +97,7 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { x_grad.device(eigen_place) = grad_mat; } - if (y_g != nullptr) { + if (y_g) { y_g->mutable_data(context.GetPlace()); auto y_grad = EigenMatrix::From(*y_g, framework::make_ddim({y_dims[0], cols})); @@ -107,8 +109,9 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { if (sub_result.dimensions()[0] == y_dims[0]) { y_grad.device(eigen_place) = -1 * grad_mat; } else { + auto col_sum_res = -1 * (grad_mat.sum(Eigen::array({0}))); y_grad.device(eigen_place) = - -1 * (grad_mat.sum(Eigen::array({0}))); + col_sum_res.reshape(Eigen::array({1, cols})); } } } diff --git a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py index 51c95b286a..2bcdf37df4 100644 --- a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py +++ b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py @@ -4,30 +4,84 @@ from gradient_checker import GradientChecker, create_op import numpy as np -class TestSquaredL2DistanceOp(unittest.TestCase): +class 
TestSquaredL2DistanceOp_f0(unittest.TestCase): __metaclass__ = OpTestMeta def setUp(self): self.type = 'squared_l2_distance' self.inputs = { - 'X': np.random.uniform(0.1, 1., (2, 3)).astype('float32'), - 'Y': np.random.uniform(0.1, 1., (2, 3)).astype('float32') + 'X': np.random.uniform(0.1, 1., (32, 64)).astype('float32'), + 'Y': np.random.uniform(0.1, 1., (32, 64)).astype('float32') } - subRes = self.inputs['X'] - self.inputs['Y'] - output = subRes * subRes + sub_res = self.inputs['X'] - self.inputs['Y'] + output = sub_res * sub_res self.outputs = { - 'sub_result': subRes, + 'sub_result': sub_res, + 'Out': np.expand_dims(output.sum(1), 1) + } + + +class TestSquaredL2DistanceOp_f1(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = 'squared_l2_distance' + self.inputs = { + 'X': np.random.uniform(0.1, 1., (32, 64)).astype('float32'), + 'Y': np.random.uniform(0.1, 1., (1, 64)).astype('float32') + } + sub_res = self.inputs['X'] - self.inputs['Y'] + output = sub_res * sub_res + self.outputs = { + 'sub_result': sub_res, + 'Out': np.expand_dims(output.sum(1), 1) + } + + +class TestSquaredL2DistanceOp_f2(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = 'squared_l2_distance' + self.inputs = { + 'X': np.random.uniform(0.1, 1., (32, 64, 128)).astype('float32'), + 'Y': np.random.uniform(0.1, 1., (1, 64, 128)).astype('float32') + } + sub_res = self.inputs['X'] - self.inputs['Y'] + sub_res = sub_res.reshape((32, 64 * 128)) + output = sub_res * sub_res + self.outputs = { + 'sub_result': sub_res, 'Out': np.expand_dims(output.sum(1), 1) } class TestSquaredL2DistanceGradOp(GradientChecker): - def test_squared_l2_distance(self): + def test_squared_l2_distance_b0(self): + op = create_op("squared_l2_distance") + inputs = { + 'X': np.random.uniform(0.1, .6, (2, 3)).astype('float32'), + 'Y': np.random.uniform(0.1, .6, (2, 3)).astype('float32') + } + self.compare_grad(op, inputs) + self.check_grad(op, inputs, set(["X", "Y"]), "Out") + + def test_squared_l2_distance_b1(self): + op = create_op("squared_l2_distance") + inputs = { + 'X': np.random.uniform(0.1, .6, (2, 3)).astype('float32'), + 'Y': np.random.uniform(0.1, .6, (1, 3)).astype('float32') + } + self.compare_grad(op, inputs) + self.check_grad(op, inputs, set(["X", "Y"]), "Out") + + def test_squared_l2_distance_b2(self): op = create_op("squared_l2_distance") inputs = { - 'X': np.random.uniform(0.1, 1., (2, 3)).astype('float32'), - 'Y': np.random.uniform(0.1, 1., (2, 3)).astype('float32') + 'X': np.random.uniform(0.1, .6, (2, 3, 4)).astype('float32'), + 'Y': np.random.uniform(0.1, .6, (1, 3, 4)).astype('float32') } + self.compare_grad(op, inputs) self.check_grad(op, inputs, set(["X", "Y"]), "Out") From 633fcc91338d9328b87e2d041030952045ae074b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 3 Sep 2017 09:06:00 -0700 Subject: [PATCH 28/71] add var description design --- doc/design/var_desc.md | 90 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 doc/design/var_desc.md diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md new file mode 100644 index 0000000000..71586866ba --- /dev/null +++ b/doc/design/var_desc.md @@ -0,0 +1,90 @@ +## Background +PaddlePaddle divides the description of neural network computation graph into two stages: compile time and runtime. + +The data structure to describe the compile time graph should be able to be serialized for distributing training. 
So we use proto message OpDesc to describe computation and VarDesc to describe data.
+
+PaddlePaddle will generate these data structure according to user's description and do some optimization, such as:
+
+1. InferShape. Infer the Output size according to Input size and set them into VarDesc.
+1. memory optimise and reuse. Scan all the memory that will be used and reuse some memory that is allocated before but will not be used anymore to reduce memory.
+
+VarDesc is used to describe different kinds of Variable value, such as Tensor, scalar, and scope, we use a proto message to do this:
+
+## Definition of VarDesc in Proto
+
+```
+message LoDTensorDesc {
+  enum Type {
+    INT16 = 1;
+    INT32 = 2;
+    INT64 = 3;
+    FP16 = 4;
+    FP32 = 5;
+    DOUBLE = 6;
+    BOOL = 7;
+  }
+
+  Type element_type = 1;
+  repeated int32 dims = 2; // [UNK, UNK, 6000] is saved as [-1, -1, 6000]
+  optional int32 lod_level = 3 [default = 0];
+  repeated int32 int16_val = 4 [packed = true]; // INT16
+  repeated int32 int32_val = 5 [packed = true]; // INT32
+  repeated int64 int64_val = 6 [packed = true]; // INT64
+  repeated float float_val = 7 [packed = true]; // FP32
+  repeated double double_val = 8 [packed = true]; // DOUBLE
+  repeated bool bool_val = 9 [packed = true]; // BOOL
+}
+
+message VarDesc {
+  enum Type {
+    INT = 0;
+    FLOAT = 1;
+    STRING = 2;
+    INTS = 3;
+    FLOATS = 4;
+    STRINGS = 5;
+    LOD_TENSOR = 6;
+  }
+
+  message Value {
+    optional int32 i = 1;
+    optional float f = 2;
+    optional string s = 3;
+    repeated int32 ints = 4;
+    repeated float floats = 5;
+    repeated string strings = 6;
+    optional LoDTensorDesc lod_tensor = 7; // when type==LOD_TENSOR
+  }
+
+  required string name = 1;
+  required Type type = 2;
+  required Value value = 3;
+}
+
+```
+
+## Definition of Variable in Python
+
+There is a class `Variable` in python to help create Variable.
+
+```python
+class Variable(object):
+    def __init__(self,
+                 name=None,
+                 data_type=None,
+                 shape=None,
+                 value=None,
+                 trainable=True):
+```
+
+create a variable with a tensor value.
+
+```python
+a = Variable("X", shape=[784, 10], data_type=INT32, value=0)
+```
+
+or create a Variable with a string value
+
+```python
+a = Variable("X", data_type=STRING, value="aa")
+```

From 842daac9d9c593a3e911e73d4f97bcebeda8e48f Mon Sep 17 00:00:00 2001
From: qiaolongfei
Date: Sun, 3 Sep 2017 09:46:03 -0700
Subject: [PATCH 29/71] update python variable define

---
 doc/design/var_desc.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md
index 71586866ba..1cba557b24 100644
--- a/doc/design/var_desc.md
+++ b/doc/design/var_desc.md
@@ -1,14 +1,14 @@
 ## Background
 PaddlePaddle divides the description of neural network computation graph into two stages: compile time and runtime.
 
-The data structure to describe the compile time graph should be able to be serialized for distributing training. So we use proto message OpDesc to describe computation and VarDesc to describe data.
+The data structure to describe the compile time graph should be able to be serialized for distributed training. So we use proto message to describe the graph: OpDesc to describe computation and VarDesc to describe data.
 
 PaddlePaddle will generate these data structure according to user's description and do some optimization, such as:
 
 1. InferShape. Infer the Output size according to Input size and set them into VarDesc.
 1. memory optimise and reuse. 
Scan all the memory that will be used and reuse some memory that is allocated before but will not be used anymore to reduce memory. -VarDesc is used to describe different kinds of Variable value, such as Tensor, scalar, and scope, we use a proto message to do this: +VarDesc is used to describe different kinds of Variable value, such as Tensor, scalar, and scope: ## Definition of VarDesc in Proto @@ -80,11 +80,11 @@ class Variable(object): create a variable with a tensor value. ```python -a = Variable("X", shape=[784, 10], data_type=INT32, value=0) +a = Variable("X", shape=[784, 10], data_type=pd.INT32, value=0) ``` or create a Variable with a string value ```python -a = Variable("X", data_type=STRING, value="aa") +a = Variable("X", data_type=pd.STRING, value="aa") ``` From 45c8f9b2820ca11259e63eef8cd7e373afaf4c00 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 4 Sep 2017 10:11:42 +0800 Subject: [PATCH 30/71] Add context parameter and math namespace. --- paddle/operators/math/im2col.cc | 10 ++++++---- paddle/operators/math/im2col.cu | 16 +++++++++++----- paddle/operators/math/im2col.h | 6 ++++-- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc index 8124e322cb..bcc18af036 100644 --- a/paddle/operators/math/im2col.cc +++ b/paddle/operators/math/im2col.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/operators/math/im2col.h" namespace paddle { +namespace math { /* * im = [input_channels, input_height, input_width] @@ -26,7 +27,7 @@ class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width) { + int padding_width, platform::DeviceContext* context) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); @@ -77,7 +78,7 @@ class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width) { + int padding_width, platform::DeviceContext* context) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -130,7 +131,7 @@ class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width) { + int padding_width, platform::DeviceContext* context) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -189,7 +190,7 @@ class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width) { + int padding_width, platform::DeviceContext* context) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -241,4 +242,5 @@ template class Im2ColFunctor; template class Col2ImFunctor; template class Col2ImFunctor; +} // namespace math } // namespace paddle diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 875989af58..2caa7c5ec2 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include "paddle/platform/cuda_helper.h" namespace paddle { +namespace math { template __global__ void im2col(const T* data_im, int num_outs, int height, int width, @@ -63,7 +64,7 @@ class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width) { + int padding_width, platform::DeviceContext* context) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); @@ -81,6 +82,7 @@ class Im2ColFunctor { int block_y = (blocks + 512 - 1) / 512; dim3 threads(1024, 1); dim3 grid(block_x, block_y); + // TODO(hedaoyuan): launch kernel on specified stream im2col<<>>( im.data(), num_outputs, input_height, input_width, filter_height, filter_width, stride_height, stride_width, padding_height, @@ -145,7 +147,7 @@ class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width) { + int padding_width, platform::DeviceContext* context) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); @@ -168,6 +170,7 @@ class Col2ImFunctor { // To avoid involving atomic operations, we will launch one kernel per // bottom dimension, and then in the kernel add up the top dimensions. + // TODO(hedaoyuan): launch kernel on specified stream col2im<<>>( num_kernels, col.data(), input_height + 2 * padding_height, input_width + 2 * padding_width, input_channels, filter_height, @@ -224,7 +227,7 @@ class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width) { + int padding_width, platform::DeviceContext* context) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -255,6 +258,7 @@ class Im2ColFunctor { dim3 threads(block_dim_x, block_dim_y, std::min(block_dim_z, input_channels)); dim3 grid(output_width, output_height); + // TODO(hedaoyuan): launch kernel on specified stream im2colOCF<<>>( im.data(), col.data(), input_channels, input_height, input_width, filter_height, filter_width, stride_height, stride_width, @@ -304,7 +308,7 @@ class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width) { + int padding_width, platform::DeviceContext* context) { PADDLE_ENFORCE(im.dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); int input_channels = im.dims()[0]; @@ -335,7 +339,8 @@ class Col2ImFunctor { dim3 threads(block_dim_x, block_dim_y, std::min(block_dim_z, input_channels)); dim3 grid(output_width, output_height); - col2imOCF<<>>( + // TODO(hedaoyuan): launch kernel on specified stream + col2imOCF<<>>( im.data(), col.data(), input_channels, input_height, input_width, filter_height, filter_width, stride_height, stride_width, padding_height, padding_width, output_height, output_width); @@ -347,4 +352,5 @@ template class Im2ColFunctor; template class Col2ImFunctor; template class Col2ImFunctor; +} // namespace math } // namespace paddle diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index da51bc69a2..f6b428e289 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include "paddle/platform/device_context.h" namespace paddle { +namespace math { /* The storage format of the coldata in the Im2ColFunctor and Col2ImFunctor. */ enum ColFormat { kCFO = 0, kOCF = 1 }; @@ -72,7 +73,7 @@ class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width); + int padding_width, platform::DeviceContext* context); }; template @@ -80,7 +81,8 @@ class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, - int padding_width); + int padding_width, platform::DeviceContext* context); }; +} // namespace math } // namespace paddle From f807807ed11c662db3af853ec9b7209a601157a9 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 4 Sep 2017 10:26:03 +0800 Subject: [PATCH 31/71] Fix the namespace of the im2col functor. --- paddle/operators/math/im2col.cc | 2 ++ paddle/operators/math/im2col.cu | 2 ++ paddle/operators/math/im2col.h | 2 ++ 3 files changed, 6 insertions(+) diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc index bcc18af036..a5fcf6da8c 100644 --- a/paddle/operators/math/im2col.cc +++ b/paddle/operators/math/im2col.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/operators/math/im2col.h" namespace paddle { +namespace operators { namespace math { /* @@ -243,4 +244,5 @@ template class Col2ImFunctor; template class Col2ImFunctor; } // namespace math +} // namespace operators } // namespace paddle diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 2caa7c5ec2..23d3854610 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/platform/cuda_helper.h" namespace paddle { +namespace operators { namespace math { template @@ -353,4 +354,5 @@ template class Col2ImFunctor; template class Col2ImFunctor; } // namespace math +} // namespace operators } // namespace paddle diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index f6b428e289..1065cd3d85 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/platform/device_context.h" namespace paddle { +namespace operators { namespace math { /* The storage format of the coldata in the Im2ColFunctor and Col2ImFunctor. */ @@ -85,4 +86,5 @@ class Col2ImFunctor { }; } // namespace math +} // namespace operators } // namespace paddle From 8e5f54320fceca8e031d070d6a6f406f271845fe Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 4 Sep 2017 11:43:52 +0800 Subject: [PATCH 32/71] Refine the toolchain file of Android to use clang as default compiler. 
--- cmake/cross_compiling/android.cmake | 73 +++++++++++++++++++------- cmake/external/warpctc.cmake | 1 - paddle/math/Matrix.cpp | 5 -- paddle/scripts/docker/build_android.sh | 9 ---- 4 files changed, 53 insertions(+), 35 deletions(-) diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake index 5e3e437a8d..84219cfa55 100644 --- a/cmake/cross_compiling/android.cmake +++ b/cmake/cross_compiling/android.cmake @@ -20,6 +20,7 @@ # The supported variables are listed belows: # # ANDROID_STANDALONE_TOOLCHAIN +# ANDROID_TOOLCHAIN # ANDROID_ABI # ANDROID_NATIVE_API_LEVEL # ANDROID_ARM_MODE @@ -57,6 +58,10 @@ IF(NOT DEFINED CMAKE_SYSTEM_VERSION AND ANDROID_NATIVE_API_LEVEL) ENDIF() ENDIF() +IF(NOT DEFINED ANDROID_TOOLCHAIN) + SET(ANDROID_TOOLCHAIN clang) +ENDIF() + IF(NOT DEFINED ANDROID_ABI) SET(ANDROID_ABI "armeabi-v7a") ENDIF() @@ -82,6 +87,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") "${CMAKE_VERSION}), when cross-compiling for Android.") IF(ANDROID_STANDALONE_TOOLCHAIN) + # Use standalone toolchain SET(CMAKE_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot") IF(NOT CMAKE_SYSTEM_VERSION) @@ -96,26 +102,44 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") ENDIF() # Toolchain - SET(ANDROID_TOOLCHAIN "gcc") SET(ANDROID_TOOLCHAIN_ROOT ${ANDROID_STANDALONE_TOOLCHAIN}) - IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") - SET(ANDROID_TOOLCHAIN_NAME arm-linux-androideabi) - IF(ANDROID_ABI STREQUAL "armeabi") - SET(CMAKE_SYSTEM_PROCESSOR armv5te) - ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a") - SET(CMAKE_SYSTEM_PROCESSOR armv7-a) - ENDIF() - ENDIF() - IF(ANDROID_ABI STREQUAL "arm64-v8a") - SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android) - SET(CMAKE_SYSTEM_PROCESSOR aarch64) + ELSE(ANDROID_NDK) + # TODO: use android ndk + ENDIF() + + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + SET(ANDROID_TOOLCHAIN_NAME arm-linux-androideabi) + IF(ANDROID_ABI STREQUAL "armeabi") + SET(CMAKE_SYSTEM_PROCESSOR armv5te) + SET(ANDROID_CLANG_TRIPLE armv5te-none-linux-androideabi) + ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a") + SET(CMAKE_SYSTEM_PROCESSOR armv7-a) + SET(ANDROID_CLANG_TRIPLE armv7-none-linux-androideabi) ENDIF() - SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-") + ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") + SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android) + SET(CMAKE_SYSTEM_PROCESSOR aarch64) + SET(ANDROID_CLANG_TRIPLE aarch64-none-linux-android) + ELSE() + MESSAGE(FATAL_ERROR "Invalid Android ABI: ${ANDROID_ABI}.") + ENDIF() + SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-") + + IF(ANDROID_TOOLCHAIN STREQUAL clang) + SET(ANDROID_C_COMPILER_NAME clang) + SET(ANDROID_CXX_COMPILER_NAME clang++) + SET(CMAKE_C_COMPILER_TARGET ${ANDROID_CLANG_TRIPLE}) + SET(CMAKE_CXX_COMPILER_TARGET ${ANDROID_CLANG_TRIPLE}) + ELSEIF(ANDROID_TOOLCHAIN STREQUAL gcc) + SET(ANDROID_C_COMPILER_NAME gcc) + SET(ANDROID_CXX_COMPILER_NAME g++) + ELSE() + MESSAGE(FATAL_ERROR "Invalid Android toolchain: ${ANDROID_TOOLCHAIN}") ENDIF() # C compiler IF(NOT CMAKE_C_COMPILER) - SET(ANDROID_C_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}gcc") + SET(ANDROID_C_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}${ANDROID_C_COMPILER_NAME}") ELSE() GET_FILENAME_COMPONENT(ANDROID_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) ENDIF() @@ -125,7 +149,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") # CXX compiler IF(NOT CMAKE_CXX_COMPILER) - SET(ANDROID_CXX_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}g++") + SET(ANDROID_CXX_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}${ANDROID_CXX_COMPILER_NAME}") 
ELSE() GET_FILENAME_COMPONENT(ANDROID_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) ENDIF() @@ -137,7 +161,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") SET(CMAKE_CXX_COMPILER ${ANDROID_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) # Toolchain and ABI specific flags. - SET(ANDROID_COMPILER_FLAGS "-ffunction-sections -fdata-sections -finline-limit=64") + SET(ANDROID_COMPILER_FLAGS "-ffunction-sections -fdata-sections") SET(ANDROID_LINKER_FLAGS "-Wl,--gc-sections") IF(ANDROID_ABI STREQUAL "armeabi") @@ -145,8 +169,7 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") -march=armv5te -mtune=xscale -msoft-float) - ENDIF() - IF(ANDROID_ABI STREQUAL "armeabi-v7a") + ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a") LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv7-a -mfloat-abi=softfp) @@ -156,6 +179,8 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=vfpv3-d16) ENDIF() LIST(APPEND ANDROID_LINKER_FLAGS -Wl,--fix-cortex-a8) + ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") + LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) ENDIF() IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") @@ -164,10 +189,18 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") ELSE() LIST(APPEND ANDROID_COMPILER_FLAGS -mthumb) ENDIF() + IF(ANDROID_TOOLCHAIN STREQUAL clang) + # Disable integrated-as for better compatibility. + LIST(APPEND ANDROID_COMPILER_FLAGS -fno-integrated-as) + ENDIF() ENDIF() - IF(ANDROID_ABI STREQUAL "arm64-v8a") - LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) + IF(ANDROID_TOOLCHAIN STREQUAL clang) + # CMake automatically forwards all compiler flags to the linker, + # and clang doesn't like having -Wa flags being used for linking. + # To prevent CMake from doing this would require meddling with + # the CMAKE__COMPILE_OBJECT rules, which would get quite messy. 
+ LIST(APPEND ANDROID_LINKER_FLAGS -Qunused-arguments) ENDIF() STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}") diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 3cc652bed5..2d7daed9bc 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -41,7 +41,6 @@ IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "App ELSE() SET(USE_OMP ON) ENDIF() -SET(USE_OMP OFF FORCE) ExternalProject_Add( extern_warpctc diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 82d598d885..4a2132c8d1 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -2774,28 +2774,23 @@ void CpuMatrix::mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT) { CHECK(!isTransposed()) << "Not supported"; size_t a_col, b_col, a_row, b_row; - // CBLAS_TRANSPOSE a_trans, b_trans; bool a_trans, b_trans; if (!a->isTransposed()) { a_col = a->getWidth(); a_row = a->getHeight(); - // a_trans = CblasNoTrans; a_trans = false; } else { a_col = a->getHeight(); a_row = a->getWidth(); - // a_trans = CblasTrans; a_trans = true; } if (!b->isTransposed()) { b_col = b->getWidth(); b_row = b->getHeight(); - // b_trans = CblasNoTrans; b_trans = false; } else { b_col = b->getHeight(); b_row = b->getWidth(); - // b_trans = CblasTrans; b_trans = true; } diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index 98f66fa6f1..512a37166c 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -9,20 +9,15 @@ rm -rf $BUILD_ROOT 2>/dev/null || true mkdir -p $BUILD_ROOT cd $BUILD_ROOT -THIRD_PARTY_PATH=/paddle/third_party_android$SUFFIX/$ANDROID_ABI - if [ $ANDROID_ABI == "armeabi-v7a" ]; then cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_NEON=ON \ -DANDROID_ARM_MODE=ON \ - -DCMAKE_C_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-clang \ - -DCMAKE_CXX_COMPILER=$ANDROID_ARM_STANDALONE_TOOLCHAIN/bin/arm-linux-androideabi-clang++ \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ - -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ -DUSE_EIGEN_FOR_BLAS=ON \ -DWITH_C_API=ON \ @@ -34,12 +29,9 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_ARM64_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=$ANDROID_ABI \ -DANDROID_ARM_MODE=ON \ - -DCMAKE_C_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-clang \ - -DCMAKE_CXX_COMPILER=$ANDROID_ARM64_STANDALONE_TOOLCHAIN/bin/aarch64-linux-android-clang++ \ -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ - -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ -DUSE_EIGEN_FOR_BLAS=OFF \ -DWITH_C_API=ON \ @@ -53,7 +45,6 @@ elif [ $ANDROID_ABI == "armeabi" ]; then -DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_CXX_COMPILER=/usr/bin/g++ \ -DCMAKE_INSTALL_PREFIX=/paddle/install \ - -DTHIRD_PARTY_PATH=$THIRD_PARTY_PATH \ -DCMAKE_BUILD_TYPE=Release \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ From 387b4e4eac4d7cd7d4f5f18b52d74f6b8f9601bd Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 4 Sep 2017 11:50:52 +0800 Subject: [PATCH 33/71] Change the declaration of EigenGemm from `class` to `struct`. 
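`EigenBlasGemm` is declared with the `struct` class-key, and clang (now the default compiler for the Android build) warns under `-Wmismatched-tags` when the explicit instantiations use `class`, which `-Werror` builds treat as an error. A hypothetical reduction of the issue (`EigenBlasGemm` itself takes more template parameters than shown here):

```cpp
template <typename T>
struct EigenBlasGemm {
  static void compute() {}
};

template struct EigenBlasGemm<float>;   // OK: class-key matches the declaration
// template class EigenBlasGemm<float>; // clang: -Wmismatched-tags warning
```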
--- paddle/function/EigenGemm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp index 674141ed39..b3e666e860 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/function/EigenGemm.cpp @@ -83,9 +83,9 @@ struct EigenBlasGemm { }; #ifdef PADDLE_TYPE_DOUBLE -template class EigenBlasGemm; +template struct EigenBlasGemm; #else -template class EigenBlasGemm; +template struct EigenBlasGemm; #endif } // namespace paddle From 9293dc48179ae34d182f420c4500967d02238636 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 4 Sep 2017 12:29:32 +0800 Subject: [PATCH 34/71] Move the third_party_android from cache directories in travis. --- .travis.yml | 1 - paddle/scripts/travis/build_android.sh | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b4b83fcdbc..cc2036df5a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ cache: - $HOME/.ccache - $HOME/.cache/pip - $TRAVIS_BUILD_DIR/build/third_party - - $TRAVIS_BUILD_DIR/build_android/third_party sudo: required dist: trusty os: diff --git a/paddle/scripts/travis/build_android.sh b/paddle/scripts/travis/build_android.sh index 004067a8f5..9da71d1e8c 100755 --- a/paddle/scripts/travis/build_android.sh +++ b/paddle/scripts/travis/build_android.sh @@ -22,6 +22,7 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_ABI=armeabi-v7a \ -DANDROID_ARM_NEON=ON \ -DANDROID_ARM_MODE=ON \ + -DUSE_EIGEN_FOR_BLAS=ON \ -DWITH_C_API=ON \ -DWITH_SWIG_PY=OFF \ -DWITH_STYLE_CHECK=OFF \ From a98c9e6bbf27dba8377d4f709bfc0aa2e71b8148 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 4 Sep 2017 13:25:00 +0800 Subject: [PATCH 35/71] Add third_party_android back to the cache directories to speedup travis. --- .travis.yml | 1 + cmake/cblas.cmake | 4 ---- cmake/external/openblas.cmake | 4 ++++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index cc2036df5a..14a39c58de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ cache: - $HOME/.ccache - $HOME/.cache/pip - $TRAVIS_BUILD_DIR/build/third_party + - $TRAVIS_BUILD_DIR/build/third_party_android sudo: required dist: trusty os: diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index ab111eccc0..854066fd1d 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -13,10 +13,6 @@ # system paths. # -if(USE_EIGEN_FOR_BLAS) - return() -endif(USE_EIGEN_FOR_BLAS) - set(CBLAS_FOUND OFF) ## Find MKLML First. diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 0002a470d9..f9e05af59f 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+IF(USE_EIGEN_FOR_BLAS) + return() +ENDIF(USE_EIGEN_FOR_BLAS) + INCLUDE(cblas) IF(NOT ${CBLAS_FOUND}) From a60128aeb235c3e53b6785537bf3f67502ca079d Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 4 Sep 2017 14:11:43 +0800 Subject: [PATCH 36/71] move identity_op from scale_op.cc to be a single file --- paddle/operators/CMakeLists.txt | 2 ++ paddle/operators/identity_op.cc | 54 +++++++++++++++++++++++++++++++++ paddle/operators/scale_op.cc | 31 +------------------ 3 files changed, 57 insertions(+), 30 deletions(-) create mode 100644 paddle/operators/identity_op.cc diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index e5efcccb0e..d85c3d575a 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -48,12 +48,14 @@ add_subdirectory(math) list(REMOVE_ITEM GENERAL_OPS net_op + identity_op minus_op mul_op recurrent_op scale_op) op_library(net_op SRCS net_op.cc) +op_library(identity_op SRCS identity_op.cc DEPS scale_op) op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc diff --git a/paddle/operators/identity_op.cc b/paddle/operators/identity_op.cc new file mode 100644 index 0000000000..be956bf3b3 --- /dev/null +++ b/paddle/operators/identity_op.cc @@ -0,0 +1,54 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/net_op.h" +#include "paddle/operators/scale_op.h" + +namespace paddle { +namespace operators { + +// identity is a alias of scale op. This is also a example for creating a alias +// operator. +template +class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { + public: + IdentityOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "input tensor of identity op"); + AddOutput("Out", "output tensor of identity op"); + AddComment("identity operator. Just a alias of scale op which scale = 1.0"); + } +}; + +template +class IdentityOp : public NetOp { + public: + IdentityOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : NetOp(type, inputs, outputs, attrs) { + AppendOp(framework::OpRegistry::CreateOp( + "scale", {{"X", {Input("X")}}}, {{"Out", {Output("Out")}}}, + {{"scale", static_cast(1)}})); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_WITHOUT_GRADIENT(identity, ops::IdentityOp, + ops::IdentityOpMaker); diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index 4e039688d4..57696d2ee6 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -48,7 +48,7 @@ The equation is: Out = scale*X } }; -// Identity Op's gradient is identity op, too. +// Scale Op's gradient is scale op, too. 
// Grad(Out=scale(X)) => Grad(X) = scale(Grad(Out)) template class ScaleGradOp : public NetOp { @@ -65,33 +65,6 @@ class ScaleGradOp : public NetOp { } }; -// identity is a alias of scale op. This is also a example for creating a alias -// operator. -template -class IdentityOpMaker : public framework::OpProtoAndCheckerMaker { - public: - IdentityOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "input tensor of identity op"); - AddOutput("Out", "output tensor of identity op"); - AddComment("identity operator. Just a alias of scale op which scale = 1.0"); - } -}; - -template -class IdentityOp : public NetOp { - public: - IdentityOp(const std::string &type, const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : NetOp(type, inputs, outputs, attrs) { - AppendOp(framework::OpRegistry::CreateOp( - "scale", {{"X", {Input("X")}}}, {{"Out", {Output("Out")}}}, - {{"scale", static_cast(1)}})); - } -}; - } // namespace operators } // namespace paddle @@ -101,5 +74,3 @@ REGISTER_OP(scale, ops::ScaleOp, ops::ScaleOpMaker, ops::ScaleGradOp); REGISTER_OP_CPU_KERNEL(scale, ops::ScaleKernel); -REGISTER_OP_WITHOUT_GRADIENT(identity, ops::IdentityOp, - ops::IdentityOpMaker); From 3f555001db1b4a862931abeff85c2c33bb36149b Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 4 Sep 2017 16:16:33 +0800 Subject: [PATCH 37/71] Add im2col test. --- paddle/operators/math/im2col.h | 2 +- paddle/operators/math/im2col_test.cc | 88 ++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 paddle/operators/math/im2col_test.cc diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index 1065cd3d85..8958c5457c 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -22,7 +22,7 @@ namespace operators { namespace math { /* The storage format of the coldata in the Im2ColFunctor and Col2ImFunctor. */ -enum ColFormat { kCFO = 0, kOCF = 1 }; +enum class ColFormat { kCFO = 0, kOCF = 1 }; /* * \brief Converts the image data of three dimensions(CHW) into a colData of diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc new file mode 100644 index 0000000000..4a9deb7210 --- /dev/null +++ b/paddle/operators/math/im2col_test.cc @@ -0,0 +1,88 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/math/im2col.h" +#include +#include + +TEST(math, im2col) { + paddle::framework::Tensor input; + paddle::framework::Tensor output_cfo; + paddle::framework::Tensor output_ocf; + paddle::framework::Tensor input_check; + + int input_height = 2; + int input_width = 3; + int filter_size = 2; + int stride = 1; + int padding = 0; + int output_height = (input_height - filter_size + 2 * padding) / stride + 1; + int output_width = (input_width - filter_size + 2 * padding) / stride + 1; + + /** + * input = [0, 1, 2, + * 3, 4, 5] + * + * output_cfo = [0, 1 + * 1, 2 + * 3, 4 + * 4, 5] + * + * output_ocf = [0, 1, 3, 4 + * 1, 2, 4, 5] + */ + auto* cpu_place = new paddle::platform::CPUPlace(); + float* input_ptr = + input.mutable_data({1, input_height, input_width}, *cpu_place); + float arr[6] = {0, 1, 2, 3, 4, 5}; + memcpy(input_ptr, arr, 6 * sizeof(float)); + output_cfo.mutable_data( + {1, filter_size, filter_size, output_height, output_width}, *cpu_place); + output_ocf.mutable_data( + {output_height, output_width, 1, filter_size, filter_size}, *cpu_place); + + paddle::operators::math::Im2ColFunctor< + paddle::operators::math::ColFormat::kCFO, paddle::platform::CPUPlace, + float> + im2col; + paddle::operators::math::Im2ColFunctor< + paddle::operators::math::ColFormat::kOCF, paddle::platform::CPUPlace, + float> + im2col_ocf; + + paddle::platform::DeviceContext* context = + new paddle::platform::CPUDeviceContext(*cpu_place); + im2col(input, output_cfo, stride, stride, padding, padding, context); + im2col_ocf(input, output_ocf, stride, stride, padding, padding, context); + + float* out_cfo_ptr = output_cfo.data(); + EXPECT_EQ(out_cfo_ptr[0], 0); + EXPECT_EQ(out_cfo_ptr[1], 1); + EXPECT_EQ(out_cfo_ptr[2], 1); + EXPECT_EQ(out_cfo_ptr[3], 2); + EXPECT_EQ(out_cfo_ptr[4], 3); + EXPECT_EQ(out_cfo_ptr[5], 4); + EXPECT_EQ(out_cfo_ptr[6], 4); + EXPECT_EQ(out_cfo_ptr[7], 5); + + float* out_ocf_ptr = output_ocf.data(); + EXPECT_EQ(out_ocf_ptr[0], 0); + EXPECT_EQ(out_ocf_ptr[1], 1); + EXPECT_EQ(out_ocf_ptr[2], 3); + EXPECT_EQ(out_ocf_ptr[3], 4); + EXPECT_EQ(out_ocf_ptr[4], 1); + EXPECT_EQ(out_ocf_ptr[5], 2); + EXPECT_EQ(out_ocf_ptr[6], 4); + EXPECT_EQ(out_ocf_ptr[7], 5); +} From 740c8ba12a29418eefeb22a843bebb1781f300fe Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 4 Sep 2017 16:40:23 +0800 Subject: [PATCH 38/71] remove scatter_op.cu/gather_op.cu as they support only_cpu now --- paddle/operators/CMakeLists.txt | 24 ++++++++++++++++-------- paddle/operators/gather_op.cu | 20 -------------------- paddle/operators/scatter_op.cu | 20 -------------------- 3 files changed, 16 insertions(+), 48 deletions(-) delete mode 100644 paddle/operators/gather_op.cu delete mode 100644 paddle/operators/scatter_op.cu diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index d85c3d575a..90185101c4 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -46,15 +46,20 @@ endfunction() add_subdirectory(math) -list(REMOVE_ITEM GENERAL_OPS - net_op - identity_op - minus_op - mul_op - recurrent_op - scale_op) +set(ONLYCPU_OPS + net_op + gather_op + scatter_op) +foreach(src ${ONLYCPU_OPS}) + op_library(${src} SRCS ${src}.cc) +endforeach() -op_library(net_op SRCS net_op.cc) +set(DEPS_OPS + identity_op + minus_op + mul_op + recurrent_op + scale_op) op_library(identity_op SRCS identity_op.cc DEPS scale_op) op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) @@ -62,6 +67,9 @@ 
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor operator net_op) op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) +list(REMOVE_ITEM GENERAL_OPS + ${ONLYCPU_OPS} + ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) op_library(${src} SRCS ${src}.cc ${src}.cu) endforeach() diff --git a/paddle/operators/gather_op.cu b/paddle/operators/gather_op.cu deleted file mode 100644 index 3f04a7b3f8..0000000000 --- a/paddle/operators/gather_op.cu +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#define EIGEN_USE_GPU -#include "paddle/operators/gather_op.h" - -namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(gather, - ops::GatherOpKernel); diff --git a/paddle/operators/scatter_op.cu b/paddle/operators/scatter_op.cu deleted file mode 100644 index 6716b47883..0000000000 --- a/paddle/operators/scatter_op.cu +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#define EIGEN_USE_GPU -#include "paddle/operators/scatter_op.h" - -namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(scatter, - ops::ScatterOpKernel); From 1a615b486ff9327c6db4fe37b785165ee1a1816e Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 4 Sep 2017 20:18:33 +0800 Subject: [PATCH 39/71] Gpu test of im2col. --- paddle/operators/math/im2col.cu | 20 +++++--- paddle/operators/math/im2col_test.cc | 76 +++++++++++++++++++--------- 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 23d3854610..64ecd4e215 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -83,8 +83,9 @@ class Im2ColFunctor { int block_y = (blocks + 512 - 1) / 512; dim3 threads(1024, 1); dim3 grid(block_x, block_y); - // TODO(hedaoyuan): launch kernel on specified stream - im2col<<>>( + im2col<<< + grid, threads, 0, + reinterpret_cast(context)->stream()>>>( im.data(), num_outputs, input_height, input_width, filter_height, filter_width, stride_height, stride_width, padding_height, padding_width, output_height, output_width, col.data()); @@ -171,8 +172,9 @@ class Col2ImFunctor { // To avoid involving atomic operations, we will launch one kernel per // bottom dimension, and then in the kernel add up the top dimensions. 
-    // TODO(hedaoyuan): launch kernel on specified stream
-    col2im<<<grid, threads>>>(
+    col2im<<<
+        grid, threads, 0,
+        reinterpret_cast<const platform::CUDADeviceContext*>(context)->stream()>>>(
         num_kernels, col.data<T>(), input_height + 2 * padding_height,
         input_width + 2 * padding_width, input_channels, filter_height,
         filter_width, stride_height, stride_width, padding_height,
@@ -259,8 +261,9 @@ class Im2ColFunctor {
     dim3 threads(block_dim_x, block_dim_y,
                  std::min(block_dim_z, input_channels));
     dim3 grid(output_width, output_height);
-    // TODO(hedaoyuan): launch kernel on specified stream
-    im2colOCF<<<grid, threads>>>(
+    im2colOCF<<<
+        grid, threads, 0,
+        reinterpret_cast<const platform::CUDADeviceContext*>(context)->stream()>>>(
         im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
         filter_height, filter_width, stride_height, stride_width,
         padding_height, padding_width, output_height, output_width);
@@ -340,8 +343,9 @@ class Col2ImFunctor {
     dim3 threads(block_dim_x, block_dim_y,
                  std::min(block_dim_z, input_channels));
     dim3 grid(output_width, output_height);
-    // TODO(hedaoyuan): launch kernel on specified stream
-    col2imOCF<<<grid, threads>>>(
+    col2imOCF<<<
+        grid, threads, 0,
+        reinterpret_cast<const platform::CUDADeviceContext*>(context)->stream()>>>(
         im.data<T>(), col.data<T>(), input_channels, input_height, input_width,
         filter_height, filter_width, stride_height, stride_width,
         padding_height, padding_width, output_height, output_width);
diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc
index 4a9deb7210..ee5fb98acd 100644
--- a/paddle/operators/math/im2col_test.cc
+++ b/paddle/operators/math/im2col_test.cc
@@ -16,19 +16,13 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include <iostream>
 
-TEST(math, im2col) {
+template <typename Place>
+void testIm2col() {
+  paddle::framework::Tensor input_tmp;
   paddle::framework::Tensor input;
   paddle::framework::Tensor output_cfo;
   paddle::framework::Tensor output_ocf;
-  paddle::framework::Tensor input_check;
-
-  int input_height = 2;
-  int input_width = 3;
-  int filter_size = 2;
-  int stride = 1;
-  int padding = 0;
-  int output_height = (input_height - filter_size + 2 * padding) / stride + 1;
-  int output_width = (input_width - filter_size + 2 * padding) / stride + 1;
+  paddle::framework::Tensor output_tmp;
 
   /**
    * input = [0, 1, 2,
@@ -42,31 +36,54 @@
    * output_ocf = [0, 1, 3, 4
    *               1, 2, 4, 5]
    */
-  auto* cpu_place = new paddle::platform::CPUPlace();
-  float* input_ptr =
-      input.mutable_data<float>({1, input_height, input_width}, *cpu_place);
+  int input_height = 2;
+  int input_width = 3;
+  int filter_size = 2;
+  int stride = 1;
+  int padding = 0;
+  int output_height = (input_height - filter_size + 2 * padding) / stride + 1;
+  int output_width = (input_width - filter_size + 2 * padding) / stride + 1;
+  float* input_ptr = input_tmp.mutable_data<float>(
+      {1, input_height, input_width}, paddle::platform::CPUPlace());
   float arr[6] = {0, 1, 2, 3, 4, 5};
   memcpy(input_ptr, arr, 6 * sizeof(float));
+
+  auto* place = new Place();
+  if (paddle::platform::is_cpu_place(*place)) {
+    input = input_tmp;
+  } else {
+    input.CopyFrom<float>(input_tmp, *place);
+  }
   output_cfo.mutable_data<float>(
-      {1, filter_size, filter_size, output_height, output_width}, *cpu_place);
+      {1, filter_size, filter_size, output_height, output_width}, *place);
   output_ocf.mutable_data<float>(
-      {output_height, output_width, 1, filter_size, filter_size}, *cpu_place);
+      {output_height, output_width, 1, filter_size, filter_size}, *place);
 
   paddle::operators::math::Im2ColFunctor<
-      paddle::operators::math::ColFormat::kCFO, paddle::platform::CPUPlace,
-      float>
+      paddle::operators::math::ColFormat::kCFO, Place, float>
       im2col;
   paddle::operators::math::Im2ColFunctor<
-      paddle::operators::math::ColFormat::kOCF, paddle::platform::CPUPlace,
-      float>
+      paddle::operators::math::ColFormat::kOCF, Place, float>
      im2col_ocf;
 
-  paddle::platform::DeviceContext* context =
-      new paddle::platform::CPUDeviceContext(*cpu_place);
+  paddle::platform::DeviceContext* context;
+  if (paddle::platform::is_cpu_place(*place)) {
+    context =
+        new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
+  } else {
+    context =
+        new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
+  }
   im2col(input, output_cfo, stride, stride, padding, padding, context);
   im2col_ocf(input, output_ocf, stride, stride, padding, padding, context);
 
-  float* out_cfo_ptr = output_cfo.data<float>();
+  float* out_cfo_ptr;
+  if (paddle::platform::is_cpu_place(*place)) {
+    out_cfo_ptr = output_cfo.data<float>();
+  } else {
+    output_tmp.CopyFrom<float>(output_cfo, paddle::platform::CPUPlace());
+    out_cfo_ptr = output_tmp.data<float>();
+  }
   EXPECT_EQ(out_cfo_ptr[0], 0);
   EXPECT_EQ(out_cfo_ptr[1], 1);
   EXPECT_EQ(out_cfo_ptr[2], 1);
@@ -76,7 +93,13 @@
   EXPECT_EQ(out_cfo_ptr[6], 4);
   EXPECT_EQ(out_cfo_ptr[7], 5);
 
-  float* out_ocf_ptr = output_ocf.data<float>();
+  float* out_ocf_ptr;
+  if (paddle::platform::is_cpu_place(*place)) {
+    out_ocf_ptr = output_ocf.data<float>();
+  } else {
+    output_tmp.CopyFrom<float>(output_ocf, paddle::platform::CPUPlace());
+    out_ocf_ptr = output_tmp.data<float>();
+  }
   EXPECT_EQ(out_ocf_ptr[0], 0);
   EXPECT_EQ(out_ocf_ptr[1], 1);
   EXPECT_EQ(out_ocf_ptr[2], 3);
@@ -86,3 +109,10 @@
   EXPECT_EQ(out_ocf_ptr[6], 4);
   EXPECT_EQ(out_ocf_ptr[7], 5);
 }
+
+TEST(math, im2col) {
+  testIm2col<paddle::platform::CPUPlace>();
+#ifndef PADDLE_ONLY_CPU
+  testIm2col<paddle::platform::GPUPlace>();
+#endif
+}
\ No newline at end of file

From 32d7e61880103996795304c6997d530040304550 Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Mon, 4 Sep 2017 22:48:18 +0800
Subject: [PATCH 40/71] Fix some bugs.
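(Editorial aside on the test above: the expected `output_cfo` and `output_ocf` values can be reproduced with a minimal NumPy sketch of the two column layouts. The sketch and the name `im2col_ref` are ours for illustration, not part of these patches or the Paddle API.)

```python
# Reference sketch of the kCFO and kOCF column layouts; illustrative only.
import numpy as np

def im2col_ref(im, filter_h, filter_w, stride, padding, fmt):
    # im has shape [channels, height, width], matching the test's input.
    c, h, w = im.shape
    out_h = (h - filter_h + 2 * padding) // stride + 1
    out_w = (w - filter_w + 2 * padding) // stride + 1
    padded = np.pad(im, ((0, 0), (padding, padding), (padding, padding)))
    if fmt == "kCFO":
        # [channels, filter_h, filter_w, out_h, out_w]
        col = np.empty((c, filter_h, filter_w, out_h, out_w), im.dtype)
        for kh in range(filter_h):
            for kw in range(filter_w):
                for oh in range(out_h):
                    for ow in range(out_w):
                        col[:, kh, kw, oh, ow] = \
                            padded[:, oh * stride + kh, ow * stride + kw]
    else:  # "kOCF": [out_h, out_w, channels, filter_h, filter_w]
        col = np.empty((out_h, out_w, c, filter_h, filter_w), im.dtype)
        for oh in range(out_h):
            for ow in range(out_w):
                col[oh, ow] = padded[:, oh * stride:oh * stride + filter_h,
                                     ow * stride:ow * stride + filter_w]
    return col

im = np.arange(6, dtype=np.float32).reshape(1, 2, 3)     # [[0 1 2] [3 4 5]]
print(im2col_ref(im, 2, 2, 1, 0, "kCFO").reshape(4, 2))  # 0 1 / 1 2 / 3 4 / 4 5
print(im2col_ref(im, 2, 2, 1, 0, "kOCF").reshape(2, 4))  # 0 1 3 4 / 1 2 4 5
```

The difference is purely one of ordering: kCFO gathers one filter offset across all output positions, while kOCF keeps each output position's whole receptive field contiguous.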
--- paddle/operators/math/im2col.cc | 36 ++++++++++++++++++++++----------- paddle/operators/math/im2col.cu | 36 ++++++++++++++++++++++----------- 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc index a5fcf6da8c..5727c1cab1 100644 --- a/paddle/operators/math/im2col.cc +++ b/paddle/operators/math/im2col.cc @@ -24,7 +24,8 @@ namespace math { * [input_channels, filter_height, filter_width, output_height, output_width] */ template -class Im2ColFunctor { +class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, @@ -75,7 +76,8 @@ class Im2ColFunctor { * [input_channels, filter_height, filter_width, output_height, output_width] */ template -class Col2ImFunctor { +class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, @@ -117,10 +119,14 @@ class Col2ImFunctor { } }; -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; /* * im = [input_channels, input_height, input_width] @@ -128,7 +134,8 @@ template class Col2ImFunctor; * [output_height, output_width, input_channels, filter_height, filter_width] */ template -class Im2ColFunctor { +class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, @@ -187,7 +194,8 @@ class Im2ColFunctor { * [output_height, output_width, input_channels, filter_height, filter_width] */ template -class Col2ImFunctor { +class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, @@ -238,10 +246,14 @@ class Col2ImFunctor { } }; -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 64ecd4e215..9bff7bee3c 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -61,7 +61,8 @@ __global__ void im2col(const T* data_im, int num_outs, int height, int width, * [input_channels, filter_height, filter_width, output_height, output_width] */ template -class Im2ColFunctor { +class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, @@ -145,7 +146,8 @@ __global__ void col2im(size_t n, const T* data_col, size_t height, size_t width, * [input_channels, filter_height, filter_width, output_height, output_width] */ template -class Col2ImFunctor { +class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, @@ -182,10 +184,14 @@ class Col2ImFunctor { } }; -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; 
template __global__ void im2colOCF(const T* im_data, T* col_data, int input_channels, @@ -226,7 +232,8 @@ __global__ void im2colOCF(const T* im_data, T* col_data, int input_channels, * [output_height, output_width, input_channels, filter_height, filter_width] */ template -class Im2ColFunctor { +class Im2ColFunctor { public: void operator()(const framework::Tensor& im, framework::Tensor& col, int stride_height, int stride_width, int padding_height, @@ -308,7 +315,8 @@ __global__ void col2imOCF(T* im_data, const T* col_data, int input_channels, * [output_height, output_width, input_channels, filter_height, filter_width] */ template -class Col2ImFunctor { +class Col2ImFunctor { public: void operator()(framework::Tensor& im, const framework::Tensor& col, int stride_height, int stride_width, int padding_height, @@ -352,10 +360,14 @@ class Col2ImFunctor { } }; -template class Im2ColFunctor; -template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; +template class Im2ColFunctor; +template class Im2ColFunctor; +template class Col2ImFunctor; +template class Col2ImFunctor; } // namespace math } // namespace operators From 3b29da379a8208429a6e19cd8827022577c119d7 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 4 Sep 2017 08:45:04 -0700 Subject: [PATCH 41/71] update vardesc --- doc/design/var_desc.md | 126 +++++++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 55 deletions(-) diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index 1cba557b24..f39daa125f 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -1,90 +1,106 @@ ## Background PaddlePaddle divides the description of neural network computation graph into two stages: compile time and runtime. -The data structure to describe the compile time graph should be able to be serialized for distributed training. So we use proto message to describe the graph: OpDesc to describe computation and VarDesc to describe data. +PaddlePaddle use proto message to describe compile time graph for -PaddlePaddle will generate these data structure according to user's description and do some optimization, such as: +1. Computation graph should be able to be saved to a file. +1. In distributed trianing, graph will be serialized and send to multiple workers. -1. InferShape. Infer the Output size according to Input size and set them into VarDesc. -1. memory optimise and reuse. Scan all the memory that will be used and reuse some memory that is allocated before but will not be used anymore to reduce memory. +The computation graph is constructed by Data Node and Operation Node. The concept to represent them is in the table below. -VarDesc is used to describe different kinds of Variable value, such as Tensor, scalar, and scope: +| |compile time|runtime| +|---|---|---| +|Data|VarDesc(proto)|Variable(cpp)| +|Operation|OpDesc(proto)|Operator(cpp)| -## Definition of VarDesc in Proto +## Definition of VarDesc + +A VarDesc should have a name and value, in PaddlePaddle, the value will always be a tensor. Since we use LoDTensor most of the time. We add a LoDTesnorDesc to represent it. 
+ +```proto +message VarDesc { + required string name = 1; + optional LoDTesnorDesc lod_tensor = 2; // +} ``` + +## Definition of LodTensorDesc + +```proto message LoDTensorDesc { enum Type { + BOOL = 0; INT16 = 1; INT32 = 2; INT64 = 3; FP16 = 4; FP32 = 5; - DOUBLE = 6 - BOOL = 7; + FP64 = 6 } - Type element_type = 1; - repeated int dims = 2; // [UNK, UNK, 6000] is saved as [-1, -1, 6000] + Type data_type = 1; + repeated int dims = 2; // [UNK, 6000] is saved as [-1, 6000] optional int lod_level [default=0] = 3; - repeated int32 int16_val = 4 [packed = true]; // INT16 - repeated int32 int32_val = 5 [packed = true]; // INT32 - repeated int64 int64_val = 6 [packed = true]; // INT64 - repeated float float_val = 7 [packed = true]; // FP32 - repeated double double_val = 8 [packed = true]; // DOUBLE - repeated bool bool_val = 9 [packed = true]; // BOOL -} - -message VarDesc { - enum Type { - INT = 0; - FLOAT = 1; - STRING = 2; - INTS = 3; - FLOATS = 4; - STRINGS = 5; - LOD_TENSOR = 6; - } - - message Value { - optional int32 i = 1; - optional float f = 2; - optional string s = 3; - repeated int32 ints = 4; - repeated float floats = 5; - repeated string strings = 6; - optional LodTesnorDesc lod_tensor = 7; // when type==LOD_TENSOR - } - - required string name = 1; - required Type type = 2; - required Value value = 3; } - ``` ## Definition of Variable in Python -There is a class `Variable` in python to help create Variable. +In Python API, layer will take Variable as Input, and return Variable as Output. ```python -class Variable(object): - def __init__(self, - name=None, - data_type=None, - shape=None, - value=None, - trainable=True): +image = Variable() +# fc1 and fc2 are both Variable +fc1 = layer.fc(input=image, output_size=10) +fc2 = layer.fc(input=fc1, output_size=20) ``` -create a variable with a tensor value. +There should be a class `Variable` in python to help create and manage Variable. ```python -a = Variable("X", shape=[784, 10], data_type=pd.INT32, value=0) +import VarDesc +import LoDTensorDesc +import framework + +class Variable(object): + def __init__(self, name, dims, type): + self._name = name + self.op = None + tensor_desc = LoDTensorDesc(data_type=type, dims=dims) + _var_desc = VarDesc(name=name, lod_tensor=tensor_desc) + self._var = framework.CreateVar(_var_desc) + + def dims(self): + return self._var.dims() + + def data_type(self): + return self._var.data_type() ``` -or create a Variable with a string value +Then we can use this Variable to create an fc layer in Python. ```python -a = Variable("X", data_type=pd.STRING, value="aa") +import paddle as pd + +def flatten_size(X, num_flatten_dims): + prod = 1 # of last num_flatten_dims + for i in xrange(num_flatten_dims): + prod = prod * X.dims[-i-1] + return prod + +def layer.fc(X, output_size, num_flatten_dims): + W = Var(type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size]) + b = Variable(type=FP32, dims=[output_size]) + out = Variable(type=FP32) + y = operator.fc(X, W, b, output=out) # fc will put fc op input into out + pd.InferShape(y) + return out + +x = var(dim=[-1, 640, 480]) +y = layer.fc(x, output_size=100) +z = layer.fc(y, output_size=200) + +paddle.train(z, ...) 
+print(y) ``` From 2695d96fb1e04bb2d0266e5fcf8c5833ad57509d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 4 Sep 2017 08:59:19 -0700 Subject: [PATCH 42/71] typo --- doc/design/var_desc.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index f39daa125f..5286f0a482 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -4,7 +4,7 @@ PaddlePaddle divides the description of neural network computation graph into tw PaddlePaddle use proto message to describe compile time graph for 1. Computation graph should be able to be saved to a file. -1. In distributed trianing, graph will be serialized and send to multiple workers. +1. In distributed training, the graph will be serialized and send to multiple workers. The computation graph is constructed by Data Node and Operation Node. The concept to represent them is in the table below. @@ -40,7 +40,7 @@ message LoDTensorDesc { } Type data_type = 1; - repeated int dims = 2; // [UNK, 6000] is saved as [-1, 6000] + repeated int dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] optional int lod_level [default=0] = 3; } ``` @@ -78,7 +78,7 @@ class Variable(object): return self._var.data_type() ``` -Then we can use this Variable to create an fc layer in Python. +Then we can use this Variable to create a fc layer in Python. ```python import paddle as pd From 87ee69498507ca6b731a1b66de59c1eff164e9a9 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 4 Sep 2017 09:07:42 -0700 Subject: [PATCH 43/71] complete the demo code --- doc/design/var_desc.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index 5286f0a482..3dc25c23c0 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -50,7 +50,7 @@ message LoDTensorDesc { In Python API, layer will take Variable as Input, and return Variable as Output. ```python -image = Variable() +image = Variable(dims=[-1, 640, 480]) # fc1 and fc2 are both Variable fc1 = layer.fc(input=image, output_size=10) fc2 = layer.fc(input=fc1, output_size=20) @@ -90,14 +90,14 @@ def flatten_size(X, num_flatten_dims): return prod def layer.fc(X, output_size, num_flatten_dims): - W = Var(type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size]) + W = Variable(type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size]) b = Variable(type=FP32, dims=[output_size]) out = Variable(type=FP32) y = operator.fc(X, W, b, output=out) # fc will put fc op input into out pd.InferShape(y) return out -x = var(dim=[-1, 640, 480]) +x = Variable(dims=[-1, 640, 480]) y = layer.fc(x, output_size=100) z = layer.fc(y, output_size=200) From 2ea6f478419566e14f0b0dea6b881fc8536884b0 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 4 Sep 2017 11:18:59 -0700 Subject: [PATCH 44/71] update the design of variable --- doc/design/var_desc.md | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index 3dc25c23c0..db6ea2b2e4 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -47,7 +47,7 @@ message LoDTensorDesc { ## Definition of Variable in Python -In Python API, layer will take Variable as Input, and return Variable as Output. +In Python API, layer will take Variable as Input, and return Variable as Output. There should be a class `Variable` in python to help create and manage Variable. 
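(Before the patch's own `Variable` usage sketch just below, an illustrative aside on the message side of this design: plain dicts stand in for the generated `VarDesc`/`LoDTensorDesc` proto classes, and the helper names are ours, not part of the design.)

```python
# Dicts stand in for generated proto classes; purely illustrative.
FP32 = 5  # Type enum value from the LoDTensorDesc message above

def make_lod_tensor_desc(data_type, dims, lod_level=0):
    return {"data_type": data_type, "dims": list(dims), "lod_level": lod_level}

def make_var_desc(name, tensor_desc):
    return {"name": name, "lod_tensor": tensor_desc}

# [UNK, 640, 480] is saved as [-1, 640, 480], as in the design doc's comment.
image = make_var_desc("image", make_lod_tensor_desc(FP32, [-1, 640, 480]))
print(image)
# {'name': 'image', 'lod_tensor': {'data_type': 5, 'dims': [-1, 640, 480],
#  'lod_level': 0}}
```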
```python image = Variable(dims=[-1, 640, 480]) @@ -55,27 +55,43 @@ image = Variable(dims=[-1, 640, 480]) fc1 = layer.fc(input=image, output_size=10) fc2 = layer.fc(input=fc1, output_size=20) ``` - -There should be a class `Variable` in python to help create and manage Variable. +### what should class `Variable` Have +1. `name`.a name of string type is used to mark the value of the Variable. +1. `initializer`. Since our Tensor does not have value. we will always use some Operator to fullfill it when run. So we should have a inialize method to help add the init operator. +1. `operator`. Variable should record which operator produce itself. The reaon is: + - we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable. ```python import VarDesc import LoDTensorDesc import framework +def AddInitialOperator(variable, initializer): + # add an initialize Operator to graph to init this Variable + class Variable(object): - def __init__(self, name, dims, type): + def __init__(self, name, dims, type, initializer): + self._graph = get_default_graph() self._name = name self.op = None + tensor_desc = LoDTensorDesc(data_type=type, dims=dims) _var_desc = VarDesc(name=name, lod_tensor=tensor_desc) self._var = framework.CreateVar(_var_desc) + self._graph.add_var(self) + + # add initial op according to initializer + if initializer is not None: + AddInitialOperator(self, initializer) def dims(self): return self._var.dims() def data_type(self): return self._var.data_type() + + def to_proto(self): + pass ``` Then we can use this Variable to create a fc layer in Python. @@ -90,8 +106,8 @@ def flatten_size(X, num_flatten_dims): return prod def layer.fc(X, output_size, num_flatten_dims): - W = Variable(type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size]) - b = Variable(type=FP32, dims=[output_size]) + W = Variable(pd.random_uniform(), type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size]) + b = Variable(pd.random_uniform(), type=FP32, dims=[output_size]) out = Variable(type=FP32) y = operator.fc(X, W, b, output=out) # fc will put fc op input into out pd.InferShape(y) From 29fa887475b724cabaee9c69265d9e5e6db0076c Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 4 Sep 2017 14:57:44 -0700 Subject: [PATCH 45/71] Add initialization operators --- doc/design/graph.md | 4 +++- .../images/graph_construction_example.dot | 4 ++++ .../images/graph_construction_example_all.png | Bin 55421 -> 59679 bytes ..._construction_example_forward_backward.png | Bin 47228 -> 51389 bytes ...raph_construction_example_forward_only.png | Bin 29192 -> 32270 bytes 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/design/graph.md b/doc/design/graph.md index 87f696f90f..e59d04e1f5 100644 --- a/doc/design/graph.md +++ b/doc/design/graph.md @@ -25,7 +25,9 @@ The first four lines of above program build the forward part of the graph. ![](images/graph_construction_example_forward_only.png) -In particular, the first line `x = layer.data("images")` creates variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b. +In particular, the first line `x = layer.data("images")` creates variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b, and the initialization operators. 
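(To make that bookkeeping concrete before the next paragraph, a hypothetical sketch, ours rather than the patch's, of what the two lines above contribute to a block: each call appends `VarDesc`- and `OpDesc`-like records, including the parameters' Init operators.)

```python
# Hypothetical bookkeeping; dicts stand in for BlockDesc/OpDesc/VarDesc.
block = {"vars": [], "ops": []}

def layer_data(name):
    block["vars"].append({"name": name})                  # VarDesc for x
    block["ops"].append({"type": "feed", "outputs": [name]})
    return name

def layer_fc(x):
    for p in ("W", "b"):
        block["vars"].append({"name": p})                 # parameter VarDescs
        block["ops"].append({"type": "init", "outputs": [p]})
    block["vars"].append({"name": "y"})
    block["ops"].append(
        {"type": "fc", "inputs": [x, "W", "b"], "outputs": ["y"]})
    return "y"

y = layer_fc(layer_data("images"))
# block now holds 4 VarDescs and 4 OpDescs: feed, two inits, and fc.
```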
+
+Initialization operators are kind of "run-once" operators -- the `Run` method increments a class data member counter so to run at most once. By doing so, a parameter wouldn't be initialized repeatedly, say, in every minibatch.
 
 In this example, all operators are created as `OpDesc` protobuf messages, and all variables are `VarDesc`. These protobuf messages are saved in a `BlockDesc` protobuf message.
 
diff --git a/doc/design/images/graph_construction_example.dot b/doc/design/images/graph_construction_example.dot
index bedb6de011..8d1b673abf 100644
--- a/doc/design/images/graph_construction_example.dot
+++ b/doc/design/images/graph_construction_example.dot
@@ -2,6 +2,8 @@ digraph ImageClassificationGraph {
    ///////// The forward part /////////
    FeedX [label="Feed", color=blue, shape=box];
    FeedY [label="Feed", color=blue, shape=box];
+   InitW [label="Init", color=blue, shape=diamond];
+   Initb [label="Init", color=blue, shape=diamond];
    FC [label="FC", color=blue, shape=box];
    MSE [label="MSE", color=blue, shape=box];
 
@@ -14,6 +16,8 @@ digraph ImageClassificationGraph {
    FeedX -> x -> FC -> y -> MSE -> cost [color=blue];
    FeedY -> l [color=blue];
 
+   InitW -> W [color=blue];
+   Initb -> b [color=blue];
    W -> FC [color=blue];
    b -> FC [color=blue];
    l -> MSE [color=blue];
diff --git a/doc/design/images/graph_construction_example_all.png b/doc/design/images/graph_construction_example_all.png
index 18d8330b60e12720bb993c8cf588d64ff8db1ea9..181187503472d15779b87284105841168b3945c4 100644
GIT binary patch
literal 59679
[base85-encoded GIT binary patch data for the updated PNG images omitted]
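(The run-once behavior described in `graph.md` above comes down to a simple counter guard. The sketch below is hypothetical, with names of our choosing rather than the Paddle C++ API, but shows the guard in `Run`.)

```python
# Illustrative run-once initialization operator; not the Paddle C++ API.
import numpy as np

class UniformInitOp:
    """Fills its output variable once; later Run() calls are no-ops."""

    def __init__(self, output, shape, low=-1.0, high=1.0):
        self.output = output      # name of the variable to initialize
        self.shape = shape
        self.low, self.high = low, high
        self.run_count = 0        # the class data member counter

    def Run(self, scope):
        if self.run_count > 0:    # already initialized: do nothing
            return
        scope[self.output] = np.random.uniform(self.low, self.high, self.shape)
        self.run_count += 1

scope = {}
init_w = UniformInitOp("W", (784, 10))
for _ in range(3):                # e.g. three minibatches
    init_w.Run(scope)             # W is filled on the first call only
```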
zxidJFI{j=!&D3Jplg#Tb%lTyIhjrNpV`@`iE5X#$x68o>Tad%yr)XuM)9P}vjyD>Y z^Y3gQ(bW1h5mb6OJEgT&a!NB6v}Ypp~9pbWOB|-+gtmyy!l{ zh}3EiYti{|eW+sZ8K$(Cw`y6x?8LFlxn%RvU#3*nX|`Kx_9Wmge*lb1oKc06^Xk7E z5fckWFUmR6M1UYVSEORiZ$w@YTZL}x%A;i?a^R$eS`uZ!FAJe!LXu9R+NiAGyreQn zB~SX$>}{!VPqpo9KfU5wa!5K+My8+0$8c+1PZU|CjDL<1WPQ`9UV`{t89~N_> zrtjW=ZiDEfml4jg1B=+!fb{vS8$&zeq3faR(5ha&1Ufe#JDMiJaJOr9+hspDiEkQa zaO*+B(<*I83)D-9IOH= znX6Nz)5)ZZyNn#^21!NuirAZ1hsH>)0vRD9aQCx!BXu-QUI8i37lTZ#5~-JZ(O+iN zEyGKi%ld~>Mlg>2F~S^<2tFO;=4Tz_Mrf%o(lyldW(2+pv@uVJIvW!fH_CRruGgU5 z?2D>z3lIJ!?-UNjL>A34ab1%+OBNX?j#4fu(s&_ED~ zCN?y!qGMTtgY*s_2#cLeSJu$w0ZA{~ii{(*)~hRPj%$zu;%Gju`{A6SB|!j?owNCF zG|GQ_HKIns&*pJw!vuTC)34gi=$a{bR-vqi~= z&dXwk5n-b(5=E1HR;l3OuLwQ*9s;ugj{ZfGU;a@D1;MC*$eJ_63p zb#{0}%7^MB1E7S^A|3xR?v9Vrz##Bl?apC&`^e*-^rJ#lwLovlfbfbt#QTgqk~ACk zkmF)O)I&`s>inGk#?ukVy7C!BolR@aa#0vR)O;rKFD$PH`C51`gBg4ss)hLj#sGw`yA&Rlx>>=_E ziA;XS85YH6KzpT`tNDXZ&EVy^H>kq#lO^1J)3cLJ^EG&^l3&%0E`3wR=eYSF>j#y$uxFL#4;5=>X*2p`11O>F4lc`%9Lzp;5}%})jRflWB#>Qp%D%Vxg(>sbl}-~f8Mp2-!* z4M(h%SUx=6T(vTE*I4R@y03M)xPhWCT*?PV`|(LLvw# zE~dWfr`1w_O|RK1I!;JT9JBi$jv<5zQ^{Q61zK+ArJDJa7mu%v*%^&QG8v((4la%H z%8qhhc9apkZF>|gYhRHFte>Zvu%KhKwJZ-aXJx4PC)C(`%#Z}%Yph;tdNX+Aw0Dpu zdt`#_)5NFn$kZSVXHx7D?w`uCx9u;urVH#BC3KvykOeGx7#jX90Ko&^c6In|`Jsee z9Ur2$o{>m#J9(658)3rFGapx7&j4E8LLK%2oOmt$2`XVVX1HQj(@A-CD%Z7zGyj#I zzgx@#ML2Aru4l(WJw738PvZ)p0%N@FW?5zHu>6^K;4zg)X{+F;Xpt{P&{l9h-lOLu~PxrZKW8`2nN{q2xb1PBf z4J?mK1rjtuSS})&LpmH|qv%%6q^M0?V3J}3C z!piXcg2$e7w~i6;Ohty$Gee1C((Bzb31@h-=?1=sI5Li#WO>5x6&jHaF8{Z}7$%&; zuZ{;?lSxoxGZB{z+-eSi-f>IyrS95l_ij850P)RQM8|iI*Mw}KLR5#nXxAjKC?W_X zQGhjb^}h}t3Ql3T9#)Oj$<@ z&h+r4Sw%*R=%WPN;^C5g8aqk7A-)d1-P1W>_y-pPj^E+kaXny=iOhpxqG3lE?DH1ysT%N3Rt(HY4NYT8s_>Z|l6e)*`O8-#)Uc}?YlY>2UMi?su z5!z69vs2Ap{!c@TMCMzhs~giHvg)}jaNQ1ha4M>!yq#y00hJ=*UnlP?DUwM`FqfNX z_qy_+Q+c8|-|!0&^xF!TK=JTe(+L-oe~?mwzmO7hg{Ys|tPENs?QZR4Y#uZ;f$FsP zmyc2=gnySK3j6&dMIwIHgbvP{`)e>L!iTjYvhlk116memTvFB{Gk-y~Hl4oMifyPJFrTz=^MOcs>7FR0O%7h7F zA-#vyfD?BM|N9&*mK*|S!fex-qlj#7z=D_IPwFbXBE8+SLxIx#}{37=O)8){@ zp(8$*|5-y$4M~v%ws^dO_7Hw*g32GF-(PEGjelFl0DqxrmI_*5wB6n@#Uf(p3}47f z2Uf4hqFCb3K?n7t0hnrQLM9eAq=ssHm(Nvr}|1U$;J!mnaGLlQI# z>hxmd?~n>G=Hl4{@IL-YZkcc4io$$*Lr`|EgUKYjM9382h|pjLIyk@Q&4m3Y2_(2f zq?Dp7q)<_6D^@SW#Pab~dY;9Ep3eDuDEeZVMFgsfNaP$_cHDd3V&r5nM1gcO384;Kn~wolg;annTy*T+4CH5^c&{aa@Jed{0EM|vOgwZf% z$lF%><3IhP5HpfyziI}IiDSwk)WxvZD1ZIWZE3|a4IwV(_Pq`;1Y;-j?g{&8l!v5<3bWTPQUrTwGxqXV;R(S)N~{&^{zP_4 zD}x2^z9B+SA(AI_@VvSD?XZ6;q_L8VFwKS@aa{`3Mjl7I)#n#b2)pUV%fx5_Ce&_m zIbI5XGbdt(Y11KNjQmft(aYn2*P?iu%#W$yFm&j4hIrpdbl?}YKYArshb!{)4U~Ua zN9?Qdx{QRJKVVjin$@8I0_g+3=7mWAp!y9j7MEZ%WPWebc+2-#(vQLvG9>8D0PrB= zYq){r^e{-&v;z^=?p|qJLHv-IAN1Y-uedcxihQu{b9xX+p2lx#l$sznKf?*TqtSj@ zE$7QX%L9_wi|d*%fT`_0tGP0`<@O+#FM`thqkJdtkD+HG1q2H99(LTE zqz$*gpvLkOP-nQNjm}gVk5-URI)n-9&T5H$=Vb7pj=l7}W$=j7Mk9j+t|9?UfwJ$} zf2HyGElu-&9)Q?NHCfHiciH3ePG~w+RJ|5^-jo_Er$jh_k@a|{5;&&BkN*y0!kW#UVdb|?b$6f7%RBdfihyzNN3mEhaaU<;Vl zjMGw%SUKOAYIVQ4ubsjEaNcud|LTP(+1Q_JT|XiE*>yMcjg8PU;(XDH38kU-A>j3M z3OpX>yg5_fdqYWmJ?;7>g_PMnjgCSaO{bJcgTn8PUn@7l%HXbI#PF~ngPpAq%8mC4 zJ6V5J63q@nSF*NaY!Q^4#I`>vYwpcrt?6!gd|Kl%NMolSHhpGy)LP%qIymKtPBO$$ zSKzKa+T-5=enc$BDtHj$yZcEwBH{-xN_F`?g*SpB%g}j|;j+X_<&VP^7{TFbtM+g; z7(=;A4(zX%D+`|HkQ4VZIQy!j%^1v940ETyyviKPnJ zJe}Cr_<6Tcg3_lr$7AqJkWG8aDq{y#?5!1gTf@G>kk=twB3np zU5<)D5XXHQYsi3bMkD;FI;#I-aCh*_Xp$e4FN#U9w3cdJaN9jq+DvZllzhhDB=H{2 zmj#&0AMnBsgYE!4ZCezKYZ@Ie)Wx#wu7EkAj0$-=4~{!pXALjVY_Jk}a69k)=Bob~ z@1ESB|HWRKF{XLNweOkY!;&s6HDms`qKL+wA-uN@bb%^XtL9w=%*StASGaD*l*rF2MSZO(Or-!c-^COnWl? 
ze{d~+D31dYThf5^VI-I6 zw-2AT;$+|Wd*95Dv)=U>AUslhOqF4K_&}qLme(e~>ZxMTdYvje$EYQ`;w^YBHUsmw z0_q8XYMz+O!E`N_C5s`p3ZdDYs47H7xM{t8v)KdZaa1Y8O5~`BRM-DQ zM77<&z%|F?e0?CCgCuUqP#yovVcri>C@!*f$F>g&)%(-@Qy`UE*CkW~_KCx8npl&Q1Zft)UXR0iG8$uos-kDVVBl80S_Z*nwA|)p`*?Uo&Tp;|5Hie>z%8eDg*W(i z0yiGC;LbbFJ9;19z3uHv~fg1$YP-H>?{6zmTvhqpUPQ37@#Cj!4O=|>3flBX!6a47bK z8THlzM~+dLxTpPUf*J-HoRiyK5zlXYRyZ?s_eQ&0+6e~k{rTOb!J-{cLq$Y8iE}eo z@H)s;O3st2i?q%AarO1jq5EAFcC_ZZBs(c?`GPc}1Wda$c&z#r26L zZ~P5&t0cOw!kMv!e!7qr)An6{oUfRzOE5`lI9Xk!zaJKP??(W)N%Z1Iu&H0?mVF>@ zn7gOi`M4Z&2eU~DA5zLpeL0e}24kmUwG2rKW)QdQd^%=2T7yk?SVNP`r@(kX5XO*uG_k9kxAkua2dAv&v}E zaydeFY3EbR0#Fl%2vitM=F|mn@p771#bhgIRqkv7+%YDoiUNE#qFntekq=!j{Lb}L z7fkbY*Ch&FmAmZq)<=mL#m}~Y(b!Lj3!%(fSHoPOlBzHkx>AFoB%K+C#id5jl2@2w>s+$~gSpaOeeWEInJ+0t0&ElF1_YB*&$6A$Tqd-1#w@{PNh|G+Vqr z6ioqQuO#&w{XpHoY29Wz#B)e6+0w{DorZ%BPWkYB_mgEe-F&DErP*1pCuFF&cD1|_ zlyEsAWj?AjS?~SAv6B@XF>gkvGCETlIq&U#n-0E^;^TNe<^raBOeA+>aOGKQ%yMxq z^d~o;uHXnYTrpdgDrRnZ}||Md=3($?hu3SVm~8G?ZNpBZNmc6YqtVlj;% z1P&#r5LIs{S_v7f9gTM<(3Qy$Ik&=I{NNn!tbi1juFBf;$&<2NJGHYB(0SZ@hvkut z+jzjb#gXLuT`nVE_$59W!lE=H8ZfLvZ^y*ytmb4g#|)AYX}&az(dBTY>B3a3kc|V$DzD;e+0V7?n zm4GgUlb>3+qXf)`R@{jvvjc%9`}9gf!k*wpmSEC@qJi%l0k0!mU!J#!gIHccf33GV znTAYglMzXojYd+p2Jwtu5q$vf>*=PXa?ST78zyleeBww-cLgQ);ya_9RRZnHgI*p- z1=DhHO^>iX!?9sFC@Z`E{1lksjZM?`SjK|FXfoveqQT{SHfKuBC8y=-xO^y|F1!%T zo_M>tn{l`^1)(DqT}qf83dD@CUHO8amruF?;RN;|LJ+xSSu9xE7~BTtPD zh|U8svx~~UCAeWuMxX)}MT`Ny`LB18_u$7Nx`bO#5Ndv%Fu%P@A#>z%b2Ed%6k=>< zl%SJ>ZO)}IO6v0sTb>p#uxI(AqlEkezN0yrYDn4IB7D>%%pkSR-^@cE%? zh!w>5Wb>vmC?j}-Odtc{=k1t_#-^3a%9DW^GpvbB>>qBP3)k*@t=K`R(iKrd&lk05(Q* ziLGpAA8v+Qu~)6@L>KD0jGnI#Su2^n91IjQ?FifNBvrj+9*;H;l8bM{6(ik#Dqz(P z2zk~D&8Fb8N09Oerod75-ZRvRew}9?!P#!>;!n}(fAwSoxIvnp#mKC(@pA21n>_oJBGK`7j zVLgnL%$e13=6rLMp52xj@ZdnxKm&G^lS=m$YYMjQm;XFXlbR3iL7y;8nr_9rO<`)Y zK_zXcWmpY!W=HoE!ZtAzrr#T8Yt9=MZ zY`TQkrH-F>vJpNRVc}xfe638~DiL0;-7DBq$ME4@os+6(m8Uiha>zTCx)4P99;1rh zN=^+}8uB*+YmP>hy%EB4+Q(aL;YjwFCGWn3dCN0Izk%hoN+s^Ld!$>OwovIn%}XG# z9y=dg?%K%(@(PIel1DoJ>f2l2O#GHQp;b!sK4uE2nG{|>n#?Af7aC1Oc~2?+5eQht ze7(aWq3{$Z>aD$E5jpniq=6DXP(mK70?LT|BMi2|@Ed#il*KVC6V#z%42C)e7iysb z|L^LtId}MhUUsPTo(Y+>jVLw|IcK*iPWlnTAsy&)RF!N&D<)+!);s*B_vqlhH*s1- zZ-lyA@;Yr8n+-bKLk95PUbmfzQ1{A)ABrQ0+Vtkz0N*A%p^~h+Ql~7?fq!kB*7Eox z57ALzB=})*-y3jcwq7ZmT@#E{t%-(QT}`qWBv#R5%3wYDbRc@aq)D&iykwut2KjFH z>YDqRd`t-t;aDu*)5%NIZL!@K?Vp$SgYMy`K2AJi#@3At?~?XA|7mIGxni^umv_os zFvz5ng!MVnA09zZE{k6c|&0SnpMC`;VIG-3jB+?fMC2jq(A8(pm)|X?8P7rHDbEO78sSqXgx=-K`Mi4WDU(yC2@0v`zN`Q89RbxQ zP9(6=6P1Lsg&~vIYr!;&T8mk@ceOJP2do{B571(tIS%%-HtrB}$ZT@rFzQAFIy1_2 z5TybXt8Go8HXLQJGNGZx%Y5=jA}KQDo=7sqHZo9Q!*d0|Tdc1_$uv$0JQb1kjzkHD zh5f6f_$-naX{4%&po7Yd(^IspL@EIZ&qviqm!>a)xHLByjc$_xErEKHtQ~OJt^?8L z?~|OQf6|h<%{KIf(}|XCWGu(pZk}$-yczvQa#+S(9iG?Diq&Dx&%?Es(<^G?)KQZ= zY^g6jT^@(Vhb$7^AbvjjsR`)`aftfWm=sf}MXiz(D2kto)%aWgio0=!>}5GTKF++o zY|P{7D5K$MI>3H6tmQ)T=Dz0X0qhn{%Xh&3aZte0k$%c{`q(mupOt4&J=S<$?zWzhZzn<>#4ny4}S-2;o^) z@VUmKAF}(8X zsCsnZYJalz6lK2asA`aLdlnd<7mSNx(rP4X)n@ym~zZjrd0Q z63hPO`PSIz^@FoVboS_v!#zSSf`%<|%4^6(e2jGvl*o9@YG<%BztR4phbuQ)^|+x{cNSAeVgy2o1CImdW3eWt+}m(6q{XM=61(f-Lr z#S0ah?NxWeN-NuF&u!XRi>|sUEulPRzyP8doV>e&!jr!SE8`PCe2>CcBu z&4lezn!mI{hRahNk)n<7b*LhVcfAst9<=AIFtokyw;_C%Lsg~d4~dpQZ7j911GTH; zMHL9Gk|TxRy^h^_Me{pk7{XfpqETmInj25DtxkuxQLUIQa3uF|eKsjkMUw8VZ7|G1 zC;P6ukFiKrQDDw9)XiMx9K))!;Vdbb)NWCs`GiO_nsv0y0CD97;%VBnPPUpxX4`_` z%_u=PUdiqJ_!yDX70B2-G=enEmi4LpDq-0fLvXo4q2;iw8SpDi#d(JQoLvv_OIJJF zw8I-~F#foH)vIbfi1?G*G(xTx8CaQ@NxO=7SpR;I{(`VoGmrK#4*^`EAaoikZB*zh zf#d0p8Q1f{Y2*dmyl0@_%%VeFW6d;Cq~zMW2ch{Ub&O?wtj6Zq2%&6QRaS^~B+W}a 
z4ukL#IS)NPdUw3O)XrM`A(8sCO#-ty&ti~fTFj?3E5s?mob~IId1k@oJM<@hPv*Mo z-N;@?WUQ{xdpYL%E$^Uc0||)=lhsw)=mo8u*!0oIF?I6~p|Yw?j?jdjw`s4*&t((= zku`Iv{$)0B0qSc+=AyjHxoF;a{pps=@|Gb4NIds(!=3{J8n7k2keB-g`YfZZxiBVcqC5R9<5{nhf<%SJ48z2OC^2NCYNAfNmPqPUhDiDI~`6WI~T;w2qj6q?< zgm8@au5{yiUW|Fi_#vEN(iUk1trB^e*)lCl&zJbVlHLOdzx%NDcKe7Ye$vTyjp?+V z$?l+V8;9*|UZuqGs6hUwfiFB>#lv?Qz3Svw_#~d`&x?|{kg>swizhi7Y;)kj{Fh7> zDQ`Xb;e6R6+E$B5u9f<0{U?3tkEb_~C|6kri|FE6oTk*2PYaOGD-{yFOKULWz_4fj#+D84!hHc^C34>_Khw=x6+(+ zY!3?#ul4S<q+=n*A<@kSA3jZcg8!x1yahYw;`_*eV@$CgS?ZsvyNZoPO?D|0aE^f* z!z?O#C0smjm@;t2b~j}edJN&auE3P(w0n_KqQE{tY|b?W*Pqw2(+_ri(sYKcn=*<| zAO4e!eoZp&cqO+13+zb>%C`!$>aAFSeB{@77RjgXF7jUv$m?Xhu!A}2g)hj=;(B(Y zf$#)z{SZp|E}FLV+Y*E*-jjxk*xQM$qsE9Los6Z+EF(>QePK&Xhq9in(qMo8*3X`f z&2c1y-`2XHuvIb^k;`N9%!L#lM#I4_8CWfzNLu2 z7?BFlB6py5t__P|PB{W~`T%U3Uxk?}D{Pawg~kypK@n=~`WzzjL_2r4=`-?%*T5&5)BhG1|VeZhx-Rx}GyM z78ms__(*l{IK-dfDyy)1tj{sQ=_4V6-5GG%>d5oxbG_Jm;A(!T5+Q;3i(W(6!j`fW zEqr1M-6{RbWM`el+pgPqdT5)HC zWQs9&$9L*SCqm(CS{UaQ*Zh3S9qU5YQ2X@Kj*2P7_vUR*6$kP$5_gw+Jx*_e#`ip^ zzc9-eV0u6Ol(tldp{=^hm$DJPx!s0b56W!auY|RpmNl)!&lallAtzw;!h&y zPN6cB6z_4m{N8ZgpZP$5pW|}}xBEQ$HvRJYG6q3J{EqdN_cSI?~fV2l$d&zx>|g$ z59Q*M@`U|4;x-vc&_M2V-t6pxv#6pL+kB1x6oNOp*J6Z?7BZs#2Kw_D?3ST1E>FO{E;c5QDfOmn5yem20ayo|wlD>63 z6J1gLRZ&N4vXjSMmn61PT$!?%Vy;j1>XFh^ujnc2^2b|yCXqdn zO#uj9De>iRQf}GrcnLOUr~ntoQ#*yZL5V00z?zUOB~09K-b=9;@bfl=rhbXKi7xnL zBb)G+ZTLSWc)pW#ye>E!?a7aA$L|t!LP*5d+vu_2_N-TL<1IZZUI**b!5Dg8!FFel z3KSm_MetN*M%no>);>8TOT<-Wh5^m?407G^-1mEus(cNR4@*4g7Fu9t#J-wlb2MhLw+s zfzwiERVdF=km#d528UgO@q&~ewYISFyaK9YaJF>`Dk(@!J{EW_F(uHx$gdI#=#N|w9P{1Idr8>z$S|kl%m4EF20t?oa zChM@lkx(ScDYsp>#wF3Rn1~j2H^!!0z&>S1eJ73Pc7=(c#{5Y*;2bYjb?G{*DZmPx z1p%wmE+d-=;&gM6a`#`GE+F~ZP04?Dy`Ch(+{tL%;WC533Y;rEtkALZcDX5Duxpc4UR zTEfGdzMIZe8P!LZoKt?3bmm`ByV@p)+%(XG>C*FW*qSHPki(h56S5 z#tuxJF^{-wqbrqnL2fY2Gu4p01C~H79m3|;3@$SB9+MJdsMF>8Pr4x1pK7<*7vI^B zMqF__f!uyjGfFwK*L(iXY8X~Y7giBFD7hO(7hyWf$g9Y) zSw})S9LZ(0{QWx_7@$w8^YtsE%fRzZ@jc-HKHW9Cbf23q5}(e&uQ*vwqte={4DkA~ z#@fIC1#oW+MCt|40q1u&pK32hKo)JTsUPwLnVP?`6I)=)ewg%YPasc$lxw`{lCyDA zsdpOalgVOuEL5Z1aaDiSa$L`+__&iA&bc6T_P4L~tjgh4N}EO& zNJQzJ*p2ej^ox_0D=HO?fJX=^v0`gW_K^t-#Fv9i=N(yh#+#aOU|9WGU$OkI16_?Y zyh^J?GY)z=0}w~Hl6(PeEk=gkph@>FEz_9czZSugRlUm};YBkSKttVfOTyLofo zG*<>;oh$Q@*F(VNFn!0*{Jri7-nm^!5Sr3rp12TVm!sB0V_EaJklK5Z)u@

bg)u5o6A_QRsi$vk`&jynYFs@TZHCz(jz|3>?JfWo0?Dog77^lV_ z2|SIdb`t|*^*7?W$bb{HcK zdVlc<;P2x4HZ^$Jz0{zFP$FBOLTBV`r&*q~GuPk(zYpu=V@%HrBNT@ zC`Y8G57~HF;{Xpt*Qembh2zbk@3Z(5t&!p%FjwX$|E1Z0=!)0O1=5gGk`iV z4>k;{!az>OxB4~VP_L?JKpnhn)6c505(3*S2yGbCb%17}L%(SSt^{{p6JGs+aVnt+ zESr>mEXJ%Ul1L|h7wlr4itlj7{dO+Bh_h-IgMHQUVfD5v^HL+iSK)LK>Tbr5$ymxr zDQ5khYe_V>ZaAu^w)5200U&ct<0SR^I0DhN0G{uf5R~%lxdi_>b4`L|Z!I3Bd@5}m zy^-1fUTDE!iO1~d(wz)5RNW~@W+YN{C?xB4R4@T~)HAH&Hme}* zYhxQ_h4PPeLaWu;hHcJj;vpa(u#;vG=?4(#*&6r zi~mG=@Cn{v!}9~Y^cQ_quPedD-^jN@)Vzp`o3{M!5E*_G_G|@)pd-+>Qgdf0imY$C z4c_nCte?Q8G1!zYRgRG+Ywb7_%W~3qN#{Iad)5P@9A(;NY3liwRF7&g^TvHd3mP0w zf;!&iYi@80CMBIRS#CwZ-hE`!YFeie|LqzXcG@wo+C`hd(bMiaVdCi}5^uv3A@5O+ zqmvcXcL=QJg){09HLWE=#bWY}Z@3f_h7Hy^O(oOJhL77l@R@KU6|LVV5zW5i3SP^K zmp)aPs?l^ln@GT63W6>=R*ze2;AERI(w?z>**WcT236p_KHJg7h}`P=I}{|tyf+HP zvwRE8ghn*&#SLNJQ^eIqb_rk$_i85@D*IkD#hYb+3^EaIkCNi>{WV7)7-y)wYZ-YX zf={PA)LiEB++#FfK^>uS9=DC~lV1^qBu<`s3{(#omH^+zf^T2XxhhMD?GEYu^R7^j zn~dG!aq&AmfL8m20eF&0977`f1AVNzel(7UfM1B2Q`UwnTR+*>#hz9YiO!8d(VXeB z?A2kfLy41gz&g-)^iI$X-|5X%#~Gs-DiotAdOPn$am~;)fj*>HDt$s-!|pESL9g=pLQMOWQaJaRKRB6(ez)bN*d{CP@yzP* z2rTLaCdTNP$+%V@h-6nB^uMC`CZ4TduMWy(RCAMUM6F+oo6Y z1Da_vvX_x#A?VHD_vJxp^$Y#azNzvr$?)?5yj1FUfEi77#lr}Ey*1Pa^IxRPWooZ! zr$yo`JTiD6bK&nJ?7rIyK7)+@r!*L4>bRLg9hDz{4F@>{HG;?yh7`B5nmdSb1#|WN z&Pd)C)T60B>O%_reG9nmY*YFSqswVBKM|cMF@~E7`r4iz1K-*8{Xk*^M`eKYBAuCt zt9d5+L;LZ&>7TYkb;kPnku0Hy64D9=unth-lE+_m=(Y@=l$=5zgG+8kY0 ztqwdJG2W9QrDU+EEQ_it#i=jGr(S)uR8$B6*dm_EW(o073V#K%eVm2@RtiqJ6t32>q_(HJNhb&;b?8|X<=r_vgf#B z&EPsJiFUPtsb7_jFBSjjr83kMBhu*pG0~gn+p61PT3|AIb6gYOPI|(pMH?f|dKAPu zt*OkDq+MYTXa+zCRryhlk>WUWZP7P!y;@z&oBipPG8~Qy6OSpuiJ-DDO1IisBiT;^ z`&k@?pBenUO^1Y%e}YDH%J$ARwKG|Lpjd!Vy!V%AT1Y0!TUl*Rw>7C(Ab&m5wE0*3 zM?NO73}uZ_>{M`&xPc~qv26@QV#A;-61cw> zc-@%&&o@Ds5c(9o_ognm5uPLm`eOP+gfGt+Yu_rrl`iiK+B$4ph%E2>H}bQF zLPDI63NVG}RiK`9cVV@L_`=?jJ6H+ts06~4h=LB~d<6zE#2FoaD%cIM+onx!#GEXn7tkzU2=Bx4o2-F0{4JNeY7yQ-rd*%jC%VcIgL}S2^?2av zTWsUd4~kO$xF+zBx$_c#^Dv1CFSk+b>1F(;mKJep)iAmF|`k+l9TVDhLh-=xn| zKCE5C%PZtGK)ew5r`ont4ke1x76Ck^u`tRd4Ll*4l4lb0)KBP2yhK!(hOjh4?v99Nnrp?1 zWQfmKLYZtYvNAbU_Z~NeJKhwG01>ir6U*(GG!}5~PR4D*l82PlDXh1#Io!_^n`Tkg z%*s?q!PDY+>b9GjPGB_ep{1^R=SXQZ)#B0bl07?=U(7UC4(WJ=jtQwsg*&Gv4o zX#*Hi=Z&qG&8ZVvH1?P6#L2oBS6VLGvF{J)%O<$7fGItVY1oVS>GA2Bn68cRX3L+r zvXi!E&7RmagJ`C;Q4D2ZlJ{~WGEo)F?SwLx#oBLn+{GE-;3&#sKJOw|e%m|B2eM7$ zE-vm9vx^1ZY`B*BhlbBrJA#|~e7J`u0UeF9W|z&sEb{7IG$5rN3GN81|G zb{jPmP8a5Qp4N!v{Z7iD=ks4w8+d0F15TUAMe5_jo$i(~u+7_xWL<-xHx9#fJoD|b?ZW}j+3S}@;S;oEME=;rMKi=PJJ)G<3g;>_) zjB3Y)0GFs!(_KE64D4l_;@(xSFyQP%%wsRUso;Jsd-ZrWX62=a0yQ$QGU|Z-yzVN! 
zJ`jgf@1&73QU(R*N1;6*le3b6RHU=0Z-&Wj@OI+a+np?9rC9;ng=DO@3#R(-{xKzm zw@iLW>PZgCfXB<4(Y3y0cD+SKPn}6b78G?;{MhZ@buBEc4~)<{g|N@AIfkmbWgr6c zU+Af_GYlXBm8{{k<1mh=~gXek{xVgHL}d^ExAa!T!MScU@pjUm9ZP-jJ1GbK>LNWbMn z84i;|dJv{kF66u>zqjSD?EwKwa3p&ocwxKjw{C1^au`({v&Fd!r?n4{V_raMe$Z&V zqC8lxTkF*t?Kyvc#E+uxBa;O}%%2?69Z6B@{eO|Coax)B-|RzSeP9Da19_nYv7)7@ zQtw9TqKXT&v)u}5kz?F!yDxd)wp9Gf_!?J6m?6Fm^4nds@m01TkVT_4Hxa&l3uYK3 z=;YAfzAQ?<7w320P~Tg3!AFSJ2axeiym3UB=G{AUYm!kwzPvDSKJgiqAWBG76nMAd6jc5pg{>8X+I?{3e~Y zci=w;k)n#KLI2Uqw$B3ViGPZMb@HKBzu``7S*bmsmZ0v#*4cTVG$c;m?1DuJH`Z?u z&ioHztbqOk=CCW-jU_~)J6j*pO^~br;nR_^>Pv9K*p2H3F(l=b(_$#VrXu^>z3Kfn z9jRM}CkAd3F^nT({=B~qhX|`kbQ382{=yy#2~Vn%G<6Kva%9+r58jw1FHY?P@t9j0 z>ns2S9`6QdLM$o%FM#G_-+^JZTeI^wq`yssHc1MEMxZG*JU~6bc6Zir$a zqem#<)9s1USNHNfwWy!h_-f{JR)3D5{z$pHEJ!#49goBqbcYLziMAC+`YVow^FFR} z@Qq^gt-gs|%7Aza`2bTS47SG>+Ls-f&Ssy3A$Yp{;g%ifZu^B*DKOx;O*kgUcg*jI0g;JiNv62z=47yRpsTtk5 z`;Zdwo!hjL1GcV+uS}Ii$1cj+Jd3HoQ6nK{-7lJL2s!pp7D0MysU)7EG`~0tG$7RO zeK0kRtlAUBsUOtjsTsT64yI9FYJ23T7O3@jM=#m+1F?%+%$y0uzh!voG`l=ACe+zh7`)^rO zftOzaNL8Sth+M(|q=@;!SJU;2gO}bx_Y!Lw4L0Z*@z&W4uI2?A%mK}#v_bwQzMRT1 zdyYSTot;BeV)aS=qAZQI_Q-eIHUatv15{k_+);5}12w+RhMmwMsU|3Xbxd%y2;C7v zt#^=aB!RXF;BGVHh-|vi4-@qv64Y8{T-wQmVD)gbqProDf8+`hJKCDn*V6)}q_BZ5 z&!@HYcDl(E%D2k$qgnl--%iwXW@ABoJDIXGm>3A~@%Ur#e^j<%w|INVA_l8Ub2C8| zqX-!K&MU*}3?RY)F={QqFaONkF9PwinBoM>Aoi3jurh_vbj0bB`W1mF2$Tjh?u2yi zHoRSQ_X}P_Al$sE7*d|*NgY(r^NRC+N z6sr-!w{3s;j$T}lG8XtLD+U7I!f}&op6vG5;HFbovaWGC5LN>{8S5w0*xKik)Z_`i zr85V-z9p!HdZA{WPVrwh#zYJG^;k#w@%38k1&b{Az8`zlmw|pv+K#xm1UW8O_<=w4 zy;KC0%tHmdU-z7E`*gF&LH0g~fIcr` zJA-%Y%i&3Axoz773X1V|pf$9cqw4Hl2vOD|+zVNRMBlBZ$@J-Gl!IcA=d{1a-Bpi$ zO~C|lN^@xP6YuBmuY`VY^9$Mh&n|@YP@GyipslJDK0dfgyFvVkePn-BDww9a-g4NB zvMq5vnaB21%G!zXM%UIXuIN%Sh>&}m@U4L$XaeuHH;FxQ-{x##v}HnTGFRb}P-UdR zNRS+R3d%L5E>la*)eb4)8JSbwEen%7tc=CCFH17brxi6}3S+%@`~&zczl0B>*WZL- z$gnO_k(B+SIU$;bu5Yw4cV_6RlC49xc1=qV%A$p?$m58~(iCN2I|!!LJ*DEyH=~~r z;Ed4aANI-N8GSnqu(n?>aMLD!Iw%0S96WomXW=qG461D3ajSr92Qr-M!LCEzD^Gle z;-0oV3qA@B&%gM8n$U;V+*dnIe3#)-lz*y`L{fweN_EVE7zj{FrTcD}z7UM~=V|OMq++5^5bAGP;1wNg zU|iT@&7_W%epX;_hjXp@Jr#F%)qcr|PWV?BWrXi_7=@4j2K{4AFSkNB&G8QvczUOH ziryA+C=Lc=?s5EbvR!nTyUPiz_ZU5{;~&S5lk3T~n3t;S5IR#M^|4Qh!WNt*)-hZH zaL#_I;zny-=1TX+o~x)DQQ#_?O1H7+V4^VF0l5=^RBJQ0Y}@l0S1NTG(0V(^bO|bC zQ^?y0+5fTAyY#4aKIOrihw{OgBb9u`KbN=1tume%3rnCe$SeB?%Kb48hl3fFmnSEz zRZ+H=8iATf%6BBS)7Iw8xzL}S07cjXOU&OOJC#VfBZ5NIY`wWr&Os)F(DL)n;fcuf zvD;LGk#`zhHo{e$b#)9>ZLH=;tWCf%$1kxCldCO{R_%!h&?4v=aFV;2^919*JikMUk!d-> ze7l;T%I#b)+K;l@D@JfkF-FY3T0;HwM!)9xbic@2=>3U;XnpcEtREcV`1IjlbO)2J z`y&fTdd%j5B2;9#A6n9BM|hC$Y)r?qdXv}fy(&zlRW2Vuz zXYy#d0pV&)x^J6tXf$sqEl<=KK0n$;|0?Qb;h>hripWekW?U7)@GO6XXP~$lu!~tK z;V@E1+sXI#AQj6;M0VSN#aniWvyv5|1QG>@_P4?!2B5ja^cj4UC7R15)5F=p|3{$! 
z#nRe51?}f1tV%_6lg|l#PtNpR0hQnZU>B_KRo#bfCeQ4Q!4MD;i9G{VhDe3m{0qi?4KGpEAX35KaH zph-&xAoq4;xNWl@>ED;%G9C#&dyq_^qkrG8ZgClYi&KEPMjqoL=2N0EAem#mQtL|T zT(O9aG7Pi~goyzok%H+m6?)m}{Rb*Z9H#?>8IS<%-0;yy zA@I=j#YR`YIvJ9!cs;B8rK8ThfrDk0|(xuJe2Uq|=08 z3O`Gm7f1EYLzwMQ4@UYOHHKoIKL0hR8K)bz?3OD`#3p3*1FN4^rzPHwM^!uBNa&obd({ zjCv#f&{7N3b~FC)Aa3&`3+An;j_;$@5`w?k(je4{?!)A>a}J!C0p^d@M}LTa7hZJG zaSQF>bAF%HXQK1y3&3D-@I!o&nN|>_ug10BOK29{h&I9xrBlZm-16D}8D#yel&I(% z%1~AD?J6ARHZ0btCqede{!;U3W$+*V-c*+zxGflc1qbQMsa*?AwO$X7Hr1HI#^0l*hR|gZA&h(`&>K0VR7KHv*#!L0wo%Rsnsj z8j*>i8g7Ka6wGrX*{_Bi;~a4QjRts730fprG=SaF^_0;tq%WFdM0()JS`$|uH^Ha7;)@d2UnX|c|{*~MiF6ibca$&%wt7|rp1)fv*L4d@HXcn zOPqmF&q<8~2=R__FU$o=*DUD6@!gExYjM!j8-(R)d_2B0Q8<2@^1kmiyry`jI@-04 z{7M<_s!7b^Fi89g4o%CxA(cn*%afs{#`P-%ZjDK|6gsI$WU8;RPq3TuATilNS%;>7pujm)qC$#+BF@k$ zVk+oke9O<__|JQWteBQejZYU_Day8W*C5v;`l87_jR0K1xpr>>7J^KeK^HMf9}8Fz zl3_ADEewbdY=?#{7bDI#z$Z+5x$j5@K&xWy_6MQ3?~rHi>1gyAZVx4kxe%fI3fny( zSDOfxLX#oxUMQhSQERbY7k54pClS}k`ys2_3Gk+55^M^N^o3Sq>=*}A{0f2jh zqw(_R)FwM!Im&Ir&^eX|su&&-BxuowY-DihZEKUJ`2&hKSyiB?vb$-p;#{|B=}2G( z<4R7f4wX1*e?CMR{MF)dU6pMYqEbN=C$T2LGlU&gn!bhcIZ5KL|0_*GyEy!5Kg#)Riej32tuuGrsp zV98ZjCWer@gx0l4v-`z6V3bDhOJ=2~e&%7OXeV|wzl1Q6oHl}$$LxldWwd8N^4FA~ z+468NLU0~Y+$C}Jj{2GqvCwhU8L2@+O*7rWp26tBsSmhb%IG3k-ExkepyUGkKaAz}q>oh7ShLy1umi4&nQ zpv#M&pxZqKB%dyUxTYZ%p2W15Wc0qG2t+8AotuYoZJa5raNLFoZ<~`mJQ|u9c%3!LilPPC6}HPHO8U7Kbx=tX$Vg>JabV1T-{@ zFxYrm`RZ1|WVDYGik=>%$j=0H&0ZL8Km+%M4CEiIM6y7WPr3OChR4CgDMICz^xwd1 zg`S^C2DV_r6(SL?j0NN#4x$hffwkzEp_{5TZS<@5AI8@fvwDb8`fdGimbNPSIg^5; zrUFf0NqyqqFd!icnju%$?6FM}rk;|^yX>_YbAJ}Q;&Uv+NW{(*&*8{nWS$%f?tZ0Jk!>z}B*BI05uD49J()p1>+$6MX~ z%^NAIv}NbZp=D-%p@D{HKzLsTpOR>S_G~N1KiY#O4W=H03lEiPH`7JJ$P5=AJRF(P zF$syC1XD^=P^WZO)+HSr(XH@6r%8~6!Y;&>rh8RjY1+er!SV?ks}&dn;5W%(86naM zkn)1MWXKVq5@mUZWorVsb!tRqJs_sjZwkk11`Qs}43T+SR}EP`w2Y!f#CHcm?~Z>0 zO)A8qKWKn*72A|Qf~oI=el;si?M5$s z!ux}h*ol`s>Pm$j^?qaT746AVs}01z$_Qtw^8vO{>*RhCy1L1?$HqQ1V(B!9y{Fep zVAMzhs1-x;vQTR=9g0BH0>FV78fOjnd@dMD(rh7HF~*KH8D|*bbSShe%@q_zVk6uY zaPQ3Ws2KA3df{}sKqJe3IEKo8OcIN0i_tvvAW0Aw&-28$cTy@$;L7*cSaOjLpy&J* z4E8Hv+lGjB*+6x>9rtT2_}roE46&+DgrVCRrE8r?s!;&qZZ|vO#6LxW7-HCvEN^I? 
z1!>K+2rk#JX4iW`9pWS5kEccXIobA%93Dv02(iJqjt zmJK7gTbA?HR)kn$@6Ufna(`!DcS;7B^qepNu!#L>3#u2i7$%#(YzR;$>O`v@WAI2A z&#nWw8O#@`W5weErc)2d-t+Ut(j`ikj3zsc6X#>u7@Jh#6v-ja0ImRCLS#H&`jBF>jWFP6fYfY4A+ z=`d;#zMtSWfDr5T@LLP^axbgDQ*A_yzJVq+mq6$Zz){MG!F~vqJKSH2A6u)W`N7)9D&*p zV(i!DMW%}i-mQe@x<}=ZesCxNo**C!+|03U-{<%NFN@XfalVhU7|Hr@DK<{te>yL@ z*ZwFZ5-sEOD1m756|$HB>Vex$cAl z&@D2f{||2rbr)@N98n4#e*hcO97go)(L-ypph-7oB`F)8>Hx@Hr1rknHw5rVZp-tu^%TVUXAFR4+&ydZP1^e z3}fpT?0c1)?L=Kn2Br9Wigt!zyLa4F?NZ`m` zUty@yORQ*y`&Z4PUkz{aQCz05>~D6e6CpK--0CS|1l&R|_YRh1{Eh=cKvRXx{LgZ< z{l-n_2x^4$9(c~_7-hPr>idxmFta`watvhrs=sIfmOi1zm_t^QuS$gP?~O3M-)Cq) z%m&2c&s>(}c&yq}+nRd6sBa>vzd4<2%-JQ|VCdBJ_+&bxEnBk=wy{{JUEE+%;A_}i z62-LQqI;_ne}|@X6%lGaE0Vp4PeCW{V5RvB_-%YUoUvn z?$|gw^}$R(+Td7>U7@345XOVTn6OLda;uy8Fk=@A#u02ej*{=kNN`O9H&6Xw$pb|a zZBqC?k4HqK=-)oE5GCXaSb6m?)%Cp@=ZuYK#6SD@>25hY)!$ynXnPQGv>ff10sa;1 zt!+PMW|R(JQbA_T&f#@=>u5RmkC@gC_4Qr4lu|jvi{;unlHEUWzGNj8KeF7596KH; zV)0!ZXA;~ov3F_B`Tn#K9>Eb}HSToiwpjmc2h!B6(#NEdG>Xk*0({ci!F-utGA&co zbZ)2!)2g0La7W0)ZOM{pS@;Gbe!pjRw5?b~VGD3TDTaUdx;?;DFvsxM*OABP<52G& z`xU}SP%Fst((t>co7|;|W7<{IY8xx&pQut#ivB53QB|Q1GO%K)q25ukV9gLg%%|D-*rl;-FG2ypw zkvp@SSSjMKO8ELx7Ij&nCXIx&daA*?%{S@V3C3`(n%+m~dsSUYD!gV#es9*eMxxDk;?;rd73+cr{IR?TNgMV^d zLJJvPfrL2k?I+OimVqbyFsMt&kG6&c+fB-6x43md(!_~}wMN87V#j_K-TS+#7ZMEb zg0`Dm&8OEtcQZW@n zISM%n8H--L03Jfm+%@k|Jj!7Jj^H>Lx>peFRq;v@JJo5;OWZE>y3KL}BNFB91JJIW z;_pr?i<8ArZQG6WOW$s@`R%&C@>g|3@lQ2gx^^}l><_`CeO^Z(FUgWWncx=O8pTW@0^94hsB z?jqep_UlC?8nnhGavHnu50{CR#~`e3!cO3Et3+xXj>C3@?e|tN<|K)^aJ%5$s zfo?8Dk07vK2OdN-JbzxwB9ubjj) zt{T2T|Eknt!^3=n!ZT3PIweU|`v}=)UER4YKEvEEW!~QCOZ9@p&Wo z-T1i&4Y|{kuJkuCo>$%f=B3|V6Z+IFP;^j)PhUScK?xaS3{CkuHHBY+kMO$h=i429%QNva&? zPK3}6AN*dSq}8(ma|oq~Q07BS`Ljr&e9Il15rpX4=R`=dtXWo*Lvek+^-vZ;eEqmn zgnu!D^4*4sN$0ZNOYHg+9H7ppIPI4Utx6k!_oMhkiGR$zTA?WpyoGwOR?c@6j!ip)vjC%0`--zq5B)A zVT6+;3+TQyw%^Iz%xuoqwfE>RI879nE@-G2O7v{L7p~q4=;%8=ZvKGKv^M@B^sgcm zdlUoxl?ui=pNr*&Oq9TUuv{xG%mo6tupbc~5L6f`OV$Y12;z!1{WtToN(TMEILs7f zVohYf+isZqX&56=d#(DM{kg=!sGE(gZVYS(%_-FFW|{7Pa}WZYR&Nhp)AG z3Qh0^;lW7FnGtPXQ&Btg>4)mvTnu+Tw*$4aLXvRO1riwDfO4P)|KOo};oHd(9vn`@ zF8KG&Pb4ravs+>_U81UppsA4q>F+{%lKT4ZJLQc~K%bq)BncBY3W2j4N)=8St=o;b zqz?pS6drC8(hmiA%6&!o&92TN z~;_3i;8n4A`T1>D(}V|6a-kLxSa@7UH@gW`z!8Wgc%IZGbku zLYl$JX95D}emVM%hnzG@tRzybPFi`V-8SFF;dYhd-tl~}bl?7<;bEy%O-~V6cf48c z)PIn8Xijjl>e#iTn!^U+Jgz4#nC%>m&_ASSB$l-Svk~H1ku`#}83 zq>(6rNFCoLBN9czZ1MFHoBduX7MTZysB0W(qa0&S9s(db0PTOYI9wJE2zHTu2$vb=IinWJkTQUU(^Av~2zv<@)7a*3$4+pXZjf z;pyNk;18q(hNg0f9VTgNE?S6z_E4aeQa^wLxO1ybme_BF*oWA08Yv&W2=yMY1KnyY zKE^7i8VT;watZLhJ~Ac>O7dX1nAInPj3Gx;P+U_N&7yk%l{RD?F}C?i0JUc>DFg_$ z;%pTT0}ZWsf0~S~F54IDEF9IZenu~$glQ9~y?$Zigt$K`J#X4c(iGYv48}L%C2AR^ zHXI}@5W@N+OL0@u6K)kRE!40wf&^2TT6mIM@9cPLa=OsoI>fT-c*0OGV|~QqdXNGl zfGsQaiYCD7CC@@}+lGwo$Hsns-8>iDO)&&Ez0RU0hwc)OaJoe^HZ2LOhe;9Y z{;blARbDnkj>Xo+c%JmOQoAl&s1>`XLhucIW0TfNfKmX%g)CngbSAF4u)~>3w+%b% zXdU_r9fG3w)_l08bF8o65}#J+>%`0s%vszHF_-^%0F2+Yg)>ivI`0!oRHXs}oO%a?N(Qp?7o@X01@A z5qOMQgszT^OeHVsHYTdubz}U3bZftnKwY}|^QvxTn!V04HA+{cW*1TB@rdR2q#p~g zq13&cN>8F=BeQ0rx`Zlu=t{F1lI}DtA=U5RHTN$4$@n*TpsNt{wro`g|9IWk2H$u(^A=zjfVCQnl!Ci=H5d$OXg2p=_qY|snuax*NPq`eTe(fLX{hD&3m$Xqq&M@}M3bmF>V z?xgJ?i0ppwt#n<0IX>==X#tNywhC-3({Vju8SiF-b!V@xCp= z13prEA90w!NghlMlL|;!)Jt=`ioB9FW%d?;91}ZvQhsy;8PA#px(r3laiR0WM~1`M znO6;1X~y{0bIs_PQm^}IXbv%ep8=f14gOSF$CPl4433}F7J8|AMhZqt^p^aw#0Y6l z-@pQis(v$9#Vkt26x9R`?@qRy#bB#-$$<17++WXCBJKJyXBdaR_(D3K4BEM0>2W%x z7X)>A>Mf`XAEe|H)$auQ4kWr94aE}UTIp@-GvP|;NoBevmv6jUyFH(a3#VKh5Kj8- z{0YV{GkD97<}%Hq=VYoXw$S}GkDT~S+qxvu{HWv62b2Ws@eO||D~uE^Ku}x~;WeiE 
z_8JDbIDryR3^+>HBm_Ca%B7n%=X98J9-XadAfuX?YbR!MS#4~3W;ey}9I+cEeHZHY zl~W~R|8aS6BCGwS1m+#dLp8+qYe6PJZ@u=i0})Ih++4704XPgnri>9f z#9i1-Ul|-1vwmXGUw_@Y>b!v%*c6?=AqXC`zd%1b-9cwES`Q=v6{hOHg-27W4ogYQ z({Z%ii@iAczhuxeuXc96&QHl=QNHi~AkOx-S!|Vms-1+mX|&NDY27_Y=6c^c37vviY)_$%3`^J#g<4nztEg!^eHq4M_;pFhiO>ObbXK$oT8}gRVJwjS zA!*?K)NNrfJy$vx+$Bk;zSB9U;GCl?pfk#tVoJa%d3{`@!aw;!i>1RL*>fgNO;pS8 ze|7>oTlq#0a%UO|G8vCGgia?#q=r8SV7L|4La*gBt`KnidY=ir#%1EarC@wAZqsbM zm!BFQbqu7@{Ui!NmtpG()xnv2(Xe}q;p2D+C;kdED#vYgjk(HACsd;gYD>ygWfdk9 zQS1*)fzSMN>3RVs=Tm~^>+~xrWN??$0ISyW21<*rmoPw98;sSVUfFDS#ZGT2C{W!w*wJF8d}rk;!t zm-|lv{V!d4>jx?)dY87eF{&6%0wtB58)^1`oz~NA5h231$j@H4UZY5KUx+|$Eq)E3HFz>yaOkGP`N;qOPKdfAKWI&cP* z0G}&`eZQY-ZCjPqLH9-49lSfX#OJ6rNs{NAyf6f=ITBD$)*JScr+8^O7;97CZW%rH zVjsRK?!zf8dmiOtacZr$Py}&-!FEQL9<=p+7l+I^Fx80QgPK77Fzhz0Ce)@@qh^~S zNqhpj3_Y>YmR|j+`ijEKl97{;k(el-UMz9Rvyz8SN};>cZJmD?F2BE=YX3lggvt9k z&gFn&)tKoOCr`Hl<@zkE7{%R?S*PMYjaC$M?mG5>e-T*ZP^ptC`1SqD-^;!i!?vw@ zXCB?v$EDWad_moRtMjwm)ofK{BmJ_|Hs8PAy6ieICS7T}`Os=AP{9>wMji;wa_J6k zlG)KSQZcKYm2IrH`D*?7?(gH;J|2V2Q`Jp_rOO3_7aG`xR^}^00;XQ}@0*hjg4=AR zB&XD~i->M%xr%8J2-)Mj{(&PQ?jc#X0o*mqBddK2fb8%0XJcz5F%Kt=;8mf0BZk&0?U4xh%XvZe`SIg0w?gBKut>5q>Kk& z3{zEDitHz>`T_AHVDI>O>sgHk(v73*e+L!I6RV+nw|lpmL7hg8W@+{>3y~J_wXA$O z=+3!!vjKd-d)t_=uhQas@6B%h2LTTk3|aaZPxpwf^S`Z`IKTm=WnzvW;_QFWSG4Ao z>}7CEd^tOaL^1T^hChL{Zr6#cd0OhM$?%mFXw%uBg;5_R1rco367+L^N5N1ESc3qA zhNb(4;TDOO7bqXB1rrdm)E3~nlO@nAo4)Xq{+>fXU@;!xNa!oo_Mx$q%@n3Luo8P+ ze123`SQ-4NyPaU^OFw1W&z{>3`(aGGbgwKX2JBIJ1f5m#R&Lp4TZbx8a&^B^v*yr@VP{Fw#;Aas=~%OSdp%hQ~G} ztaQc^0;t{5Mb<$cM$i6j6sZ@frf;^JpgiZWfw%!I;r=avztQxk>Q7a^I6g#Sk7RNY z)NFL)=7zq@Q}1}RG>;n!3iNz3y%*GPk$#ntxHWj-^;cb5QjWBxhRW+EkNd;5vmig$ zuP%<&_@)m^cZco9-k1+n-XqxAz9@5rNXw#_P%jCCs5Fb4Q-^Hr33eJECYkcrpkhZpp!|n$>(3q0ztwNmWAuU^ zgLa6p-4Dt#1uYEtTO%cR!(&hyr*#3AF0<$b`?I;K+VwAd?O885=H!#$NdNxa?M6d) zr&&VDj@WmnDW)l>9N68~&Zc104Dk>^BFxML{B<|xyl^`r(kv1YrXs6xea_F`bVMC^ zrji!T!GXXUFHH#`V~MiD06wO15}pt`usX1-Tjg8jYN=KPz{gxc_!~C|OvCyDPy2%I z<^-$5R_BLtxSYo3(l#O`>O1q?udclX1I4OOb)je5r=Fh z>V`ve*i5q(bIxSRXD#vvI&-)*Thou-ie;}XM}@aIx|Vbg|rCwmn_E;C2}c9Dks?yXzUNFRC?7B z$;N^pJ`fCbLnVrumHzHfmtIb;tVnMT8m@1VUr`Us6WbG;3c)JWYMOqZNd2VOlRjr; zqzKIUKSsHo;+`L}8gsDX2Pv)+oHb`Rlr55 zdL_+Mp2t~rO?NajsK=bzUy`xl-^t=PO$4qF63efTD?N~gmrhuS&q);I{>)WzvJtq%Nj&LN^ zQoetBZ;Yj2Z4|&PMJa1kMG?jFxX)`zX9@;P%{#`p>U%GXOPo;joVXBJhF7SJ}!^0SLJ#+}n4 zCGhv8(L$>4Ug z(*Lpb)=^Q0UDr6m00W4WfOJVnOLvE~(#;@9cQ*`3gGiT%v`BZu08-N34bt8H-Q)AV zzvug{_5I~qt{LXOuQ=Da_St9eF9l3Z=VudYdu^vUG>q$Vg8XX{5tE*(g|VE2lH*C5 zdH3E`1w)>%IZu*m2SmJWNnycSnBT#>+PGicT=lr0K4QGYscpME1(X72K; z7gy*}{yW{W-&>NVqY{?~dfqQCe&%EocD1bCs#>w8xHqF}{qf(Gn z{BZhfOFJLC-e11I_&mefTrg6$33mXoH{QESh^-X0&FyMoT!Yp2?rn_BjlqGV>{~uZ zkGq3fHR8u$Gr_%Lmq_39(IJPV4vgx1M-0j>u%x&otdB--hwsob#vPX?Yy#RwE)nE`Q%tlJ;oA6b2Xkw@m zsUXi{0Lr+^a)p=2P}}WpBU!q;P{SX#Qsc6g#%{a?G-z=AJqSuy+R%;is)%>)%?XK2 zltr2Max3+)5RKF2EKjA`)#1V20&nf2L(pB~5IvRY$%SD-%#90UtW4?o$sljtYH*mA zuG}`pXFZY41bOcw`*n{sY_{fdVY3c2CJF`7o>Wns?y)#B_m?}PIl}G&ud|BA;*2|` ztfY^eUE2jkD$>zDsw#x3id{nvjplOn>&dgd6GSEd#4z3X{!OI@EOA@X4spcrt-FjF z`o`|2Wk8&i;E#D*5lv(P83?E@TZS#oM%iYso#s>&@N3aH(ptYe zHlf7U+NCQ`*8gZ^XYiKNFi^-6Ej&dE!myQTPqU^d;<-gnQXv&Slo*mA)4Cci4?Dou zJl4adl0{ooJJ{ScEC%V&GY;p(Lr)G22y%6)-`3hfmM6k&z0n=q*H|zImPF0TtuHE6&Wqampvo_Z!hVE z_uqsHWp&fCbMlDKGd3oMCaib}q&~2Xa33b4_M59o%u|x-)BFOL*mjL^P!sAJCEDmYn zq7oyKZ+3r2SkZy@7%fKaxtoLmoH;In`|n%nmyUGID%gVBgAV#pS#Ulj4>|VqH=OEN zK>9Ly`xCWpGs}P$xLDCg3$}UVFtg{Fq)~YB$cBKYP!@CbS1@OxgK#CEN#)6`fLQ=3%J!?5f8`?54NiL-*^g9txJ_%~pNii_0F&D_lUg2zI)iGfwt8 zDQT;7%o|pV=ZJc85o0Z+?=R_fw5UKhD+$p*|!B)L_9|x^wL^S-hE(G2j3bnN%m> 
zAxKPaT(e?gUM0MLuS&mV%V_L2E0Ce#RIfPitg=@Z1w*tm#vvT^?GpY={F@uM?U`s{ z^L6$`*Y4@&cxOj@#3V8IAD-7)+2go@84j2$NOmre1qZ3bw9hb_@g-GS5ZUeuaeA#M0P0aOoq&*Dx*NE8HWKw@2fy&0#-LV;T>ZHi_tm7rH~a z-e0C|VLNM$THGYHb7+_H?qJ5R^-4R%l3I?{(G~udT+!y&JmlKJ2mbfQsnxQp{WMQ5 z95nbFfQ!;?=}t#w!z;()ttnyhzw(kx9mVbqBts*9kZKlw5z7>zW>6rZR!0897|}*0 zDL_oin~SUa&H&8bRq^!bN=Cd%X{Lm$jV1ktak?k}T6=(-^^Jz#&(!|jwrh5(ZKqp- zC-yuqWH}}9G3809TzIf$RTlwXsxeBe--A@`^3q)9$=WAzLT3{@D;7uNbxx0CaY4** zgG?3A6X|q4tw@3Fq?;+dq=#}utIEFYjA$~aZ;9HY^07Plc}3G{aw$FBtQz)0elJ1d}XlrE$C;db|7>POGVmPyOhbpY6*$}IjE!{k~P5ThrC z@hzDMQ7_`6`7Gl3tM`qx8Al0FWGd_pJgs?DA@Y7?Nc5oGwAEVH`)%N_RYP|qqe>Paol zL#|}#!c52k(jx07QYs$PHH2JuorD(Q@E`)-=Od!`2lfjVG*{Qbo_NM%QpHb`Sc)@R zDBj4j)QN%Ea}fX9^QR5Btr_`lsjE5&&?TiU&qv~;sI-#7ezF5%6pF%YV$pxx>fpY!{IX(b#g<~U<$c~2xHB8C ztX}-DLvR4~dkRE(2&VFk5%n)9rg7MX3Yrb`%=zno7R~=G9mAjjxY|f*mGTbb^(-0c7UMuZp(98Jz3sF~{0oZlMKK-9y`Dy?IJ9uU(AOZ@kl?iGMsQpt3V64^bE$Ir`sk2T*^dgO5+6-Lu$#-_BI# zCcC;jYU}T3YXeGnrx8BatDzl_t-g1EIQp4s;0oe4O$W8MJ!U8U>GrK}IzzFNr3rMT zru3Z$nKj?m2z1#xGoJU+X9CI(tqf(Yx4#tG+d&+|LT7+N2Mg_DqiZ~12%)a2@7Oi5 zn^6}<;d3m0bK1|=X%#da@C?bc1C74eizo)r-ww+XgseyNCVdz74iSC2UdL*<-7WgG z(HqOLS6mgs2RJcIoDT9X_X1L*^jC}SM%RlOdI!!q(L8a6EnXV)i{}T@t!%!RsNY-M z7aT+H&&O3Nf8#0u@K+ftT=mtaZqdz{gdk6pt3}eR9`P5HY?<#X+a#=euP_CRQVv`^ z{)4v@<rfUi1es^JYI^wdpWXtK)(ePUv+ypu#M{)bhHxBIkN=64C4U15DrX7-Qgp`3 zku0?-LJ#NTt2c11Saz7p)k(@*6TludGdaV3Nk`=A{;V>t>a1$}DzEJExXk}S0WQd$ zYL;f%Va|57h={>VdADDsqGv*oC0cC|g^u-pDUoEG;hBq&^Nkwg&fekRVJ%=VXwK!pKhV5n#+U0|-BN z9!%>yD~+^xU%{F!f`Cyk8LyBfb5(~_D`wQ1+jat?yf}tsocJH(yU~Bfx1T_>Hg_*)?V8+?lb=y~REv z^=xoJNT`|%8kRX!`Ku2dEj1f)3Yr417u+6iwoo|%5NL)TlmEbBp%5^Hdu8hmsa__l z1)lTpFOPlOFs2;lFz-fwr{iHLRc*U9z~c8aJLR16?MocsQp0{ulJtlBruiTqSbz=j zDk7d@^U3J-j{a%w^7DUXE>JVCt2GSUT!LX=7px)1!WIYLP|7%zj=Y$*nhK9gW2R{2 zaUAMtM`1kUpJ4`(o(zp;86pK~R*d|YGbWowWvy{-PY*{~wHF;2Yzg7?-|NRK=)XAx zo*ym>P9V6>rDkC&`qLb8N@`eZYwTJ&GdcsBCZT3X77(UX@BsI;zVVY-CaFKj}y&gk0Bn+$?!4%Ad%;{Cg z**Xjf>-o{(>4DDe!J+I`YF^{bPld5ZSQIMp#ZlW+tE4OY@*Lm-AYd^QIYJ?zpZmBS zAa;mWobCiLR}2ivu*WD2s8;}x0{J6ieB}%Ze4?XXy``?k5AJ|33)hpECDVwq;@?#AM$7r^K2ix2#tDE&A$`Pef-Fws^qrd*fWywWA0IN z=><{HJxvN~0){V<;73J4#y(x^5_~%pll`a+_Fy545z4D7fZTixEFnf21+BAsdBKV* zQ&_}o2e4gPWg?#_T$>Wl>}ww#&3PlsJ3yqHsUX8m_d2_$6{AU) z09;ZvB#?D^1ZeuB*T|xhIs_cA{Qczh8%H)QRR9C+?7R2jn|b>--=9=}^M)|$)W3+_ zZad&ar8*^+)Xp)!uH4rGa-6sa!@0j~E$4AjWP+OIjwN}csTSGcra&Os{+4Qoh6Ag0 zP0}$#>m$XekVhst)0KL8r$Zo@-QIPX(xtC_d$G6Gs`+60)qDW^^_LJ*r$5wL!34VR zzVQEj1(ekvX%hN6-h$Oi?r_)og`YeAHOwPt1_e(DSWez?*!c(CbG$=Um|}S~URo>O zuE;s~dR&P&BdB)Q92hfhwZ%~f_?TsMv%Z0D`Xp<0>QQYW3m~TdjwZl14Teb^Ymfe+ zz(Bf=W*$b)xc01z2i? zaYyz=J^Ww>R3oW~GJG#*kpl-Kr^3glUXTfJ>6?b2bk2NjSY&z9DH(i*VoS2tG}x)A zw@m@ceB)yZs888yy=d*u7iylK<4N*0p1T321q0Rj>2CSSG?dH@ySg%OZe~pF7<=__ z_3_~@^fwU}sv{`+Ek=hXfQ23}L4Q+YvlF;YHL~f@4pYC+uRCdlAy$9Lk*T>T)NEyo zKSv#?PqCFq|I$Nv+;TpqP{dO<{P?~G8$KHFj0y9TJ_x9~67pMCktqv=1;EUVv7hyM zuRu{dM8QnsH{dk+EyS?NL6Klf=_0B80IhLUd4_bN%WX zxwGm)tCaI|@gArL2|-1lcc5dc0bZ>KS8tsViJ`pf zHBuF{tnE?YwTFem9{zyQtK5=`ZsOIOq1xK1n|MWz@K;Bawj!Ub$@E(7CpEP27d(Uk z2cuP1MRdxlyw>=|hiwr9^81ixav&4QiWKGw4r}7oduUahqcHpCDARD#O|p5rS4KE@ z9jCyGGlkNNqAazAzYn@; z+#COVDNm|1F`m-Y-}U>yie3r5{sH~;fsn^oS#6+i#`^@(?Xqc;h%uH$R!Rah)7qJ9 zI$nuKXZr}>_M1EhsC?Dz_8Ra|d2wf5cr+x>kN5g>;k9b}P;8g_xieSsCexTgjO%j6 zn;w7W9w(IvCJ!0dZ_x74HK2-~&ELh-!jyGr5uSPwAZF6IE_I|3DK+s3(}kYSer2g! z;HZA0@3F$rCObUjwDWh^+A}}H=@o{$stt*Ni~Eai2II*? 
zN@%}o5GWXj>M}~Lf0Pzb?aMIOsmNP?(=^8Ayz@41xxqh$49d=lAH7&q70D)CI(CJp z%ttlB)71WTDcOxhJnk`ozxPwJ!4#TZ0gh}Fv2^xjD1+x-#SZoRSBU8eLXNGtGaea( zENk*}D1)_7S6$Lxb8cgN0j3|0%T-HWd%+)?i&c(UUgVHSC}5&93?M_En^YL%$^XK22LEmZ(rMU6ETM4ci!rT|L!8_k@L|U*5FZ z`O!r+|0_t^TZa{xLp!+EEw89|c31)bPOw~aCk!Dn?FcRj*#Q*8uVGG^YksB8 z8BB}G8(nUtPXMs=Vvy_`hEO*2Lyu-NTl{4x5ak3X%4;UUC4@%i;%s6t1B6FK5)K+hcZnQHH2!~=iTeW*S58FjG0&%ja^84*)+n<7%2S2dqcHWQL8MZ2_h@j#5-o zs|)G1^k~&lB1=K_fpEPv&cN(@5#{!F#(mj0h|tKVqFFxCpOuEqyC$As7PoH2c+h@c1^L z!uW}|iU_LXf&Yrdw!W;$TadYYBQrJb#Q?05#mGIzsqKvPr>haM_W&#f^Yf>(X7&x< zABJiw!ma?dNb7T+M*~~m8`Ri(pL5hIWlDCDAf$DC>R0R#F-Wrn$P8-eL6H{hO_hJI zP$XK7|Nb=ZSxi^l*!i9ZD8yx5A)enQd(9|*IyY0!K8>qQ&heA2j^1sGOSl~4D0aP^ zT^y9I{bM}%ngTM`PDe|iNlH@3D}$c~NNEv4B7;j9nc_~=`qm0n@5Xhvk`5)SK6{f( z(n;Dfq^Wbs@Qu7jLibaR91-g!ba{4Poy}C0O3JqHU5hN7sR;4KP<(|!nL{weD z{1co*ZUX;`a@_fiYGiMBu3jm~`jR+Flo@^r^CRTsNLL}{V8zgllVibo!8`BDVC^mr zeI@I2;{>?&L(%t|OYW80G7XJ+8 z716(8!q1lA*HX@#?{7!O6_N~I%(_diwG z@f-qoz>yp3x_Hn9Jnb#Fz8-4UWG!pA;A<^Rs8WPz9%ooD3G@J++tA4X&G5xBc0dvu zZlnT>p#>YzbFz(#;L+JQZjORNn}8UM>u@a9yHvHoTZMqrC=Y*W2!rClxFjl!-UJ4lUFEy|2* z606YgG}Xv45d^?NKNu0Oo!vw-ipWjf*cW3Oz!;w$u71OCtDwMbl%MP1&2;Y6ABC$o z0Q_R~FFoE-l!r`yru6Fp;2WR)h19R=3+I%@E>na#k}{(B(7#l$fu0}YK^>y|tdsls zp0-)EOo*G{jiuhCyu~Cz{ zZ9N+yW)A-`Un>9vayz8FusO+te!@ux+@-_z-u^F?BW6tlKc2xVs+h%b1=qZRDwsB^ zTKXqt62t1Ce@GV`P-~_H&V#*96wQLNbyqzj;u*WK5(h)@8r3gz_)7t!2gjWLL0zLi z=wgL#DV3aBM24l2fMZeFm$Lj9^kSAHE%`s61JE@^P+UdpbI4kCz8rVCl>~O%+qdiQ z5lJXSh1LFxiDmyw9Bd%*xY|SRrhLSoy{g$TnS;QSFe`u6|2|tOHvBZs9S=o=(sUX? z^X~+y5ui!}0hF@bRNG-ikN?+4|A05X!vw}s6T}KpYMoFE&5P2oCxYV0|37ez0kIXE z0R)wHPF8op(ei;OpcI1tzh{DvRwSCZ>H$;v4t42vMo-3SKZ>Rjm^Ayg@V~HM4g8rt z-3k;wFd-D8iI>=^dS|NOA)1ftH%a~95M)V|*j2NUP|=hxwl)CzE#py^i&U>HahC%? z_i;g-AV}nKhIv=PjwqpNue7ei_jbbDRK91?BM0oy$Px_W=&zpG-0dJl z{7)vRfCQ+bod{;A2%6gaL9~p7ACb9t2ks8>Qr!(>pCOGgIRd3Oz(sMRvR{h9eoStU z&AR16>U~4=)%V^^|G74x3M6tjfU8|gRW-y|30TZUvawcfXJ3}N+c&Xw`%|u}(B^=d z>-1%qG;?&zS{YA!D5$-UY*7ukUrZWg;6Pvre@%Wlhc`;&Y!gEl=3x>Z>PYjhk_Dbp zbll~ZhUI0=PgkO`I|WyXdYeA)KLIFPFNwW&Nh(c7V$5N^Ys4GooeV2Rlh-1~cOtT8 zi%BXXI1LYOvGK~d;=brvExU|$TJSAQbG%tq#sj^OIU@cNoI!PE3OB+-FtX62K@Q2)QAoA<*kZf?cbZWw*p-#{U4`VxytqR z6l$Ik&k&heD8_%3cj#xKo>?UKvZ&^$C|Y0e0?p5!4fFc#I6YYuZdqNVG*co#4Xg^A zCp7oqtJ#(E>s?(G8>UUa<_?k0+NEK$bk56iTikrbppzi!SeBndYFaiJi z^SYYW;frZP{-fr(p6t9u3z}FBtrFjFhZ-Q_aF~5tcogXyK+iXtS)(7=lh6blhTqhz z6uGi!Huv8{P^LmjO0KzkS5p=1o~}(y6v6VVvtHJ*fG_W?zrP+&QRQBu?_~u>V+bam zv4(rY58o~ygbwz;*PGrKb<`uIpyPAsVyzVYzA#mivJYRLgbe-}WH&MOf1`MtFa%Xw28ZmxALr>x9I-F5|tKE8za7ONgjtsFS*p z^gU-8A1Xr$v?+3p1AP!$H1gvd@PIE4k0iHTR2JBOgPFPuFpIHly;u?4Ks>f=ph?&( zA#KyEMd^{=wecFm_S%cay<(Fo69-$u7&@Re_m5Rn4H+Y0Z?h-gD^f;`#I~Imh%yaq5;K zdC*h7LVAX2mL|869chCAYNoJhZKx4$etNt;oyvLnY#F?b$hsV6#jj$gKCcvGwEcmiD#G8q^ zcm^m4d_K{!W{HK1I$y7T>DghErc?s?U5f*nwm@N>t~(8wi`V~7oiyy~1kC)N7h4KQ zdlmAt!@ow!!M~_DMy1@+DE!N;OW$yKxzx}I0*=Ujv4TfH!+GGYU6n(->(wH)iVzjY z$?qx2XNKtH-o?&-z*aquMwrv`d0B)C$PB~ZWE_44yZ9s_roaG=o~8f_9eU{gT;+pY z6b!bzeRNfH&%Ww#RV@1(HP&5TJw8Zl>Q`L8-8b;_EUNH5|KQC$rwu++Q*rezCC>u? 
zmPSBS&^8$`P>0X#d-%-$LaDuZ7%c!6CI7oKoWdLw>=`o;%AcCt`8VP97YB7yte4Y9 zMP#Wqck)cLVGt}4i$z;;n-i7D5Y2zj3ev18YyB{J{}QT52|r#jx{|1rTM?N0dXiCU zU-z&155}QPWi1xO^tN0@Gz0B2FzA$f{`}V6cq$G=7DGoSvZb%yG(?ASHw+_j&Y_&kPGf7A?+ zAuvBnArP~3tw>f$(Bov=ZxFpxBy^}T+uPHqLqP1)krfvM8bI2psJ@G!;+|N?<#1M+ zNxVm21f-Q0oFaQ`rXxtbEOx%1w+fC$sOUD+9FsUBadj|8h>?QWDPUdmaSGl1dcz-p z-5f>p15}#})s;umc3PBcyQMB?3HB#Y0?P{`x)o{}&;m3XKp6Z(BmLr$1%$#wY~!Ry zT3f$6o8#NavY+?u+=@$?qO(00VhYxPU|Da18c(Asiwckn5C;U{&YVydq44O~t6|!J z?jNruK~D)fClc>VcBOWRc@#IoXrx%%n&&?3*ccfj?-8xuY)Lzu)XbXk+!B6(bw?`{ zC=S+WgMUCL!TMcRYAwXlKR`eUalQRm8YW+En_31$NfxA6b6T$z48bAd zKF~E&gnZy5)u@-GEdexENCRf$xkjR~2X%z)Py2bv5$xFSKY)Hw!B&6>#_i(LME%tp zlmPo;-;tEodbwchi=%DCIx1xJ{b`>RI@k!r${Roupy`M;P=THfco#}W&y z&dWtDQ-HA&@&f;6r@(CDUHHj;{xE&g6A8GU zI`8J@nPENKH$t0i-p-yaN}qZMjB+#O50INsRO`ExWX(Puy4G@<%p}Tk3y5cfGPj1G zF7YtJ(_`JFMMXI}rWPmQlv8}^I>8W&gjEe9q4oyc&27{yZ9(P!($=DVuS=qX3|pw* zL;j70es8?I5D2LH89kb=8%QG!|M`VAQ%XrWv2R;9E3NSv+BIxe1+RgB5gdw}Qd6#Q zYqYSen^)dQ)JiiDWNFg@=1t=aYL->D$%u$4E{0}LB7UF)DB|R*-d)wPFK`;X)6b7T(S|pcq!6ymYYG#ZW(AKGA{{JN z6@ZOnHQ-^X9KHZ7b-Ae+p3!t@WrAvH=O~BiKzR>{f$To@8QIIWb^9`ZItaIwc*Eh6 z^(Z7^q`uWP>$GB>KvB#v){hMlk$FYe$1xU-p1EUkn;V|6;#W*n?PdOVhc z5B)|BEFhoG8){?z1Jv+e2a)%6FQJ3+z+8KwV&qWumchf1yJC+e*-~T1je`X19Y{GZ;61)nq z_APok?x3{yJ6NI{Okv`}z99?-Gf5mQF@ym^;O{Tt`sy{)#lmc@o;nFrK*khII2SDD zS^uKML=e$~;`&VGLTuzqn4cO5JrX!+nLncr8mOM+Nh|8cG=D!b!5+mW*4{UY74oJk z*BZHb^1BNs^)Buu_ty3=&WPzfOsbi}C3@QtHw&DAL|nadec6Q%`&Ii>de&1o&jwJB zF@>h}$?Eplfbup2DZEfG(i@LEbL~4HdYL$eAq(+;(AXH2m0t2u+t@1`L{5Whl6UsX z><|Z`Mgom@VB%R>@^B|Jo-dz_=eh{>1a+u=vXr`m+Ii-~05&bp?$4TXu!Cq~YOdafF_1z(MR+}2%p36Fe8CUB!&9;GO_ zhiAaCsg0u4?ij zMv_qE=QO!i{m6bx%-Jn4y(JxKO<(Ox|BdqWdVLf+yi0_A;r}$xnrjRt9IZHId@kWO z)6RPCwf|Odgi?`3cUDcMChelxP;Y5(v8fI$j=|bAVD&&~ICEK}XNSXo4Z%Uw0?O`7 zWvY8+K&qTBkR?N^t%r1ka>ukm4v{h&+=o==$}oC5!L1DQLGAM3O}+uo|K$$|Uu@@#qE^u(?{eCfBavA9 zwi?!7f4GgYvGX_1>ct`)^1Kf%AHIlO|EMvi(?77O3jgNgbQjptTxYDghXpO6ef~lx zp>9~D=F6Bb{2l@93~_)ZIlr(}Ks-ZyZSvIEO`JFgp5Yx9(iGI8iCj36FabJ`sO_7n zI2<5NA*abkQd4@vGA9!8spjI5etRWTD0mWnL;O?Ckh=rv?|uTqT1ktICZYNYuSgA@ zW(p`|Ik``HX5<^LWJ*Wc=U0X1e>DlF%D2PNJQn8?xZZ)OSKol zS*T4!52C7Gxz^Hz=UyGu@7=bxjsyUt2LP1}?}|`rUo2<`V_CX(DAZ08Bm+?v69fw^ zm1nwWyHnoiCLxpPx_fQWH7XnFbDcFQRTW^n9$liML*OgEtC39w3;227`VnwTqraMC z(iQQlhA+YwPzh{pipcs2_{&^mL0NK1SWuY`tG^M6$BsZo~^k(6D zBGbQn7|D83vF0aH$+I}&W$v*>M#3KWXE0mkM zI-+@&tnhK8`k(v5PoR4IeMb@r!&4gF5~)Af7l;EakEwT{rQM|k-97b;$GmZAK{Ch<~TMb{|?mS ze|bKdIM1yG0DaMv2a`z+a76Y3yu9uXCh_IgS&Mp^2}IHQ&-cG?MB6dt3w-+f&5f1} ztP+jXgxl!9YlD&j^FgZ9KTJu#0d4o*0nW(%YBiCEjVb3vxfm!6aQq$Q^ujE`kPI0N$AociS{L?@-6vjr`u6o(;Rr-PbJ_XOP#H z&4^}yNH-%cGFV3*pdi*E$SnVrwjRM~VWJ{xUsw`be4J*G zYNx;?lS&&FT8XJmduMcStO-c;OgSSXJ)`zJXliD%k zhm}n}He)Sxg;J6bX`K&A?3O8Vj+=rv%K2}1{NktB-^2%&kogHy_^NNj%I>=y@h;E& zTBF18mdW?INq3YHG&ls~{Z1r2IU9~-t_*uPG#sJuw=r7!%Gb+8%=hiSthw@`=x?!b zJf(E;#Js&;^f{Tkwo)2RCAMPkn>1Z|xf65Bc8b?wwY@Zt9uD=V@+8K)C3Q9dp#8|Y zC0zg^BU7DyBIT3;E;aKy_N@&D4I<-;o8|=TSDeGz^DUr|EwP2G0#iVQwE*Ky96{OD*oUw+cfY#R__e^2vK6EKLS z3lukc>3PVH(_4r~fayd5Z?1mzcEz;3L(HKQ^Pr`AN>`KQXlPUgh`_xhh&S}*G^U@6 zt;b%)({a8j-Dei;*P;^-140kNs(x?o89Luiemy1cOj+0YLjuENcSgGu1wxoTz zZI-UnmVv=Kld@ZZ_|SSM1!ysD95%s|Z&Lsz4BP%}G)vux-q`b;5{qBzv!yAQJ%@qxlM;ayHI(;&3lqMt=NY~gpp6;R+U90#11viz=_q$KrUx1R?x zXjmAJ&ft{DpOh_joa5OU*#nswWYN@~Vt-5cYG_Hhc38BC;gq6=;fO;Zz)wuKe9;uED3C z!@7|$kaxNIx9dsQf&5Ln9d0{MVz5-z;M9Fm`yj6JVX9Zx1Og^=ZZILI?wU_}Wn{t{ zRy#zOR8X$EBEtXS0m~lw95sJnSY%!~&tw-dR%nY#AVC3@soxerf?ZWLAtn>066Nc83|8m+(cFaE?@vD10<8`mAAgF7J z+nrdL`8l@6bKKtIXSxs+S}h}6LRzyD%t9<(zX2lc zui>dkksV1o-60;`UjV@7J%}S(%ord`u5?MKp8=RZmN0O^%Cd#IFr`uNpN%M=WSe`5 
zumPrK7leP%?KJnQz9Ms?*2RhLO=n&6<_D)}{D@hfTm=lqWTGpjoPnFXGGF_nvMOHK z@mDH090D^6=HEf}66a^s4|ns(Mk$!aMUjGn)m88e`l}B7v?E&uI#b??zXwh{K*3MR zs$+GojEY#>u>P7_nkhIW<_#dEGvwoI`Q>s4_BR z$UK&m?hUqH7<1bAUTKwXFGEddKEB{5v$yPZ?KB8g&ph!7T35e62cSu}8RM=h&SC3$ zixzw+2b^u=i_Jh={;4HrD03xg;&W#zcKtKC3~5=>=k9UKg!{e+44gp(+Ym&jp=WZNlOdHLUmfNc~J^vnq$%Mp*`uz^| z0O|KLiDp14tH}DzV$73vvw*&1@T9in2=}ed8;M&3oL&$-FZ<#mfN7|&P6aEBFQ`xK zzxD(U%P+9}UjP#XdsW|2F1RT_=tZTQ15ppJ>ct2{6$Is}`r$5e!QVf3TuoT}LUSq9O_T9kF&Lcr1E~_SIS~iUJd+lm(BcAAJ}#KxybcgUV4nDRqL$;+K-bo> z%`di5F=_UJ5Ti9^5de_j5Fxy{27a%v{*UiuBCy+zGGz%_40!El%lNHN!Ts*wpnALI z%FJEECHPm*lGM97rm|{c^LNa@TXeFoc?qBR4!*eIyX)Jn5|$29I(*gCug77A{y-4+ z?(P#d+JevwD0U|s&52W)vBQ_#h|9kOD-~PphbondlKt8_(~2b09(Z5sqvrS>f#$n) zp$yA0X@ZjsZ@05f^4MU04_z3`E64S8stAAQ6P2;Q@k(Zdep#{7X8_@cr;+eVb>eH8 z-0tYUGgZZNzqc8b;6Qk=7Bnlsek7=30o_*tXq@{t90JZDwz|S@)R}} zO2sw%AfF?3Quhu|=m*jDosIc|U4~m4pei)`oOSg~*LpL!00ohIze3g}*`(%l5`G=R zCi>4y#LvMPq0tNObILz3<8ZYLpsokANuF_R)A}?VPPvmQ*scYMpT?D>r?fj+$1GpI zi0~I28-%sBoNp{2D+jzmFP+mhU=MG9Vj(B~ca^jRzYKxFSNm)JWhT!Ey;#!kaHQPe z&I|B-5j=Pes1ox_aR)Q5U*VCou6C;RHZv|l=s-lLNJKzjsF9Nr zSAYH)yWjkK>N13)4}05#huBsB80UHL@xKT&@)Fd@o2MQVEP#gy+#ZvYdveoA z&-U8cE(TtO(ccqP-x}9qcC;}y+jBE*`u7A+d!*Xd&PGu?wqyI(MGqI|r~Ro1^N0jS z`&`bN)0NNWthzt4*nYbh2DrPnZ6^MfuOBOzbXKKq`|r>6*g*Jh1r)Csb zRUZ3MoI~$&!c0AJ_n}^^;ZPXEzt!siQb!UbcY>EouY6LmJ%Fi}h+q)MoM>&e2cJn3 z8ki2)VkN3L3aX1Qd2W%J$a|zY0t`PNfIN)<<((%UPrC&q6;VkIFmV_57`E_9{|@15 zpMMqSn0aE-nCpndou?B}J)x@5H!4f9SPOL3YS~Jfn5|#El<4ONxatk(JL3hmMr&-* z9iooXJ>~)Z3rAdlOe%0)^M^ zXuO<{ighrQ6(g)Ai-uv--TB_(?&ujQJRM$J=V4)YzORrwijZ6-)ivB-=@rR_Fexs{ zGq2-zR0S-b-P<^s)uW=;6s(osnUs{62`sfU)W6&2d?#SP)Y;G#>tnjVl=wv*!b8!- zzXx?I60w1_;QpLQWA-Mx8-pn`cW=gH0kXn>p+F&JRMsx`GzEg_tQja6Z}QEMqUJzqY;i)l93k8S(wy;sA6c^vhECzD)B1nNdMX z?zW%hY`UBAEqJG=LdbkXfS)-HalGROdKO5!93#?9IMb1jovoAb(8Ckx zxtJsHNVE&R2N=j)8~_i#N%FxjKe}yrZY*hn_mk({L7md)IoI4wGy<6LdCg8{UHZ0X z&0)@FlAdzNt7OgR$^b(;^=q@LlA@^g^<a)PyOWu~0~$?Mk-an&F=ydwDn8&H8T zP(|EaM5%hdP5ty4MmHG{)W(Y!c+`s5DW)2F574#Ij1nZHov#dK@H)|fu9xMOL#NT_ zt)}@DRFxL-+MW;Yn!E)pGMu!KgrwZh{$#V6&bS2HTmw zR)~$T5il$ccWuDkW!7!pc3eU>%8K3K-goL-ZEouu45TQ4oQ#qCvPtVON5?@+L(=PV zf7Yz$ekBO2mW0_YHtJ1W&Gh3GQ@A9UkzALUM# zpbYXdtZnyBRbOoNf8H!vDykfK{6cq(uIPD0miEs`1q)S2~V!9GlPZ4D8!^d?(#ITVu_3ETcU zz3~2h)bM2W#<1v9f|1`1?F6X?Bh8E0t|x}AigE^xXUSZIk$C}aSj{H~5wd|t0|`fl z-UpL^KOvkp=7rloMN=i&EZa35s0JI2buXJA?9jhil2uPcjsok>I2CS*bSxk_BPn`< zaV364gw!v8qzu%Gfsr$EKpe7eEP>Wd3OMOjMg+~Ra9ep^_YhP%qQ2wZ)P`)@U&qcg;M}eQ?=^Rt!(Nq z^N8m=h9PzK8d#XK79)rKYGXuH`s4KAMP|9h#gP)L3RIHF9 z(b`|rdE(p`pRRGMgED`+L0n+qHY@Fr(G}sb&w>WnraxTu11`s^e)Y@C zTKZJ^*l%}D+HR7zk`RFq$67+nQ}%U2UK|}v*7Py$_xA`c^rx{%<={;qO3FFp?w2QiXs*YcZX{!yE18)*FxA)SaY@ zg1K9+;}5?QAmOmH%`Nn=wXe0~0CdO$_(rrv*4Xfp69_ zLG~*tFSgF`KT8`HreSg6KA}$oz4WYIQJNEcgV7n%E%dlTh?#%Dk$b!NLPS3r@-Wqs zD@?i+o5Eg2x9A8bc{%FB34-b}z|k<3j0VM>`IC`bRMH9tHlMcEDqvc@eyj#s8`%X6 z-akIKkp@(|sBr#B)Z;&08csjIpI1=19DM#~V?ym9!IOr%Yf?uqPzAg13$i{)f^RL1 zW><)F)p$WR(}&vWv=5DQY#jfYG$k6m^oFfc@PB&y@^GlX=zonRvSf=sRQ3s3vJ6F- zWE%~#GnNptCS-}k*qVqi)!*o7=vB7_Fh_`OHV_xJt(_xwE1@I2;y z-}l^e&pG$pbI*BQ^%%-XMN>MUMAl7s=99BcJv)af)H){cyi{~C!o-Xs%(ZZTiEaL3 zvFBxB633e3-1o}7ieM`rW2yG{MJMG7YbV4of;-YIcv`7zz+yfc=t7)_r`j5(($Zha zY^)>HucVRp@D$_#uOOIe!%_&h6#$$dK0~4X`3X9Ye>9@Htxh>otzj=Yjz28MhFE&0 z5wC4riBGxBQNN+!c8<$Jz3p~x8n72}XR|YXR?8q|E&rNVZS`P;V7ur6)u#WXVEGle zcH2D9ne)0d`XzaOb=0#jQ;j(5&u6dDmW~>4dWt3tdGq45SjEMtd=1bBENlw052)?m z>0*{Ci`e7G;gU^8@K*nL@4hqUHpnF-!z~X9QZwRwpkB^;VhOhE)6Uhsel~c2$mm(b z1FhFv3jDyP25b;2X$0W`)^yZUDCGt*6%u`)>%&ldKW#zv`Bk8fd6EW}E2-<9nb;ao zi}W$I<%m4aWjVxlETyNPxwFMdtQYg%0tJN|ZL?R8 zqw=RQx0*@72oEO4a 
zS`t^0M$hRTv=g72TE_3uvcAXu6Ap%-MCIEljH*zjl$DW-H5>AlFzVvzbKC z7?MO> zDWgw5f1#9aa9-R9aETho#)bowq((W3TUowfROcef&0ihHjaK9hgI{dlh+>l>+*6uis_U&MVp%3gqD4$P^F$dA69Jy6L?SR91}t@$(?Ip{{93LM0B8Vp zqiUemu*=GfXOl#CFYuxt%ZkLF6Vgn}QxaHEM;pBoAfsTjgQxEdDgco0DT@x~pK|?l zFq64;AmjzR!6NXk+{!hys0(pbJj7!D?MZi96BCQzU)6Z9luyfc*WPJ^6~o7eI`9y` zcuk(_U?;O9!utTuF+2Kr_ZpV80GBMGfbN@HKWQAcq~8gC1z>_{vB>GGp_df>c*Gk^ zwaTr18_SQvn-)oP*6%b@?stjF3Gnmq+0scV&4o8>4?QAh?^Mt-h*Ke(Ot49r@Tf+V zQyfD=fldH`a=A>)jHF**S_iwk85o`Tc?NzkoNEpsT`@rSsp>Q!5wr{3<24rGu-z$c-lhwrF0a#P zv`N1CdIQVUQ^>p#b7gnJVdnsiKMo-INLsDTgWSRgb`FjMy(sC zi6_fS$d-|v39yW22pay0?%cwJfU`q|XS(LL#@@D9-|&sDdo|XYtY=!;pcG#ZHTpDw zO=d;-DI?5fRc}AyQ$4F);Hh2`1s-uuz}2c7DvsN4e6kD@CDprN-At3mX-Jz*2+=7O zCzF=_{9np;9F(tDHv~8=doG7%ZY>Yw?702H0t+{_CfNmY-bi3JOVNBgsaV~8^Gg?l zJi_Vz+AgJqveo*uB`=+J>M5LAL`!JYm)rf3JrfSsb)~xJ^p4?8fLQ&rIek(a3r|63 zbbxwB0hOGn%lYR8j9)O!L@^caBg!QQe8dOq>CfuT4UNiF>_-Rp2Z0P=z%n0)O@Pkh zdA>5Mh`Nyp81~-vkEWoK!_u8tH=z$kc{*PX)$QG^W-(BQ0+Dp<9!sDV(3qD76d}zA z98EmCFuTG|>t;#h!{OahWR$Gd=^DlE8b%#ec$Ef!2bnkfm!$)2J`SU^++r+s=Auo+ zFbl5b%yODiT6Bg(3rf({nuC>>=7Ph|&5c4#2DMlB0r!dAL_C_DF(;7a2?@#gy6rUn zB5v~?kksqbpkRAL@S4?@9&PpuG7HJ!Yi6obwyLw=F5-dhM^L^UAF$ue8^T|C-t&FUc^k78Vuka?7>-PwN-tFakB3NktAUMu2(DrDS$~VKnM16#6mMs60{E zv`@slYa0xaO)zOXBfjiOCR^h=x4GoaW3oX4w@~VqReF}7=<(||0Dua5*K4yxlhWGR z=Wu4hLplN`x*~9k?nq!!YgZypc?S{toB1J{RHxK!*7$BMj@5e`QCh^pt7e$T5ON*W zCbdU`DxLP=qTDA?apKS=V{encWi*4=vln$}GsclXH$Io)v{N?O*9x%tCC&hZ&SK&I z;q)02NqaEkk1Vkx+krC+Mlq`4_yOU5axdJ}06MmbG+&`#1J|Uv5H3?nrykU?WY=f*y)z_TVht6xxJ4%OrqMy@||&Jfq%dTto`Kj1jB+(=cHvA%`M4u6F~ zpv)xx<=TA^eOJBCS z2`_tLZ8VTOb;w0sv3G~BP{20i8h`<^j)P znLjMuegb_7=oVwC%h4;JscrnaN2kT>Xc($e1S_wpX#AaF~Pl-V~d{)e%^l{+lL3_vJD zw3vf~F7GR*fMTdlc=nc!RVk-<%=Yo7m@^qI`0Jiaf? zg(drf+mJs!)Fj517NK zU=|+n0G)r71kf&+P;zTAyI8a&moIXo32He9g)mLbhnjNuK^80wJ~5T+WqZK&^oxAW zwdZ0-jtv1QSOI2uQt%CZsdIwfzWL*14;_%lr<0%MSNPF{^%}`b-CZxhNztzFo$WE( zRuP10S|a_4MCD@3eI^=i zGxP!*$>8BM$OBI5o>}~!F9g1P;~BIaQ-y&gQ88XwH&O$o%aM{chu!~;rGz)mf-Q8~ zUOsIs*8s_Iy-+LmO)sxZ$0M+D0zZaXw#(AS$J{DkrYIpq$(`b%da-lRKtI1aKMpwA zq0&A2SJ)^X1-(Zn=8KLSy8t#uZ)or|VqthHMlnHu1N62;jP-PEw=`UaP%6Z5!BMs- z;R;Bx3~u46~bj0*}w)aae@Tccqmk+u24PK z-2Y>}zhnny;lxp?ZT>D0cl`Y_t_UcWPK@`wTSBFu_(^x`wA$4j$VbPtR=W#Zxv;=g zqsKvuL-?OvO7Ozsq?sDN zPZ@fb`Wqrk##sh!mRJL9JTc)dlTYC;HQCf3Ki3)cH~T_DS~Wc&dNg?tA=`l$wmbxt z5y^iAGme_PX|IiUR~XlS#Zn3hVkDRkwKjvxJCngl^b$oTQX(Zoc-QU;=_jzfcNDKK z_1qtk#XW3mVfQe!WvtW7A^~ni&_R?qaNi2h_(s%4Y=%ZaaPj82L8tn#x zX~ka90Ps2Lx_mWa%L;wDV%m}`LV=iYj&qFCj?`8#6IwTx&F`mNr^!j##~fg%BfH&P z5goV00_+wK2f=vODxjhbsBv{Dv~)0l^ti4-?z?NI??k}N**{&WV_F?g`nBe3%xpfzH$O}aMIuf!T1%O&2|Rr%5n8Q^BaZ6$Ad0gKs?cMgHYZrUDD*tQW*AZM{Q ztkDBnVyNOpL;T{pvZo%Nfw+~p`3$DE+*K?KIx4kp969#s$T}tr0u`~Cb#PFX4ZN;* zyC4yXfjPQ781PAXa(dKN$IFpY%>LcVku>H_8F@9Gg&>_;vg%f5eQiW!O2o_ebwu~cH#F|+BVgv|47T9 zKMSXT60Ld{iL%oF)fIrbxK8=M+t#ZBML}51%}f6mvLpEkDwD&1{r*29bNUD9ZMWo6 zFmM@>-4J)XCsCf^{S~?1whmkA-mBs4@^$)kt-_IsT1=c>j3SwKtV6>4mwEyGl z?qWBm4n`JM>DV)vcQeJv$|_SXoJZyH^4$9;4Z%mvFI0lD*TZOvUcWXN%GdE8Ei5iJ zINV$J`E}sa^!S%TUeFV!^TV|sw;9h~hMqZdM&#q8&u?eGF3b2*F+(X%P`wgjj#k8d ze9klaDNV-M#3Uc+a&2#Yk_+OJI8tVjlK=4MhE>c3B}9oy*Ri9GeL+;+8$NoKA8`+%>NaucZP7t z?%p0n0RpbJgfdWtUQ1MZ`y%@qapBdYBa+fG&>=2hcQvtFDPTt!*f48u0@T?5>O@}* zi#@MMbIGKXw$y6gYqk~1%K+O1s9bmn+;Ljp5HFw!Ui)@4l}K~9H-oOh@B0N{Fc)dj zlZo@=-xdZh8mf3b3?jUwX{bo}~wWdYDdK|(@8;t%(`>l4)&<)DCVrc!(aU9tP1w!S`ajWYtce4J@zd$AiM zbtlrzCDemDc!s~%pM}g?W>hnDfo+UMK*MQs_N{{7M#kRvnJoSW=OM6sS*xhu#?(rJ zO?f2N`zVmLoe_gzcet4_1y>0$;v#Ey$N zfS94Bp7hlIaAki7Z;LMF5r(W7MWOYv;UYt; zOr@Pgi?=hiC>iTNlP}45F87$=JUswL=b633ti(F5;Dm^BStxtxT 
z#&nMYwJdi&GDZAwn$DWeYw}vuo*%+Y@hJM1D*Dd1$AJ4p7AOgvUJw4`CeJJdXp0S{ z4stE+PLUi%U(~De-?EdxIFg{BPRXsyo7`i{$n)9jAo-J=gjK_X0f5KTE}&$>1nJv& zC3n`yLbLd^L)*kYV)G|t`x!UDM<@kyneGD^326Wh@z;RF8wH=$MBtMP0CuhcG9!V^ zylfjf6twrv9NqOklF|EPK0Ix3;c6PwRHI*?@x^>rdxeir6o^46={suViGlJSfa3LR z+|`jYEx`p^6Jf@_n*C8qQx0t=bU3fwRpZ*8o##DYtL*&TED+zA%5~1;IC;XmovW+N z-)1}sejA>EB#)Ao_ts_dd)C(0e1MZP*-bTRXFt0#x9JAsJMGUry&c&6;6q&1uilv` z;8Fy>eQvUt|M(oZzxm5F)sg?l=br_BzXL^n$CJ}M8wo=nL@A&0j>(1&4jPDC)O<4b z*_bk)i#|DAep^M{3THVaROf?7o>3wu?L8lPmj25b;`N1HOF=2 zW!F2M=}k4h+Y6mfvo`NVxA^hhZ9SF&S~ctJ4r|KfHS3W=jq14qWumWP~0 zAIPQ|sh zL%eo=OC`|Vrcnsh$WW_{+ut_4SdbvjGv?f{f3LlEh8MXjyUFmc4eI|NDQ)|!gqux9 z`h=JE9Kxm#mng3mx3xU%cyuU#w07-x4{5CyMUT)+oUa@h@kec5Ue5?aiFQJUyHj}r~vsC{wlDWe=gi0>(2KHFK pkAL+?0&HZbZ{vY#MeiZ-! literal 55421 zcmeFZ^;g`<6D~|}0t9yr?hF##gS)$H@DPGqaCaM=;1b+}+n~YSoeb_S?__ts@7?_u z?zumG&Z#q0cUM(cJ>B&*;mV3qsBa11LP0^H%1Dc=LP0?%LqWY6K!k@#PG0jtkk1=u zRVgv3$}wUv6qE>*jJT+}`r>5B#H+=s|7c_>t{85i2Mr&)U zXunps=0qnmylZ}5ULJn#H^j>^x6t<6zU;Nty8X5N#qsw2%;@m-(J_I`8BR8kK9g+3 zH;PMBorPYvc<{H#~gQF_dAk-$;o3?-N%P;s2lCexp#PhaOhfjK~$L`j-lb z!UWl4nEz?~TYoa;MB4=wC!9pCE4up@QhYC3Rn~=OdBM83T1DmuqrXNei|xu4rO21k z>Liw7>AqQx)c3OYL`p7|ANc6^;WO;Zi{8$%qXf$w5W$~W(6?<+@D|o^D)FyzI0<2C zlyKAheB*te2^Es%%6_l9MSfGEp;t=_4|{RIM$G&ymS18yluXPfmNuLqI*kBX8Dy}D z!pN{OoNk53y^QYtSruA*Ji+q+EJYa_vceRol(n+|h|#0Iu_t#o|JM0G=7AW?9}``< zs1~;RO7p*BW7wop6p+L?!~bvQKSoLV2GK$Nt+468Vpz&tc?$rGr{@3c{+AbWVIex0 z-Y~Um{6|cV|HgtL&}p*nzt*NEhv+~u23P!7+>J=5Mi3R5t??hf(!GJ`kb{zQ{I3{M zQY3BISpVwBe|#>21ks@bt?gOJiq?HWYjSdHL9{ z^bc6@F?1zvr)%t{eTcCh(G)%kYJWR8mmM~p%bww|s?TjOd0a6WUGWA5M-}BGQla}d+Ar@hdnb{m{rvGqVdiHwUxjC+V++6Z!$^Bzj{W`&n9Z zhFe!I4u!OBC1pjMh78>guX z%OJi1nNO9TLJ#=`e{yBVgN@FYA?kL#%u9R=)`l6?FDp(19*k3B^BzThuP;+|R!ZYs zq3bA22iK^We}E%baf{bkidb5@;~0ykDF4naN`i||_JCFsH^A{WalhhL;y9!ec)Avde z%O-!mzC2nyT!Ft#$M6vDK8Wx8z7HQ z`Tp`xML|S;BT!dU7b^7dn^MR7ynlDewhC3gbD%p;DxNUgLtj^7Vf*1;fX06JHk=KL+LYy86!j(D4t) zO=o<0JQcp*&+$6|&Wwt00U47@N5u{N7@^-&b~*6DcDBDf916}#+FU@QJGS~^)GccR z%(E|2(w+Q|i3mxdkK6+j_fff5;xu{p5RVv;e^Gzx`+Oq0b$>Z7TwI7Tk7mKJEv9>E z9fx>U`!VQlNb1D_KEtu21#fQ?l3j3)C?P&lGV!Vun|E8XrtQ`vjujG<5J4el?YPUs zbA|48&@BC54IcWK3v3AhIP-k2{kYtZXLQ7SjzT7+|4!)6cw8k^7bl$&9*y)!BO8C{ z=z_~?jM(F>8=>8q=McNDq1w&4<<%~epIRb!k{saOjCv&fbpZYMW+)k#pyyRJIQ-Vi z$lFX~#h-o|Z&{m2&diPT>}2@TwP07zj!2)1%z$-fIi?)Zrk9 z5g>;Sk>%Kr=Bo*&g{#}8C!XZ+V zyi)6bGVotXazYvd0vh7E+BEh4tpNa$vdeRRVE#9hApC>3xTZqpZ5aOrCD3Ug(va|? zqQZY+6cI`ZNd1;vI@7iPw}u{Zh!ik6Q~z&d_o)C<)EA|R49hG2tpWH4F3_Z>r~JFr zM`J_K0+DsEZNtAyJ!F?!F!YM6O{@M3TI9x4Kwt=^y?aIDzcrZrkpe&2=>FR@oT{+t z+*6*m4Me=qBTR`XP*E70Rn}rDZm&;?DEjr-fBjaS$bT|lB7sxun^YprtYrVLut)9S z&9YUhMB1>O{FOY6WUj4Nd?c%)BHTbMUdw*Ac``lk?@uy+t+nJG^y3NRRE3_)#k=9_ zlh)Nm-{*iz&3*k;mc@H6wv<|>Ljs7U!eIO{D8+A9rezqn2bC`WNmwOe8fM*oBWDw+QP zDb^-yGGs$)Xu0IPyk@!KE;>oQYF!H_m;Uf2`h-M?1kJS0=Zc-iDN|rgO45C_NHIXh z3V~|Tu=o{4e}|(83AAI|``VWWhwM*N1t(vZ@b(gUoy86P3<|t%>*X|_wc$7gOgQ|bFJ*PM9TO@LH#m`Dp(rLp54Phz8-IP$%VQ&S(@afdA zNrIxX)&A_M+;j5kVFn7uilo*K{q5|6c=lu&TMTBUKq;uA^Ul|R@|K4K3dTx&Z-xII zNI6Mizd$t0QfcHD%3ExMb9ASJ$g7WPDQv5l6Tq_rBJtuxX+d2@{eK@tRo++Mu608r z24U*UQS2)BP}h_!T0Goa#nY>a7b)W0KRU*H?BiH3YZe&rV{AR`sQstogqG zB*VTys7r??2MWh_>Mbd3Dg2r>PY1=AV(h|QRSqWxT19M+O}O|PGZIQzELe`TNMpKG zy)iaXB73UZ`#lQViezCSTc8&4N?W$t>Ty5bXJQ2?xI;93M z_Z^A&QoWO(5Z2Xx4h7`=2)!+X{g7&idB2M{0O*FrV2t}ph}A8=_BC;^L?tFX=zY-S znjOag_8zi9Tm;tW^Xtn#{*zTU#wK^!%JVHA?*>$|agnLUZ~ld8?r?5+HIL_O3IsC! 
z-}LR3e`ogzZFJ-LhJIYaCmmV8|EJVfj~(CT`78e zqVLAZQ9N%JRkLGm4M#3%LY1pdssoXqo{pO+&;&Lb>pwmr$L#`=X5JA6&1zy12rQJ2 znN%wW_Zpkf-ktRhb(8-{=n}vEy@@Y<#14vf&Pw2;Ttp4*YOqqm08%a?(=SWl@yegJ zvPKaTx)Qb?YdXf%%xVrRa~;jiE&VkUWm4!*T0XMRCpmIcRuojp9F}1yL^NN{`zaW! zn7(wwQ7qeG3Cb@ZW&1QH8alK$41SpB;UE3&9K(W;DHwv{G)fZ1!7wYm+9X^Tk%Kb@*A#nhh(~d*@ ztB+_EsX-P(vF7LTQkhqntwMmHIMc1;U$mn_S!uQl&;IXG7*w~9oAA2dFmN!%$Vu${ z;x-^=R{vQt(6l_(Ipr=?bN;-8MN4A!=5jlhLilOp$i#T5SPAcb8x6zm8=k>T8rmp# z4#$00c&Kn5zOisrflr#nm+e(|W#*2e{tYEE+6{q7>T0d0$bqIEe(;%*_ol#$`+ES2 z**Eei1PdTBh2Ii1^d;4{kw(#%yU+13 zXkSS%G6ZOTkw08bd$tOKD4?jNIn9v9(ZvvH2Hj@TIEUCWgGNl+w)kvZdYbKOBmjX! zeURfZg_i#2(Mn}I*K0?H=WSB8Wx(pyhKhOz;TkGRxWrdX3yqErN zJh%H>cTmf>H=+C4ervyw-J{U($=N?CS0CU?lA(Pdt{CH2+Sy6ukejLY?VSlk^J7v6 z>;nb$3M7#C4NlQIB3V!H-5%H^=NH;dIZ(z>)}*}%ChjQ}a23(H%B6F-S7*|=jLnEs z=mST-H@>C@3S&_z+89^eg)Y4OkPHv(>AC_DQQ!|x$E!q6fK3q3g}#ZJr|+d9ONY2;({#Z`c7B)}WZw9!#F_oNyA=!dF5q<2 zMVjfQNcL+#4v4dc={R1B@ba{?JJ|hP!mM#6L1hqFn8I#(W^Bg8?QD7WENUfu-v=zO z;wQCvtwi28{wSzBaMN3fahKaDnXVqS>a4;MNoC>y!j7btk_3>=QnWm#>n;`F}8n!yi-6{i?m8asDhY{esA;(^dUK9b( zL<8k1tjDC1W5*)U$oKrzK|jZ2!w8YBzY|^T=HX}&)OKIQ_A&HlccTu+t=QM2-|(52 z)o8=~&(wTdEwiV#RD(HSFPdARK?Ta5H4e%t zg7aRY5?>gu;lhVsV#ow_mt!D(?;18GVf)eBb**eN>o{EN1TGJNW<0GKo&}x7OJ$q* zko0?$r%jzN=ghv$iQhWyPwv1`9K;`G29WbuJ2@?v?G3DsCVTOLG@u`{;LGu7-YaFZ zy{vhlVpi0B=PR@Rgn0w$vZm-y42et=78HJA{93k*Cy47|zI zOeskJYdnzvuF(qc@f1z*xf_85>+9hA?s;1_x@SP7zK|ktZGVJiLbQFZ2K5wSA@D;{TX@MTMW1J)XfUVba9PlqMfoNL*?Bro0-Ota=(C0&TH!qo?->dzo&uvRT3=AH0 zI+5cKdCV-v|MF5{*MrBp>k zSMyYud!*8VD$&zbD$$|ucdIN7a9(ZlauT?Ieke#dUgGs4nh|nrPyX@|)!V`%AOeM- zj)oFm*{+NbvHd)RP0wxw#7G5+_| zq<-w2lidjXLIje>!FXG&A6s};HHT4Rl}JkDD&n_{)=g^w$pE49N}Ex5B6GP}Z621U z%;^GHnZ|=i!c}f!dSwd=>i!gg#A<>*2MnulD)eps7+&c@)o{=1GmvHBl7&P<6SxS( zR2#;&Fj$1S+m{Asy>|sJ)k?U`>X0S?V=Yj9KQEUEaI#wKE5cpQ)>m|rDbLu-`G)IX zKH%>Da-S7{zneNgR#t|6_xTDu_si+S^CQvaVO61k!L=6~qZof*4_@e~F7>b?WXddl zp;U7U=Sq&r+CW(#`OM(DZ$>t66vCJ%5*@J)<^lfrd{%3y zG9EI=8fmQhD!t;&X5&5_4HwKQc| zGOxa`F?_Va*so8_81!oQ!;B8zJ&V-ILeDd71bBu(PcGhdCkE50Urc0M4w*^;nbCPH zf(bG+H9ijo`+Xi%sXQlr;N2|4h@MVuaq{G+t;j2{8WRaRW&2_hd|7n`=k1$# z!Malo>OA}8$$eZOlE1Kxl-`Oz`wXIon#3h)&Uk1s`h1pZl_INhr9Zg`cNZ6tiozKt?^!3c~|{|AgQu`xq?UwYO>~79MBG zG25n?pdo6p993cx{opx$DNl$7%LAIc(A2F>@VW9+d1R+mX7n>spl*?Yf;LIT+;q1k zY?0TbL|q{Y+gyc_@W8UA8mD0!=BCdlLH*IX+WHm$p|N>)S}EmUx9cwOI)hsL}0Zq#;+Zv0atWPGQ z^JA;-9TB_357UP)e0=ZtgWyHW&?#%80VzmQ#298pNWb2qbWwb=-7SguaW_w?hw>5+ zNG9_;0+q?@RpiHVh1i?ghARrrvZbW09m=uVqiiZ|k>5zITIF12es(n#B6o0P#$eFu z>B@3e^ejC`B^Pni4}|lYfby9?S=<1xppXZ?#=w}NJE^9^f~GFRQ%SXx%%i}0NrsusMjLd{ z3>1(D0Q!}`AR3GLyS*?s^#z7)<-|Cz+r^D#BsN4KQJP`5l~RU>KCyi6jhHX zuZ(zKM>w=K%t%7x#52P1vY3^J{-Ozno5p6jK4HE%?WBkm)XX_SAPf6?%)suDImmZ7 zlf!zuTyH(S18I__LYlowAt(f@4(t+wO)Fn0d>$iENOb)kAxsJsV!8{+9R;)ATn^gf za(Pt*1SSur8bv6-%i&s(xEI*JUt z+VHQC-V6WjjGEHWY3J*SkK=4v@)4vhWPOYKXm<4}p2Q9N9&(2ij92MrbKD@|d4=Bc zbn)P<;{AA|7eAIILF%h*K5vje_?>I|Xj;m=2R44*FtYI}!Vl2lw8{GVOj`@NuxdRe zT^zX?qRkon^q5!m4>tf_t66jWA@Jh$g|_w_6hF6BJ0d-bu4QoghT3yOLsQatO}ug1jNJ?ocu zy_b$XK^p<;?l*FWWnT3SE~dmS(LgKIctn&amCt?RcBZDy426_XG(jbi=?ULnm*)s} zHv-Xa)`~N*FpH?jUKEf4h15ok@(te44brJB;8@-FK=N=KLe&@;c$7Goc>`|2#4ul% zLmJJ9)a<*9kw{Kknu|p%@P6-$#6z04u4w*frcOs+lgr`UmzVqfk#yTi`Z}xeMmyMN zoN!w?-sAdd^TK4!+McM_0*Odm2+t$&Cqsk%@|>&`|4_1)A=qcmfbKj}L9z=C?82Zj zUUE*=X_bSsXo3*$u_yD(pp)|1(D+TLQqm{7m+s%7;7)s11@uAf?m%y3=R*okl-rK}k5=`_AOpMvbD|hehBM5GXw1 zDj^}$?Pov2gG$IrS7wuv>>%{?(e(UaB=`a4SeAwoyN`HQSYwTw(L&D5b6BqMOl@Yj zsx$L#Pv`Q5-Wu@fe*Y!ylaP#GKhE?tyd2OOzfSh|SuffxX_M-Dy*Yf|e3N-0iuJw; z=W&@#BcwTAf4TdFXi8qXf4d$z3RF>y=ojqGL--|311AecoG9j65P>~5R4=&Om*jf) 
z)V?whgR^Lvu>yy*DLdUV1@p=gL^UECAmL1At7ZIw(Lyli^1FQ#PMVOx&h@ zRQX9r#ah=%n2g}WnX!ilj!Pv*L#AilD6{ab$CktN%+tjt*#rBRVeC|wvI=6W>ii{( z)EU#@$7hY@)@d9o#nMg(jIBZJ=KTI#Z)MbXu2|!o%kerx;CYS=DW#SDf`)8ia$#I zX$2(%AdoIKo-%)=ed+G?X4&xqLf7!ZG5==Srfpl%umj02DSrqc)kb--l<`&Y649L> z(s66>2R#pi4F!)nK)N9=YaaUzy7c8Kwd$i(VLsgBW7Xv{NB!)Nul%QNl&^I+k+0fj zEK?RL8R;zPDR@TSlXX>AKTMx`Vw{UK_OBX1XKi|s5I}xAs>pwAK5&*K+NO`4qCbV& zGgoB9wcfU(ejya9(inq)51P#gXll+b0kB!LwMt^DN0l` z_2UV+8H1m5GI)XCQ1K-!@W?*Al=93i-Fn~Y84ByyK2K1pXO;#PutuZ?`kP4|UOPdi zKNN;RSEq~UiZ1Jcr+No8_o&OGEzDW#35kI5sPFw5ZZnjFOc$U zU-6_ohESd~m)fJ4YWgru?Jdk9grAXgpqx_MEr90O82{Fx{$(deKDPeqB z#6)zUComC0FyZ5adRsvJ;iV@6dsx5pXPw3CfL%yG7{c6PWzq~CyS9Kui#J&MyE4UDFu{C;I1mdvq%mq-We zyNAH+A(`LP`|=;P5!?uOyPL$3-vn@<%vi_DzLCfz;a&s|ut zJbkoc@CnIP$oSgKljFhi9CP9O4B6C>o>Hp2{3ImUc08zhu?yKh?iT95CK)+&I=tYF zS!G{V1z!^c=}okJxr9{2=Qb%H4Q`lzE6vV#Map{~@Xr7`#8!Qt(D zT8%%wWfOaOR`HgM_FMgi11kH<0u|K&@J%)JEFH+C+!g$t8~C;d1}gxJI?1oCyh$HJ zb-tTwbdxQi_T{jmX;^`K)jVr9^%@rn@m*XKC8;mPV0<#YA|{&SdzzZJeo* zY(X#Gj7_*U+wV^>y@<3>oLnkxw85XHB71s_wHTpS?3-5Yf?|T(`=OhsD(7mZ17kfJ zqe<%T&1$?)TJ}8?UIv|VmfejxxbbD^ex;>JtKV7esg-v+Z<6t;E#?IWB1$Ew*Ail| zjw&Ct06MCE9+a)UO{xc?XrGG()geYc{k(IzoR$bo`zN}`XOrfr320IP96z?w@M#vC zanJQ*I_Ka0oB>|UOMksqI)DyL@b3FFavV$(l$}2Ic z5M;>ZVZ5XA=wfqR8_uP+t}7a>Jbr%3!)3~H+caJg-WB~8k!Ardd9GgTQm z4skY1fA+tJn zVL{o+>=rDIJ2bPvhexl&te^@DOvL2aSQYHu^XW>i7vK>2v)E5~6aGE9=DZTO8f!?b zWeMTT&&45XfNjbvQc`r^TL!$uScbpk$O?qob7M-|-Nyx3VAeRl$0z#@ss88qO9nqC zg(6MrnbXuu1k!Htt}`~`RlvpeZ``$hqnW6_Ul|%n6}L1!-BxTBL8LO?IBh)@8qdZo z<+!OY#YQ@D<_LQ*2#0=Ex3hWnVtJx8V?cVFEz8p&4t_b^@3%=y^+%#)x)?AbmQfAo zlN^GI#{-+irpqSvvY>pVT!+7ACjuu|i*+avaQOjJ7hXk8Ol^X562gm(#_8Cuf9)dD z7}*or&bmu8r4f(Q^f*EPF0blq^!p0;*ZNBit6wj(aFokW>c)j8HQEM~@hsr)DEf%3p)#cFOU<=z{wEAPr%HhwSOWP^*qXE+Xb2$ z$ZFZ#ie$YZHh1#n+xP|}z=8PcE9BhZcObayjeu2sK9Kljitb7t`wnloPP1~qSNLnu zlv_Ggy)pv^3r!4u&}qkGWijqzN*vmIjNL|O<)s)dDg~0~hma%86(pO^&pktZ`HOq(#Pvj9#Z73GmC0ct?RT

>s7mDFk-PM5R%u`4c&&!)+!LOV2cNXx8kXN=v8=YbH4#Ra_5ibG0delm zb%XsXSj=x-hapL^!s;f70L?WpZhR)F5=lfN7D+US6Nem@>M}XIyyqrD@4_yQ67H4P0u)B{(rWc!yOzG%+#WPBtl5Hz%zYYKUEQl-$d@4ok6ck0kL4%<>+a zbu+5hlf|010sYNrYWD^aZv6J)Ned?@TR17lzBv}OFmiUwk@s&FKE`jJ6rKRy5CI}0 z@_ITmFJ5@Hu8}(vVe^#s`qn!>eaF%xv~*zp;91Y6aHP0xH$jRkcsa@e3V~(QOt)+b z78u#{2&>toifVvCVMJ!l$`*_UuL}zf?dODkV7HtqxVkiXCFGsjv~<&y&$meZuouNK zrKoYW4e=s-3cMNd+c!Kwu z_~OA7J3EL^YVjCd`23^+lhmx&ZQ+Fq6e7arT0NE_sg5_!KMsV$=W$ zcB|igcA397g+noXuw%2$s++1zzBU`iHq&k5+KXTu_H0WEg+~Fn#xU-NZ=Em)@i$Mo zYDQW#!F2SlKq%lKK8?jnv&S)gXsg2?WG$#Q$JTA|(6+#dt)=iOaz$Hu(zJSe@IL3qcRf*~WU1!g_$#sl_Mb-oa~ zcdR5v^`|{{)zP+P>7jV_NhzG6r4)0}X5+gkHY5rGnVK@$q(}*T(h`F@lk0(MioDbh zvUr2<6`$JksvWP>Z3#K76#Wx${L=cL`ogi=5$v=H%?eW_1no4YN$Uoz&~?BCHLe4o zC9NemrJIOQ?;%FK-oeW`0v0;(#TY7)Wt7D?)I{Ar2xZ^l$?x*l83!bz^6GMsTn;l< z%{18B(0gy9Mw*_C6ihhfWW3QPpZH@S*{6IKFLf|pk1%EtCOn;VgcV$bWvW45nGGj+ z&8tdp=Ji6(^Fi7SO#tw$uiklNI)Or(nf*W+i$7goS`!GN!RX$>xjr5x8csQF>*w5W z1YTXLJpcSnVMfgedOG{4&O4*|6N0w>G}RcXql;H$y4T?yV(dh9-zGS1JGxJPI>RnC zqTjWzzf}Zo6eR6uViCcv{v;@8j$m%>vx3Dn#>At40ZacqL5<_KOG4+v3UDxldBGjC ziyl_Eo*8GPd0^+d!qyJxeBL`Sp?;}}n-y~x3!WK8ht4g7yhNC(8+vtUI9dy!R15Gs zX@T?=q)P%4+l;l3V{|;N&2H9(Pwk_(wjdoorbb}RLFHYUp2CyLw9X@|*=;lToD4&U z?EYXW;hmQR@4Y##lbnk69$j8G{F{QG@`xpy5lKBEZ&ns+y3H){t><+KJHRNwK8XlF z!KwZYG9)z_4A=EleA}J`Nk9fbZ0@OsCahn;HFqwFHoCHPOr`~hvX#qbwvu}>DFtrP znHErjBdbNS_N00L%6Gy8>HhR+6E$81Y3cMsI;Bd&c-K{%y(E>f!lW*WLyBZxo56To zG-`xoE~Vwc!i~Li{IhjxweV*uT6M+`vpwtDb?3+)!E`}@sSeA)_M5+r=oV%#!jwb_VVY(k48ipJi9f2saN<_-NlTAe z&$IF60d&;r*gu4q1%+&0V3WphR@LcR=0VlfAOf^BUH1hSf(x%T)~h;Or>GKd1x z3nTg-BXT*N6)Q~RO-IuC)Nb*NqL@d1L}9;foyA6#7D&WNzfT+fO4{+_c^d=TO^To8 zf17En-*4f2`K!*cucOxM_UKY2=RNkc_a%YztOl5Ovt+ulg6(5SL5|W=)$JNbkq(F1 zN40`f6lWIrNcqR*tJb{dU``Up&%|Nu1Z$t1D-92VoO}Fn8WbDg)Fb?^A4Cnt`lQXO z*Q|?=l5JNNqR)`u$DD=9hhf9M;F=(8784??XdMuzqJ0$7$9eVs-E0dX$jMJ|sq3UH zrO4O?qDJZXT${%jEmZY(Ez;pyn^Wq#XSM2bxzBZWF*T^W5{m-ei1XREg^F-GB80=B zp%XhcLl(=3j_6d5DHkhgYPz~rKDA%>L>KD*;@cfwmYh7tAFgN|K9Ot54u61jlRorF z6ue%HICM$cz25Ij2uyr)G@0PLGv?mN@o~*#u;~@^yhQ*0sEo|5p63wc6ZpMgOy)hA z-*C~LRg-LiJoVyx9oxyC1(9)Zn%`x#THpXi7pLISF~mn{gCU|JUgzADm*K|8(#z+^e!F>oX3liE4l3f{%{wXIj~#*ZR7<;d3-0Da1`r0 zk2AC_RqrRh@?V2ib9#_ITfy5-0-)2%X0SrQr;@km?up5<-TASa`LJ-8R1SitHPQEL zDJx{QPCHmrqe({jKRW`INUxi~wWX;jU&?5;RZrSMc59L6ePWs<>&{|h97lIfm6 z0r#HAq^Gdq6Y*#Eqp5zN+qyjus<=T3mP>w&40XS7vjv3}!_eFb`wpAA>wYDZ*ca$& z6?-C#;V`V7cl_qdWqwaJ7F>G2*tky~RWO*2_z|@sTgm1A<%gK{mGXE zvU1+NLS&Y~v1bUuAb?{0ox>tp|JRV<3F~5BHHKD>wC<6Pl<@v#kesi*gQG`@he=;m zc$MtOKC(I-nxWzxMS-VlxOzP*&pLiad(9Ua^}w0b#wW)gHagdp&3TbH0~_l0AGaxN z18>Tt(5zlyh4qo&Wwew@E|$U+>CcqnauwZK%n(`bY=x$I=#8EaSKjIQvS`7C`ec)@ zhSeH*G4t=*RytwvtaYcIBiEh2dqsI8NWg7Ej<}j4`$QA6g<+8Gw8CmS+6=8;giE!{ z?`1_P^KmOl#tbwwUzq-V|B0*rgf`K51B@(+xjE`pexJeo^OF~`8C!YT2U=3sf$q}) ziRB4HvtNFZOeZ`U%MNmtCG3UWM3^rltf!+mwy+Nmnj$tsWpQyN_=9ygs?2?Xk^@3h|>%Zfn1nHrQ z%kgo=)J8bDdbyR4aZk;G4pylQw**J;bkH|Vl)io5n`fw^|AOawlKiJz-1(eI1^g^H zF35cgoj}vpUd<~ykTY|JO#eZ?-*58W>n!%muZ*#q9u0qCJ<3Z%C_ziI1*~~&w(za) z!5^`Fut*~MUY;SS*sK6oUTGSY!rbh#7Wg6fU}^GCrTAI|k7Qm1oQJ-F zhClL0sC70s?XSW1BIuE4Tim+;)jDLjCqpwV=d8vZ#5F){hGK~!^Ir1?(E!l{ZW|gD z<>SxjYxnN=6Fw3a12tH`7S7{fmpkJ$<+lT0eVQTGT~7PphrI!fCP3Y5J&OF>H-fHx z^%y=SdqKW!eG6_P88z7-KUD{#CH=-VhQUa^Tx#n(VvTb1z)S9_45HUgU=el(uZ z)U#;ZXUZtRZ%cwhFywO1i%R+sZmFzeFT#T_U<$qsB{oG7ZGxf0@jH+8DYHA+AmU3~v5BjH#8g0wxNdlLIyxnJND||p1U{$c&2qy9`078o^%Kt zX}ZpQXm89mN9&8LL zy~R}82D-8P=_>DD!|v^e{V|R{TZ*>Q#nF61Tj>ib$6*mS(G66kSaSOFpj9kYPvU0a zX&}w=H(6zFh$Ty%&AQh+EY7h^O*0jMh#a?dH*y~XbCZpUMSk{c<4~<$osQpz%i?}G zXKCfJbP2|ZGV!V>55QP<(V9mJm_fitdsH3axVi5c)pdli(uvY@&DEx(OFbJixqww% 
z2okzC|NOHJMmj9*%5|A}q4LAs(34d+bSN%R*Bwh>Cy*S-IV7X7c*(EjRp)LT&@ZsH z8G5;&5mXL`yFsb8?8!D_M#I>KhrzHwB2a;_7nC4A~YKAg!J8c zWT}kU%3CXWDj&_&ufq_&T({);FyrbstOf*_Xqjz5^|TdKl5Zd_43W6L~%`gCi{Fg9d9r#gJ!}kmn8YhumPm|Na-i?HX3gxn#VMPryjmb$J!8$M5&?ov*k$ zRwVJixUh8G4B}Y~#33&Z3KWRqX%tp2Ijkb&)2SfMISMjw@{6rV3f6WicKdX%liWaY z$h~)g_4Lsbg!68Y!K1QsJ|MxhME6MgI@*34LG0J|{K2bHVDgee=dq?v{HGgU%cUZu z9s9g&djE7PjBy~5#?mUk6GmBESC|ZGx$LYsORV`Wo6I*=4QlG~GG%-i@um4a8iYhK zF3ieq8Z_uU9?n!BH^?^7EU?@#;v3NBd5dd3DYlblCC=wE+%X{hF$ZdT*#T?Oo;%*Q z3P;C|?bYUsu+=ZjKlFau?9b1~&$k9TsfN1d4YMoj5sVQ`H9?Ph-cIq2_7ZzpO&#kO zeVM!z)5VeUX%rXRu_vdGrxJfS!o6q`LJ!hAx9bwR4Va6cKSCatNVu*o7LH%~y?`H> z?C&o|SkT7sBsgc{r6StSLk8c6hGja(*skP}#%$1ttU3zEOOZN}WGE&D4)?!497@Cp zHVRG3o>wW+?-HT5T+dZHW&6yvyVIxM5A(RJIGy;I+Ch$e+?5t9nnc3K?!T|!A3Qu9 zQaE;i`MzH6RjRfjJ~4N;d9^?umyVv5(7Lr^QJpm^hg zn0axQK1XqJ(uC0!J?P^D+crAuE=6RLhIX~KqhP#0jD#XTv%u5q{XjeX8GpT*>RA<` zn#|z@SdmX=XP8mq9gj%Rt+78`z;g5uqzS(|0&x9dMwkXm4$7UYkT9kL9|@g{*|pUS z?q#&TTdm3WqkhH;L6(Hy`;@F9PU>CVESe>gso1kqzfQQl`(XWCuuJu5sS+QErZ`R& zVs!Qf@@~8Ku=2!W{6k6%5$%_G0}3?ZBR)!$v4iq?4)9ZmiWPAv!7G?Hlp6+Ke$4wa z+^(h2YunMN1GZE-? zR!hQ|g~0g!@xeB9w~bv5H#x^5k7?IMA72jMu3GW9btcaDm;NEz9iYUv>KIouEwC+i zuxUNwM`y<%p>0?cA$58icizvM)%1sF}paVt5s{2l!wK1Dal7t{uYR-Dc06S ztt|A!k@&`sX14llTz>sg!M|~RjZFx}^hX#>*o~>m^{Sf#Qy#&W=+HN@w6`-5vPIiv z43RkSht4?%%*zcC`>&%fkI_9gX(32IL*LlYKfb${XTXGqXTz5ztY^cT3if?o*+~0o z^R_Ff$b@95C&VcNj>*h>PrFa7!9p$-N}*@zeKLM5&?rrqp!m~UbEY38HW855I|N+v zI}wcHnud!qTo*E_Z`I#{s`R*CtywU5_W$y z^052GQ8?e4DxJvO^f_}iLGGcc|AN=`haND1Om(FFW;lnroG#nn#QcYq$Y#5#;q7@) zjmc<8i>MgsVZlE;FOO>j|~ONzJG%Qjj`?(=So^bGRa?s&x!i zo0=-VSr1&?8SAUTcfBZ`-FmHe-+nxmZJyAA^}}TDJ}~}_8t(q{n1(1Wsx-pQ>b&)v zFRQ7S0whrJTriAVnwsF&qlC*FWmfW+1qXhNW;1+1^Y1vsC!NVU>8(`}BL5(CusrY% z0XKEg;&xTS(1V&af7CoCC>xHV4LkA#!Uvwp>-uVYPetN)VNUDteKnpKP~F>bHiR-7 z^ldOIWUK#-IwNd6sTM$=XX>vf4WrcC*tQ+WET1oFp@YPn z8@>3xzn0c_<`$izk{r-nn`*f8md$o{>aBD>RuaiNucWyC)PAxK493It@895HyG#8< zioW>&QT3KlaWql8c1Umt?(Qx#IKhHDK?Zks4;I|r0}LL5ySux)ySqCC`{sGidCywk zpZPPrx~saRt7`B2y3gGuw?P5pP@VzC`ot#LxN1;U%YD#cuS=nA$|`Il5tvl*unjgZ zY0u@o5oQyw3mtUSd*3xoS%0H6Xn}{HW4=^&XdQ(+R2cJx%ae1xoHP}{A_k1B z;`jNSf+gc=8fNCB+BGGvYe~t=giF=pJH-i)4rb_8mHFlh}s!^&u ze`P~xJf>^J?yVN0!0jI1ymeuIuMKh=YIZkw*;e4O5V)wt%?+D8Peuy%8u?z%J6DpH z*w10qyj83Q9@Vrh4~&+o4gZZ8Yq2IndDlM#j(eb|4Vg!6D<0?IC3O6o@bGHm>)_hV zfd#u{8hDxS!Uma0eN60+2cFr9Oy>6GNEJJahSguEDzdImsR&mq*6cXYcv0+2hEn>l z-Bs+xMvUvliOxavl&L2CBJiXS%+dZ1cE*aMYZSG6Wyey4c1IY!;zDJ(Z1rSqPwr0W zaEv={{hQM-y^3?d>up{|e;bk7oEJ0mRqkouDU3cp1i z63V9sOod9h71DfArHBP|Y$5g2AaP2%BV$gb$W&RNOk-zO6*B2K{;{z!37t2=xE!h; z{5DK#8}rC^3ye~#cAki4zqraXwfWrL8sZmL^vT^VQQ@~(oIQ>}ds;Mblk?KXtWIxg ztYtTgu68W-0g1Iqkyh?6!28PmOiejvanpo-c{DUm7r-oqE{#j84j%qa`@>AwE-1A} zw_Wb<`pcc;bM>GHh&*A0NzXBMyCk-bfu;u?$4tnAwiE)LK#ZC~2#ZD)YJ0-1R+MNG z*ySASQt&E)4;RO7WAtA5VB{_l6YnQt>%%6q2=(&|Lg?*B0e$~rqyBftKU23`}Fo5Kc(j3J74_4PO5P>^dFf!BV{S_JWs z<8UVA*{Dl#RUZXFnK+%fpE*#5>Rq~++oHOwUYIe*;9?nB~lvQngS?;X|<+Cao2y4u=55W{|D|5w3cCd_+ zgFNh;BVTIQ)F(7H_=D?l?o8g#%DXX@_gs;zZa{Vc(Y}J)#JkWOz{cTuZ|&2N`(r;rn@mUa*GDZ$PHzcT9B_cvg3aF4 za-0GWZs38NE4pya>Hn zNwDsgot~+gndApGqiPeCN~md&+iw_U>2P;9TTxAsY_LdWSCm#|O<`~zgix&pWf=Z} z@|;~3TSX~HC2$*M+<|YeNG?&w4LB7WNNFi|#6*y&3ofBAs0lqbM5&hq)>UXO8Z$Hk z)h;v1(%2l+JUn|T`HS~0mM8Acfzzv;MOut)wd2cX)Sllhl&WI;?C_I#yRECg8n%f9 z3e1oIPOMjQZH%x#QDV%{R2C7ycg0kds?aua8pexc*OV$g1RkB@`$O_W&ZtJb<5by! 
zUY>V!lV{}S9e@P$%zYlhzb{#yc8;E?6h>bnXMaql{#?SnGQOhin)?Bbt4a+5N_j6= zvP}6^5DsLORv zeACGe4WOY7=`t%V*D(YNjMTqKz{5!7hI$Yav=7KV*p?G(Z7|t~Ea#%VnfS2DpFw@; zJA8bQA{h6%_BA`fU`(?dUr7zzqnV@+XLd23GjNY1GL8YBuIAzqn82nMJbYSIKU2aX z=Mpu6b;G2f{Tdt57p_XtL87!emaOW=Z$@Vj9wn;j0mBc?I!Y8&DSrg*7TC&$52(#> zR;b#;h{^WW==L}Djtcrh7L_6dRp6tx?wN#5MB%m(sZ8C0BZ}jFb|0e&f8Wy@}ZncujeJAly=m_dVeIi)?sDVVSwc`BA zztw)>F^dp@;-_fXu8a#w5P&TSJl{0ucDx>=^|rm>(R-b;z=lAKNE8nJ_q~*V!_S0v zVby@HQS~!d?`*S5O@_Zw%SgFs#HWPa7M3~*Q0e?y`k+BH})l;kTgNqs1 ze_A-^OFBs(_Rsk{DDzm{pB;Ayz1TQf2D#(Di^;hbkL7)2+BLmgkejJVD?`9A{KpQY zc@gX@u)p;>IHt6h?2UPU#RMnR?3G7sbyBNmh5ka;!AMYkUJ6*NfFl?bUCBYNK zo0RqX}}5{u)Ehezr(UH9M#doqT=Do=kh9t?}9) zYYEAU!1v>J%8edN$rZZK5V>A4Rg!@xHnIt^tqjOK*?yoCYfN38!E?z=qFu?Q5XoDr z&IO{k>`@kr84Y)ZvZvt2wf_}SzXG&Kw~bc?krgjqJylUGuR}<96G0o7H|MJF?;=>IeJ~k|wA9(iW zM!AOB1Tt|->d+6QJjYbDW8K;*ZcyQzZN5g~P3F+YHe+I>U8#&vJOG=*4sPMiW9>2+ z3hkn5w^R?v*oEi&J^b1g z<#eI$IR|YhDTd7aR}C}pEYq#c$hg9FH(KFlKsM4W9td0b+ll&ZG|hDU`C&*Bj8fMa z%6fk<1OzrMkYJ;aYJ#J{bJ{__uG~O9ss884#`tIeE91NVEoSmQitK2ouyh@N1pSK@ zGE?Iz^G#JD3qr>tcxLfVSXAHA!nI=1&)^1X4!TREbta3?dqgXKebR&$WIq=Gm_Gzm z(NR)SXOZu`eXC_~G|c~yu(~k~k5Sfb^5|?euWB48{#Y~kp-)GC1_C)4%3$4*kl$?y z>=0b8j4%>n{u7D*ms?{zMIpKa_s>`sgpDGFi;0opq(K>qQ@~7?t;KJe-pO?&;+%7A z66?h=I>u&uzRfOBoINhb?%XV36|tlSZwYhba+f5>=8SO2^`tK-1?1AJgp%3C|w z%(|@&JXVYY64?*H86kG?T!L&WBYt5G+vF2g9hR9s!1Pr7@eW=79_PC5f_}*uQ}jC| zVzphU^~BPg4&~u&Nh-KSH1cvh>Rh;D>u5>s-?pHd_PDg8D02|umB$Y>6Es8&On9(d z-F3Nk;g1;jJ`70*-?x|6&yraA#89YjzJXuYUi=(>Zhbd)IFW%4ltxo<-Lr}cGGx-G ztNw@dyiSKZp=~ekYd`hPxL6po`w|!xO5*a#?53i1O^tIbR7wR5T`C5S?48AskfJAs z;v1cZ#m4-10COv%Z~zBuA?`UxL~do+H%J$yi}@K@bqY|EUCr5l2d1Kt6}yq*MB;s} z12B)RL%Kvtn(UYSPQh8h6^&~N>b@FOS`?A5{T8UA@#qCWe+n#`zepwBQv9azeH2;> zWH}hY@m)=VSjEf@`uTmjRCo+gm`F$fBPeWXN}enG&UNfq{VN+VUmh)=LV0TOlGo%G+fZ z^6D^)O{1b&K5uZ=+06+(h%ybO`J{rWZ#~fGXN$B^$)ih3^*PsTc=K^DxMLwXu z2V1?5zY@~{vs%%ZVoqM_8AuxL;E#=yceWM*psc&bKn?Uc z2`yzoq=X=NyMSfC+7C37n*f=mrDidgPKq7;W7?!K6WC-vv7&M5jadr}W7qgJ-fpLx}ImDPBw^N5OynL%4``b{p3oqKAjQFl~MLg3Zr;>;qvauuzUr1fCk{x8gQ6Q_3Vx8qOn~)_++;;Pg z(NV>JV~Vn~GjjEk@|fLuz(0G;X2U`674Sx#_G-W7kjom}b#R{6ZQ9;Q^{7IC#x>`N zgK5<;j^bLpQzSW4(~^9uzMo+++Wv;;Mq1%Xf8FI@P0nRwP6d1uiB55#5abK+PjICQ zXcq265JkAfr7`@|zp>>$vAaLam5qH;oLZ=%G0%j4M=-3LEvO3o6?8}o!i~4BIu04! zN8Kznj&-rt725Eo!>h%{3RKNAL=>U74$}9o3cyWHm?z zlJ&)9erv`)KW|LY$qyBLnj2D;OtXD>ot>=#*yZj}{6!By<4A@3@M^JSEA$zay}Cp7 zWt6)L(M0WUC3)jFqC$)>DP?VT-4^hpLHtVO_`a2HFqF!F_-!9|BFy&j6RvhI0T~n8 z)^@~AdO+`nz0jZw(={GymBW2Q)Iz{o%2n)h5k3E29|Pp+EEfIU=MnOd?G8~SRr?rAgG zjw2-j+-^B+nArY;-lNRH7jvf%^74Xj*6;T=|46t{51G!y*r&ZrV@c1iTLr3c*Oo{m zS8|0hW0<8LHE1x%i5*Si``*_9LGK$!;EO6xlEg0<@HkO7Qx^rpgN*47yT8Eu8!NMS zPX=+OZ0Tw3W*62M^@A!8c;>MP4}UU6$lEsX4IC7t@Z`Z;aw~v)!>n3Dau4N=%FYQD zMSX6N>6cr6YPr@RDnwfW9GnhvpOvrgs9^jvFJJ*8?_PAa_!ccoij?yeg>X<56caMv zzhVk_Nv(hGn|@Y+vEOVAjoXz`d)1(LHy=yw$e|!Dv-2CH?5%P0f-1#5#gi}bwJj{3 zvf?2xUEBVg#$wC>E$iwt8yrY3DAvGo2m_eYrxs^n`R%8JGZeXIMH`zzj{Hx{HUkkT z8WO;k5*lnpwM0zUQdnUTAgOYJOFT#;C;RSb``?IBBrhlb$qc_38q7fztZ>+;>nMe2 zn6#>hYAKIX=0oPz%MO+>m`bi!O!R@LX^&k7aBX4f_K@^l}9>?hA!`J;b! 
zmPzDoYXnt^)1B(uv?*oYY)t)>;?Sfu2ncLJ5Bcf3$jRvTN!BV5^B0ukg<&XleUNw( z(lN(*3TFqqr9-^LKCE22VtD>n260pL7x|^R+%UuQ0ua$>*LEpB+VrgCLQ_J|SHsxn zb0wy8CO%m{&o$dv3Gr-S7N@Ij3f4|0r@Lv2!wkxq5>*^%w~kLj%P7r5 zQy)n&gvvusE=ouGe^PdZR^FPs(<@=!Fnvq1NGwD{<_)B7jCd>-zvmx-x$*Jadx zpDZ<*Me%Y@>-2X2ko-dsefgJlyz9AGmXDUnF!h)0(iQ0t_J42d51FJuk-f@JvHyga z_WJSbu8GUG2Pqy0==g#2ki!9b<=sqOqg-?&06uX0wZ7&rs zULW4^sDfzEgD5|KEMNO-wNmeOFxw^0tEaSd?k8hfE->u`h^M}Wl14x@oc~2`-d8xD zjdfUfj(*9qGXED$Ot-VTG`iXqXggzz{~_e^cH4)!{sLx-tQ=y|xAR$0DmpIo(J0 ze6@;Iwy<8-O0F4XexM63kVAjohKq-Q;+~IbK|iEIt7z<+vB7F_Tf|t%OMnUq?+nnujrhIguH6?dK1mJ3{_fljJ_nt5fJ0 zJGR`6Q0TTUu%~(H+f)-2J-Ka$+cf12-Hf&df6oY15xXKoYdU($-tS6XY*wZF!i&_h z8-uCoG9TvjRY>#S;t&_e_GbWPO_c#tc+gQl18}XdkbCk)G-zK)`sPjY2=VSd%zWH7FdAFo>FP5D( zaTE8L!uOs2W{`Bp`)2*a_!V;M)5&&hf{}lQ!0?p~;P+<1&a{CKr--_C1rgtI60|*?~ z1iMPn@(2%Ah{<=;sk6?@HfoV(+;y{moIu--KI^GQk)YmzSR(ClO?!OFb)}o-Gr{Mt zBuqDJ&J^~Y=ku`TgM*}zWJjWD!=wrFA$%6`;$jOrweR-j^spr?`lgB?%T}Y z;2~Mo=J_hdU+3?Q(N}wFDWf=@YqYmaOKOmioEYp^r;{Rr670$;Si+{yoex^UyS`uN z`O5w-K&yB}WEjnp{upkBbh&@BzR%+%xo6Wy2*VhxJUg|rEa>=v?;$I@P@Y~kvC2ni zykZ?SgU%0O(XMOL;{=!O>c4}Kg_)!BC%tYE5>I^D3T{_ZqHe)a-L}k2)Kt%RXMvyOiZre!OH4Wu?DUWoNSMtO#8D4V_lDV#8(# z7=@DvVDx$XMmU3D5Ftw_r$J%`ZU1wTa!dx$dsXhRXj>;BrDz(T;i=cic!4RN%T~;Q zCI0)hB&nWZFn_qXOE|Deoi$!MQB2Rf-W7x+><*X`y#H-YIC}Rn=VRJz`)<5H_v0Tg z&}oVtL1?h_){mF~uLf8N5uNz&$g!4s;!bdBFXw`Gn#<2~IB@-oRWeJtaIU$Ic7n4} zIZl^yLEok`EEeh|%fvWLjn>fo`Hgx;QE9P>PQp7S-TPF2xR6uB*9SgUkRd!ptKTV<~Z!6Mwmvhe2 z$}|QlO~e5zB9I2pN{09GaoJ@6Z6tzr$$ThyFlW|yY;9|!Z?Aje0XLb-S0}sF6cX~v z8EZ0qv#rFjwx3M5zDTvPWqZx6xtyZB4xbJuFdbQT3 zC|Mlp%G(*{LZS>%#=I#SDQFs>%y*%KCGH~CuM$1b4g}&HuKNNrRKI*35)6Vidq2wU zTD9JxYUP;@DT}a5&GMY6mzL7LzpALVpn}Hpzk0K^(X_8+pF)>xS~sNmfr8KB8e~+t zhUgZI*bM5Y9lWqbb`8QX<{WLpc=#@I8#Ro|I<5;l=Kgs|U> z;!9++-6O#TRpkPPX2T@R&?iniRlX0J>*c|n5ZL7@---uhvW_ z(6@rxh^QBf!f-Wu5hKXOv|fPv8Dz0}sl&;$L1f>uEJ}~(pE*8pZw7;xXuuRIzZ{4P zD+G8vlVYDik|14ai1KhoYLcolm+y)MQ^cFR7SJmO zV!qVW3L+I2Vmd70?u(eg%u+15s+ zsR>V+HV@Fg@xUJXXZEa`Uwy8PIPglu1c-UJpqZ6T&+btb^%jp4e_p88#sx&+K#f|N zIFu6xut9)kxzCPO{tUQAl0Rd>-ozyNTluzl{>9OUz#{gW(Ww9)=LSF`7yKfw(i-?ymgX=XS6duTi=B$h9p(5dU_m)tglwbTBUP?h5a8`Fr%ESE%J?Il14 zjrf@F5|r^X4?$5-jR~SL!A$Tr%G;Nh&qxA^WW>0CR251XZ^;5Z;FhsM7uzS4V1^m= zgzuvBF!f}Xez3!KbzRoR0>S>jrLHC{RIL1`ng9a}X(pV*OEg)LskxbmL}a&-&J7A-6i~yO`e|%$EVG889%WIH;K^ zu}aA(A;3__USP13j4bd_@>9eVPSW^sAs;zA^1%cjU$pJCCdK>N+^}uCM#wnXJO5ZZ zd{2UcL0GX#Q4n2fyOg>{cRe>o)+nnR!>S@XB7OCwklPE(ZA{Aa26@uoJCiFkLjI<^ zLnHS9^)ME{T#{3%rw7CKP0m`adUofB6~6T>>X4h?zhQPUb+BKI8!UJERT-wf#e{tFL z>c4C{ui4;+)(0)zxW~N2wFV2m%QGLLm$Fro1PyNi&lU}@ATX7LzJd|yKs2{&^$q-v z6xsyFaR8E->&me0fr!Qg73LD!o*;XX|H;dmv}WjyKnnovsIB#W*NE}0;WL)Nl=0MX4+1KUHkUCk?_+emRC z=d~+?oRp(3aEUAAPt+(ArPHdd-6N;#vtc0egEmv*E3rs8Zkh=J&s0Br$HzOt<_WtY z*&?z^a~ud>o1;NWL#;mCiuKBhNs6939}c}<5)fXAW0gY z;i`2Jp>~R>l(IYn5HkuTB@ESBO2L5*Z|a3s=sX~Z|Bnd!ZSH&kf$5A465p|#@cS^s z&E^PL_BTFX7(Z7N`TfWi)^%xfJ=)i@?#dG8g+JVPBSj@=YBg*)W7o0l_`wZ103myj zp^dnCKBNt$H;PrnK?ig&F&tcg619^0ybEt@8?AdR`yUcbr;N2_Ln~y5W_C&91s-j; z+evL`hXKD{&h1bmN>l@F!0l_U$}|^r3}XKtRkZ5RAy_YR z+{d@Jae2&W2uNZaHLPan;l-v&e}ftK3WFj1&*){D?!H0$bX#NuB6j(-{`623qGOK6)HWalpU^K@;L8rr zGQBa6(+Gc-jrOLS!6VJgM!9B|og4|YVJb`%#nXz@9ku*E#ND^P_+ORn8O88wd~GS! z5)zbugc^WLW5zf9&?PDQPF9CP zj$_$PB(9qtP7fdWR?P@4+h2TU`V=Nre5+3qsnB+KKwCDFDOC4(Czo;5NzGekKXxGr zn~KpNSAj!I)d~!dT`>js`W_Qrl~vQ7a=b$1_-~beUW}c@KWm0E-6A;bvly2XCTi`! 
zX)ENu=eANw@5+iVMPMT8zMUtGzmcM2Gl&zwa#DK#w*9B3+0Tjh60R0`)^WF?m$Pbr z**6x}l_(>f0x~l1am_A@TMkXSnV{a7WNvU+{h0n5f{c#OE|F)g-pGD*wGA@{dJP(R zHer28fsCzCfe&yWLKu12gAvnj*{AJ3$;SeaLZGD=G zWf$`14S`+z=Y*w#yOw$gq`;!!s+D?(8Ismsr?1(_4ZF~9LM;-?%u5mm(FrTab`D<; zYPtw$US0US)xDihC?Rupj?zB?YOz_@8-h3@1gukYjTVgnqr_pn_u?mLW;?kjCLzZM z|83eKlw9@-??FuICD2Tmw08L?98)7lq%VUb43h=Zcb&U2&elJf6jvH?{X0+BmNlj1 zQZX?8v9|B=X#QK?^yQPlU3HJDs@4pTGhh&UUy9-W5C=<#OV5gZfo(cQ)OH?_&WA^X z*MGvdi*R?SG>mmRqHvDa93YYDhOgr)A3uMf3rz;sHZ||y6JEu-BNr)$o#@(9;GmxL z-%g^kNP}Jw6Ie<_8`7@BrSF{ixP?Ym# zn!6N}GCva@)P>!)kq3R=5D&0JOM|C-UWKP9r8G(*we`AKx7x=klxxe{1EgmE`?d-MqwPi<)l${a;Mk2h#p$P8?$-dOW1Q zl3D&=q74(uXsQ>1l#*swc3e{-kw$~ys&H^{)Sp)%rNN_k2I7MUA+QIJD=GTdWu@1# z@m3f!Hql;@lHU1tC+fE$80|vv_kj?gx<0X<(xl*OzHC^$oKd4dgIrYN(zlp6*`-G6 z7&@$hEaiVKFtkqtMt-G{PvlnvCFsUJJ9qV~JWDdTa}6W@YDM%@GorW%x0+T7Gjv*FpNw1vcx{ z#B(Ldu_gB~z|`&>q=lTGKOQDN>&2Yd?Anu|!DFl;bdxKt1;8S(^tVgJ@34p?2G9J3 zLBt9W6=HIaw02~^K=I#73yKQ@Y(7r{9w*jYTix8u%^uG#ZnHMqTlt*!c+Jf=Jd)rS zzrc!F-VH^Eu_JP8y@%mt!J&hB?E-)Ba9)&Lb7i6BZ4NQMA&*H_t2^Ws<<$onGbLnIF24En>1v2UpJ!gYqNsh8Lz{ZgmoG+@GoA?~De|+~)TyYy^ zQ|`+IVgnkFx4wD{8V285tl^&~q=&qJ!BAcg%PguUH-WA2$26hH;d2pff1P^wyMs75 zQ7P)bqT;U5Kc<8-AX?Jd_Ow&q%Tr2-VGv&wK^V5XMcH08v%J4gc*<#iSb`2L>RjLNOy;_!>rAzaY&IxA=_ zI56gVV35aSb6}0#+*D6XOveZZwiInhC&f54#`m@Ix7i8MXBBANex_7FLoq>)R{J}T z87k4RpIp3IS0`KN-o#ixrP;07ZBr4}Lis&H+1B?{hJ3n3LqV*= zaLPA9kt`8-IS_i4-S`0?ssT@m$gqT72^$r66t~Y6mXA{k<%Gn77!TP6_*w&<-RKXt zxx9a52^$}58*-XNRtj<;p-XXFlS%e2UQn-;YJ{aVdNNCsL0=w?Va}*5a}p<6{;9F_ zA70yUA{m|wno0|l3+&R{jPh{OT8SK1ig~Mm`qFY7XsQy)0Q?^mNI}5~xYZ52Pa2Nt z{eq0{hBEWw;L{#g4_@)L7&^hdE9H8$%rVYLu{vl+ZSj?bQ6B`H(??;4Tw9#in!HxO z2%U^-Cs^1zr9MS)$1ik#SEn?0%Y72j2yH#E_`PtK0Lp7Yb6|5|XR|U=CUFMw*hxeY z-)G7IbaItviz|!AOl+IuX(p FicZNdOu{6!~x|$4e8^R-WH)b|ew`rf%5jbt;cg zX`jQ33{D7~evwbBrv2&VcG7&^3N6Fehw3JB)z2dsDab=1b<^W)iHzsy582TyEdc@d zVIJvcv(mM?-Gb*md~GQ}+CFsieZ)K*Yr}CtaJ%*2O@QXoxnV8RQkXwPJ@EPo9wehT zX;gr|KY6w`3Qhie_>IL&dUsg2R#Ms=le#nhw6$t{FQB)Unz5h)M2qHv{oo%Q`xC$j zIV|ibf^`SJJ?-uYcznD8o^~TKnbB={?{&$Yhg&^!#8G7_Lg0m+e<0@WMCWggvqO9r=}R%A}j`+uR-3u$X! 
zt3pcuk(ScEdwa(0MKgE^Abx>FM*#h&qKX-X2IcOhyte%>mi6-#R~{Zs&j%M{*jvU; zh)Z4t=!*tM-7cz6INc5WwglCs-zo4|!F#lktdk9?EpD6Vf(K2gOjbn3GX$`dBa*qF zl3yLGsl(USUd}Lq8Gwf+#}-d&*OAUQTj+5aMwss1SU4!wfVSMhMZ)=OLUQMJ1R9hg znUw&uHS!C|>p3i+fG;u`k|e`TN~()rp(;~H5W0i@q%ijC6bDZrOrS%ICY!hexJ3$X z`n%Rst^T|Pyk)5FGbCk)BO*v*D@*r%o83#A{PNj;RtrIzmixdYGruD@ay%_AnGBs_ zfyT|9L_rgio_wcJ(y(<+++!>07O}EMsHorA*RSUwp>vxrPPy&Pm1F-Wb#vzx-?ER4 zrW9zozb2G&t5>W#AKnp|0rQ8oMruzAqqW267e?UZJ8P**+iB zUsRDlrb$3h^95pIXvthpj2nt5(~$nE(5azOAJHB6Ga-P1SMVSq3>}cpq|p~G1-vd| zOn5`QyqZ^wtNu$6V9; zw<&YzJ`VCB_9A8ze1yF~Q2o*kn%MhqG_u051~2vKEQm-#OG3*wD`Pu;q2BnC5Zc41 z?}bZuS5Z~~)h6M9^|!(a_4`Av%t)G%`nWrjmy_fmnALiR97i@M;hefN5a_YZriQw+ zHu?42GU$fyrOKuY(f@ouCi<~3^+6`5Uz6^ky67&WfET5;=e#ktf-z4sED~uE?$kfL z`>9O12MC|?th~sGIYdAkB(K-EDp&jvE}w+!*cp(jz;dCy*-aE~#0ec326RfuiMkf6 zjJ6_`ht|=AvDfVjVs=?I<}=&8r(yHEs1kiI$H*!}8!%Go_k`>K_cya$?Yd9FH=E0D zhE0!_-1U3xi@oW@Nqh?1z5_mC;OJz$UI zu8`_uT4NnAJ|=}J5;MlR0N~q0t99IMQ5)X_0`s(g-OpFNM7;fby)9z#V{~bEBC%8% zLi#Up+e*6Ac0qG|NDSm|E6*j%RLpo+&x@L~&tS{B@!74h4B2 z`Y{pG?8Mc}^Wiwx`rKLj;~0tn$H>5(==#q_P%>Cwr@_b$zj19wlp6n%=Ig#JtErA< z{7FT717Fm&;aWf;m_iQL{Ba$3J$A?Y-xvv$oA{F~P=z2Y2S`*dK+{Y+nQtp zJ2YfB=;(p2dE)c0I5>3#k7wpf&z2mQ<~6Umkwi3`w#)nxLHH!&!?5hu`%C&bP_i!N zUMFG+?luvZ*i~aOg|3PArtnbOJQEwCHKM-Tz0mnM5TeA^YD%|NJ1d%5m7096^mS?1 zr;06jU(}DiDWz#lc~Vd{F=cX--Qtq^CN)F55ZOe{7S;2Z&b0b*C0d)J&!S6tsnKSA z-eP@hTt_Pk?S(UoaS>9*9n=Y#oW4$cHT(JBY6>n8PSmCoK7*hh_L~ro5O^Y~B+MNz z>4=V}0OXZcLhuuA(?JyN?sTzTcg|ANBV*9JM_I7gnH&p=o~@>UxsmDO0Izgn%4M4< zsao}0eD)4Nbl5QYCFX=rt=0QM6H7MTHh9$U7lKX(uHpH2p<9R7$-Z4ct?y_gXW$A2NS^4Ru z1-RPv) zmXa@KH^)EJ9S52_@Lbtza63^j4u(#4ha9sJ%2+b)sO;GHB2A~wDJ0CPi-Mucexed2jI{}XCfj=s{G&Xcs3%*tr5eu=DT^m})tGQItA55n%S zELpeJ=MqevY`o~84*EK0lkwdsTs}n8=>9-dS6wfU6%~wf@?ri!?8u}msQDEI8b`lT z`ixNJ&Th=Y9+JOCTb-Da=DWW|U`wlhJ@jTjD%#d8a{3P1V3E8=Fj`go9mGWNiEE1m z=-EAjcOp(8-}X)Y7M>;^@JN!Dw4ABso*j}MQtHTb_t6Yo(z3I$gRu*;5RiFjQgVb> z2YpxF`T}-1-EEr~O@W4yB86=N>G}&qv?Jsw1J@=Jb{WsPjfO5Xwp$2?-go_TIJceR zZLl`|%sc2RtW}xEHn`KHib{o3nc4Da$S$>vl2sTN-Tg>~Qlu2iAJ8dJrNau{xn{qB zmesEcJA^tbk>%xN?%>pfzq0+S!#(8iRm!pdGP^UkpM{`>=A3^0M8Qa~u0RO*f*Pe<+9!vR zeLbEL3S!%FJueqG{|iT5MCe9A^-WylXPy-W3X$YNeA)3uYwL#a^#gU5RA9I{bNr7g z4Z5E^G4@YhJ*{zD)Y3flS?)&We)pUUQWxVFe5eE@70f?Uba8^uD7;C@8xZr80gTci z)LDPt-(=y|S>Onir8l7ABQY1nzju`d1mHG!ue&JmrQ7Q@(Y9_f4Y@PPvSvo(BNWgD zuMyv(ZQfeMHV1rJ^*AJhkPwFU{JL&9Wqgz(bIB-b<`_zUO7KV8mvf}fk(t05ET$Pa zUAIgGzCTY+6dMf}qEj4Bce<9%3RI_J2_0+jE!es2Q$sclw`cr(-AR+W=q}<0;IFl| zI?w${T1SSSOKQ$AHs(&$m_2DP{kY=~OTA~oxNODBNm#-LOMqI)WKcu-R&ZK*c^50A zuYJ0pxY&B}xS8Jm(Xk9i3f}g*nG!8WvSE)!j`il>cKRct-%pfg@sGi|jhRb;YaY3a zclAZq-M8@iRpNI+l}P`gTU&Iav`%pn4fi4bNs$%mmD5*+Z^g?sqiN$pzI*MzuDZ=A zB_*mY9oBUn`<=|yJ1=)*DRE8t4Qv0JXN8MoGgXVJaZ21zska@bXjgdgXJ=_e3+#A) zW7+qyn*zRFiS7XfU2U-BOMHE6#c`t_UEipX(fQG)Q?=r=+j3ltWCA$-p%-RCwanlcUAZyFZbI;R5nQ~|qdWM)!TJ z@m{NpLvKc&<**ss2L$G07Gz}UY&B3^y7+rOvsexVIUYO5;U??Px~ptM+WDhd>p;Us z%i(plqr~K39{=YMwoA(&%l#z^q(XXz0VSWy6yRD7^xu_n&pVT&$Tuva=RB0=dI-;1oAx*{5Hd#*2Yo#@E2CynKTM|Mi_@7?Uxt@3r|HtSs_ z4iTWp$)JP$(9k}Am4pmIWv-;>a?~QEKU4q90+437?N#GM`oEW1NJ6)nG0c{9UfU$+ z)jZWeet1|`82-;4M83n%g?xuRi01?%@`WrMEvb`up5@LWY#3UNOPL{m!~b`i6?CXd z-U`yQP|(Jv6hl7w zV*)67K*RHYudKuTyup?vMW0jmKQAPOUCxx?(%vcwQnH5sSF%6E=oXg$)h<7UL)pNF zVm6K{$#QD8`G3o#h2)=X#$c+V`K$Ur=^x}_11psOw^n~L(XO(J_(kGF&AW_bC-tGFsx1 zLY|-RSl&Wc69?)6#kh!5{l9kz4`rV^63@k!bjYoe9?ULN$`a2+mE>I(y8~)C`8?h9 zzZD0C`!&i@Cya2E*gK#>O3rCQT@1Rqox!t?g*4}XHS>2B`O{b#D6R}iMxak0*7DPdkyTapa<2Ct4TYmW#ZT#7hCN{~3Ej z(hFa{HJ}jj0Cen$2@yNI9@pJmA-;63HklA#V;hKH461J|A@{`leWmX*#IoMTd4ks* zvO48EuX})?YF8ny>18v#nVuKTw!zqHM;5(}tho}=HE^U}QzYJZN__jjRlH91O5XXe 
zy1qO>iW%zS5c?me&~_(WHZS#h=hJqm zZ4Wf}x5vTg$k2I+$-60}B4%(EHbQ4ry*BcR`lAA{n+OaGxS;ro_fU{ z??2mh;=z}PeTZRqeU4~oh_TD9H5&sv#5AtEYPVhw5Xvra{<^aW3^!rU za9y!%STc%q0sKxX3#CrcnaT3H8NPZIMHQGEh#_8e)F$#NB#P z4{6X7f@W7+WDZpV*l4&Mf*d(iXT=1H@DJOvp0F|lMNtp}_ET6}O6LYBV|XYW5?&qJ z*Iid_#ZjGEt^AEK0wG=v{QL_*6ym`*NKgCP$UFXhkgMaY&9dN% z=kfFOev`~%pfOSJ?Runq%Iz*`%rdf`$9~e2dC6qimGpc#4)3zIbiNJ4t@r4VNW^Ji z9-D+^NwG|5RvP3MXG_MhUs2yi_1`f{+TuYtD)y5QI{bEZo8^lfL{aoKd)RR>-{_it z+vadqtoQgCM<$a8rPvfGg4n$A>vY*9QQj=(7%-^gO7sAMm@lz*z8VR=qUCQyyJbMy za2=Gfe}LuktRzW{oqcgmyJ%9Xmr_)Tt7Va+h!|amrs_-1yFoXXcLwR*+Z8>$P7u z4Npla>2Id%3Oy(=WkuiWWj9vH4p6QzV(GT&?bZlakO(GV5P@H1x2f=f7&TlyYTLJ) zDx6pp{j!5_-W{~Jg+MQ~84e=u8xYPp*2O@B^c=STuL+OEWbMb4#Jf;4^0IED0`j7K zRGuxrzn{w5A6B+qD@ya42Y$}Vn?q&i<4_^*O)^Hu)fHt z6T;^lmu?hMwYm|j(6ymBB0IaAbK)E3xrROv;;a zE4aBCW#!J$`U83J#y~77sUXp4JIs%7$9WC?INBf>9@P2Hw{!dLaU4Dlzz_&UfyGTZ2$wEES@OSN=nqXEp4`@U1|)Dxsi-2RzO3QEn^>F>yxA zR0G7JA+FiPmr-o~&q zs#ig-^LgBUr_xmD5;8ZjD2SS(DxB?!qwpPzL;N&jcn=^hc%HfP8G$I@j>sTpW~jD- zU%B8jpWIV-zrEgOe-N?70djSE0jxTuZJ&0|;BxV2AwvgWZ+C7OiL@aPV(B6~s-LnX zGa>kLM&29!|FQL!0a3nBxUd2WNH<7HEGQw}r8G;!0@B?eC0)|exrB7HNC-=ZbfAui@RMOVhxJq!mtP5OVml(y9Ra6>ZN%6-kbBUnb8KK*wE-Y)MY*Amk z+G$O=>x?#I(z~OX(ICQCBSq=&pNpn8kmVx<&i8!$IZqvePf99DH7pOfmX}A7H>C1B z2n9Pn=~U6U$!Qr&eE9xmOnTT;C@{vf8pl%(=#MQB?h6@|b%5c(0? z$2-bM?Q`_@rDx0ejIM-CTL_5(|Cw{|02%}Lw^&E~+4JR&APEMd-t^ss-R9|V5{Id= zaiII}FAlx0kHhBVzL?6a%Gd4&{H8b-p)Bhl8FP-$M)Meg!Ap@g5O7|vs0EEF6&ERc69cCkcagwJBj&-sQ|%HX3m-I|f;i!7_#py|D; zW4~K6>Y;Q#UcEU_+SYFEPpIw+Y~LsAdF{+Ijh-rd1LZ9frNEjhoxl~kU@SFRG*^C13(h)Q9}(hS zJu@N9jqLlg;<#{W6ZD>0hFMUsjfmX7v;9fJP;ICj-h>V0fqg}mh`CT_26@zo-;U;( z@G9)%tb+N%@|K!^ih*Om6IdFU8UziYlipeGObSkqzfcs|NHP`%)01lB0V8m0J#@Ed z11CqOpq832Fx+Dcs;o8!GW=}P{6@6&1usM-)zFsy!4 z>~;mGR|2y+8{z%rN~plr*7kpx8NYO&GY*SkXQ`qOHdUFYqwzeS))K1L?%vvgTcVs~ zoM!1dMAOAd8mEvFD3P)Um$X;Gqd>mL_v?(9^w=B&r}N61l!4h2b2M7uk_@)S^#m{u zP9=2>8hcfVYE^!jq=-CnAGN>F%aBMzaN;cJ#*~rndg^sBi6~nmhp}LLYvjlEl8$)c zr#EWq_(n^F9HJzr#&|^L2Vv7-A>Ois9f^RZk1sHi@u*9si#8s{K*sSJtTx(H)nL z;;ybL%e9r^u6^LPFzRz>$&!nxAxvT!;uRpjNE(N=7R~qTC4bYc@}Kr+{nu}92E?i_ z_+em=6JS6X1^?oJqnJHER9mL7W}iNx1MoFT^c*{$1bCx8RVpMb1IBg*n38s_Wsuk| z7vu)O8K3o=tq8wHW(>i|h}K=EAQRl#IhG%V(e1=@^o9u>;%(uhP9bOGE6SvQ=`UF$ z&2NYKcDeTH&~Zh0r-q&19{N9E=%s#%DWXs(0_Lun-U&JEw+mLw!>PG8Rf9_)vKFKDV33{(5hMC0ai+}v{8U;` z#_U33=-OHVc+|E>&tyX5SA?li zo}*{0`PwD#xBBk=wu)3+S2X?}qGwB8HI&g!r0R}Rt>WJ|cTN1I`o>D$f#zKIRpmrh z)F_NdLm3uq7jU=el!R+(y30wJGWG^O&G5cuYq&-CI>>VI#ccu^S+Zhf zrQ#Ce9;0-fcapk6r;1GhFym$Pqj{Y{bGz^{NsVY2vXW!)w-s|7C?C~sY9V1FfA7|g zIsj!X;2tFa^H{@ecr<}JM{XCO6XX5skj&IUwSGt&Wp8{O2$!nJ>zzhy?kFyjHr>b2 zk4G}M#;C6nEF*q-RhaWYF9IMrP>_#-KR+R-diBbmIs zOD8xl;lca+OKCH5VcVTEY;`xbBimnmA=>QFD*8qZdnQ{MEz4(*|AWn}#1I|{9Kdk) zx}%hD6z#9@EC(R8c*Oq?SKE&U4h6L1003$FsoI=SHUBLkty2Ae*b&!@Wsv$+{#`1% zIy#*0-2K=bO+77nl`z$WX_6Lp8iIi404#@F3<x` zOr*z(`p=F|gn1l~t+Tzrvzl3?vpRnUnPg+ol*GA-5O|Z83)d@wWa!= zgwI=Mr->W~&jVrFG{+E&1u&SjZc5EUo(1o2M8th0p-f=2pClm*wSsPtwmlNXe?)T0 z&lbSVEP&~9bt|DGQ!WnIJ2yg2d~aZMFubGDB76-d5OX>h))rYA&jK)HOWGpc7qiO^ z{mA5QPWJHBx|GNpQW51|R)J>L1E|0f zRuI(CC%&zKGYfz_`I=8h`#X@*`62Vr2|X9738ez4i09bXhqK9%qKxW^1=kGRe(HeY z?Bpa)|D@?a+!cdMdF;s8)n)jtTw$ z{46;TyZ;VAr+K6qvg}ggIjU$m*9?9DIg_sx)kX_Y`b@Y-F|c9qU9{qcWJOLEk*mxb z1#@==reA>~Ac(`c`;^TYb?@OvwRMnubt&xG{ph3Nj!ATcoW+3xFvi@?7-pAlDpP2qS#2c3l(uDLyE9o1z+o` z4NmOR$`@7dx3nGKhW3LeWd+wT;r4am;#j#1Wl7f;(ZZd^5$H4@y7BbK=mW1pNItjT zdle8bdF}otkf8y-O@h?Y-!E5eVD*KPCJr6x7YCrf(cfihcA}}n9K{P~WdS)~oQKfD zB+d2gce*DgRTrtQ9k&ZDf>32Tz8fPkgE+TwA5Lyh^>1JInGxq-ZAPl8emhGbTMADM zw_gOta0=-x2EVC816sGvyDtr#3h6>L(sBEV5b}=5FC@KSkTW0KW%zo=G*b6rDgY*_ z5fvn#AAXrwQcR-LZ*LKx|_x 
z$27!7mXQj?h;`-}gI=hjY+}&wzdD6i@@0T;+4prbIZ|DxKC=7zwjeJ_r-fJ8RnrN@ zOj(toQ4 zS1SPc?@*;Cu(vgJx60U^L$j~}f|wfIn?9SH)GU{i2 zCn-XfZwuESd~Rqz?s!MM$~&L@_R?xyJZBr%N*j^K+L$#-P8h6L z{ot5R^O1m*x0x1D;h&!cB%3p#E7BKLpXm$_5_2+#TW)80)fIpDZ2J;Iraz`pG<4@K(58qcXDxm|4k!mqo^K+ zzKgW$2dkxNh$td6mpb&j1CM^_w|}{3XQHdwF(r;uK@zne=?1IVVaumR$v|zz@e7vg zJi%M3M`3I&Z5vnnhZHbN@f?)D?hAX6EEBw>80EcUn*g&+W?5hfA6Cw{^}7nHMzBb9su;5xg5MqT%)=V`cBst7j* zuA;kk#|rGcbOdbwGdTfFed3prwXo_vv5wAWV0vBd@rSlhl2!CW$$X;n=XiYQwdfHl zUvA&K(p&*NmGRTf48C(6G=3DFWLQ!`_3ibjnp#>&m-5vwuNso`dN#xfg8rUh>88ht zP@mdhxcto$u!h2B_}F<8TryX$%}XqX(7*BHM9chp**8yhXG^EWW=7Ghu?%uCT0wm z{`+q690zkb;Sgc5GxK4B_{@-o?U*UTxXKrmbcwimc zzGaSjv^(mxg^T6kp!PA6_9%dla;Cc|!CU|EdT(W#SX$|F0d^?eQJrh- z$8f&NQplUdzweB~b+B@~jKZ$EwAjZ%WMOe{=a4=rx}PA+uI=+7d>G#|!v8FO9#$-! zpc(ijpDIONnMd~8w=6L0Mnr5U6ZiMyzqULuc}TVRcGwbj|DHL0P*cU9G3BXe=K!9; zq43IpDCCqf_SpAN^xi=z7T3+0I#-65<6gt&^9|Cg^$QdmallGBXGFtAD+*{JOK}vg;QYBoFUw8{{%woB z_%j~$UxyRwhCa&qr@65mlcZNRiQVIiZ7R; zy7Euo>^i_F8z%&udss?I8Q_!wzK+P4ji!RRygbvNg`vF>KB0~bXK+1#CgI(bu?6EvaQ>3Fp)D4cKGzZ4q{LyDk~UPNFfUm0JPI+&ac z>HLIS_w*Hr+XixWwHj%08;`ze$+)j?&ja_p2*0T9^e^x=^d<7t6gzrzD*C&sS+abL z`t*ql5W20k^iiq4qwORJ--&&swo(5pLV+0Q*3;CR%GQaFmq^G!U_medVsiSQ8P(cx z*n0(F?x%)A+8j_;R+ye?v{GpGTv8OZLue)_VMvpfYBj~chS%dOS;u!n|JEy+`pebt zsi8Qr1*uwk#t2QU``-HEsB1**n+5sfIhXw-HR86KT>OXSpqDtp)@St)=hhwu=Kxc^x)1~nLQP3YX!kjG3_iBuDP>Uh;M&;i z)dT3NBciwCn7#Nn^7JF@nIwjGybRTS?`yZu-V3{o={L!@lyw-FbezpuD9VZ7Lj^jdxg4I0U~pRubZs41KJ@}PLiIz@U!{*Lm0g>kW^0x z`Z7M4zS^TH!dSE?38C+NYZea5yG-$j<|<~-%)7;1HZP-avZpB&zQ1&qvCankkSv6x z0_l)D&9KzLXH^F7Kl|OP=T`WGm(RWrLYS2KnT9gmH~}g_|5{TT$2*HOSsS@JDhk<( zCMM_Z>wSljgH}GZN#4^-pFN>jr8d7p@6fZG43}ZY7y=+}ALmB%=z&^UBX4RxCO({{V%k%zOIz`JXaY6n~vAP^)S zJh4?$F!%HkyhuQDTn!iUbGw=VQ!VK~)m)d!V7wh*%wL{z|4B68>h!YIO!$j#h>;Ci z-u<|Q1;C$>?#<1Q;Y{~+gNxnLEX**O+OPA2J>BRETb2W*FBI$n29F9Ik^u))TaL+T zDy@@9)f%N6Akq{KQ8_E-Qz_yrOkoS9;uTkCqJ@J@dnNcT=FD$po#XfLma3=G(sf8P zltnpib`3}FZw%o*#F6yY>(8l4q{^&njc3h;Pe&Jj)&!F@NMMHxx9v`#^U$IMze)ak zjv#>YBBLa@krJLdcyjq+zIjwopW2UIRrqot{FgUd1UZ_79BHb_hn0|?y1HK8y$5#p z7cKa1$y~!m7tKp)JuxE7bfwsjGh7qc82Pt8?q=UZ^hE@90Q|%O_b383tfmHlOt;So z)U~OyUwbFIL1oo0^+%aoh6Y%Vg4$P5@_?356PTbV2Dh+6(oy1%G!#ftV>-5JKR>9i z)5M^os4ZM=a_KsV!2KVZne(TZ)<#V!LKMvRGJ@QIcry5nIxQeuOA6Ftgbr1WlqXHUOo3}Jhl)n?mQECbqDN)HrI>y8* z;K^q$YPYGIod;8~>p4@4k~(w>gF)-?&+)7At5rYI6pR=Szb)?V*N@cwYc^mjleB<`ED+up_6{tL;4pdOn)dp(2*@RsSfZe@ zBG)D+aXxqImOc39U2p)<67uE(WRstv%jtg!CSjt`Eo^fV6*!-IsXn%mBJ=9$08MQ1 z>C(gdixEEP%+z&XN7R_0Ew@tO@1S^DAapB|&e2-#Pr=i(Nm+PY5%mFtWpAze2>N@K_OwsGN2(mZ>ESUF->`Yp$l)j8`&#^=cy+;33phh+YeW? 
zRBzdQ>7drTqI>vzKo;5vHRQ%D|aJu-OXNx|~pWyIUaff`=QrJ)-w~=W)sb#jdl2{xsa^HBS zCE0`5Fm?c^sqonyZEVyP$ zEgnLz*>R47gwt#?L6G78wJ^xqYv&a_j5>yM_$wvD2lxnp1pQ#yP90a*9jVD?7fO8s zWcu)2+8dl8G)5lDmnogV*8J~1;Xnyossd@5A|KD9X>7lC;oNg(o(nH~>g^ht`la_k z+guCxUt}EdWtIjx4OyRl3OE{_TN4?}8dI5yO8|2dEZABJTQ8khX`X}(bT?=1S_NU{ zrF^@j@C9(&WgMiEHs4&XK(0UmJvsTICqqxd8fclK+f6%6FMp$CHzb5!;9THny7e$$ z@4;X2Zx_5swS7I;{(_XB9W6#w5G~7-0gXYSL|IEF1~btAK?n^PpovzL{(7fxx#O+l ze(hWK_j{=#Tau_Af}dkEi-x3wJ|T4PJcVh75z?RVS%@(fyb@W()!`HIuc zb9%Fc`gZBt+wo?1Tud^2l&x%Pl&-MZN_T({Q!BT47VvGH@tOf2NxYpIh2bQu)wPJH zLSYB2qb8f1g%?Gcw>p<2lC64+R@u_Gye@43tmjZ3x=F*-$oNz_ic%O4qW6QUI9ifxryRhxZEJGM1mpeXdL2_(8hG($eOg1~0Us$D*dg>gx{s};4yr%5mO2SmB@dJzJ)$fQ_{f~Pr0DhCeDcZxJ7PSJuz*H5< zhB=B1!wlBjPHncb?UwK(8I&eq`gA`P>FE>5cNEXQx8>g@sefbm<~hvSO=w*!EFqS} zO^8st1r{9e#HU27kfLKNrAaq<2DyCjBm!b5?}ea zZEnzSJ7quVWv|wCu$R81Bdy)aimXQNIo5_-(d`QUCMSz>^vWAM%j94uMyMDdTh3f` z@|V+#FSn6Y@d-?a%XO$Y1X-o1KATXLCdiOff#>N+pD$o?1OA==oozQtJ}1LDT~@nC zH!m{!!}TzJ6}hsQNQAeXpzQE7mB@TQ(Y>!PNjw*_Zg&mi83l*QO3f%LlJz+4%g=?V z!-RlCF|G9Kr3b_-=Kx``JD0nMmC9w~Ko(KVCQL(gr1DJguduS_0{rP07eK9R*V^Z= zyuQ~-)L$#TQ8C$@&u99-5aYP+b(1EJ(ckWh37NSr*f7Jr4|(W{c7A!M|8$fv`}MYg zQUUvDydw1Lwp3}`q>_c~h$n}x|DBD^(eAtJ(b}$eqy(n6Jk929xLOXuZ}?DJOaP)s zZ<;~Vc}*u)$v40{4k0H4@d<$;UlG3HY7h?Z&E4S$FiiNFKwzsjGp>>pTZ@tOutF+I z2EGs7%3)s=E=EGe8hZ+R+T7+ry&<6?k@OBQ5B-2~)HE1byvcphKK{-L{w)MrmweO?|wz5 z>>}exa$q=#zCey-qrde{3m^5v9djBO zwUihBp_(ANVu^GzQCZs5tSm|!;b{CldZli`!@6`@sC@7Hah z&JKGI{E|qH55kUsc$DyyD|WVh=NfmpsnX)+4Uuh`)#t13*;^7jiRrBgBtaJLpVi|^ z_(afKqCtPxgG;+lXDWF;mb$wlVpvAKz<{yGn*YytXe z>gKD>mus?DxwX9p1xj-xD^G5h9(dgpI@_u)wH5`xqud|I%7ViQ2`mzP*4X2A%LvSi zS2oJh)Fd<`-}(pxTQXY@_=7#pq>%a_2V^t`?097q7C1Admp1h-?)@%H1=}&X zm@`$j&Hz;Ye8;yX*9+lBm}oRXLA{pQJQnMS-zrXXwWHS-a$lkh`AVBHQNJX8(IiQ2 zMPgDm_cD=-%G(a{WEv6CKw3zFb3rs_2jb*DcQigNE~)vo)W``1z#;RAHzxHXyc?)f z5#Cj4&1Y(GSaA$wr6hM(4)Gh70<*Ha{cy#SOfQSB0cT9>9h2dxOw#OiJrW=oh{Hhm z%(6_3ZXm4O`%vQqMe{MBPEnM12FjhptR61IJV8`FTEnckL4Q!fK0vJI&_}uAFe7^l z*Eu!xvkpZ^c47?=Td3%s>v+V{E{VA3O@w$UeMN^!JnMjTs?4wsD{iW*wLNRKV50Wp zcOIq@{!}^dw8NuOkV7YS#?hA=t@sCD#VIAkpAiz>-UdHETTT{&(7FB zNXh2rN{rm!ZUGkd;wy~1uLC3owu2VSu=a7{TrK^Xc%g%au8=F;?+8LUt;~9jb=^E|F$Qqg z3a15vD@z2w_!`;yO$id_8}-HyrHHlZyTt@msHWvE?qx%3H;YWPS6N@oYnhk`3TDwfW!sR&}b zl&Qjqp{MqDVM7a z)pw&!?oUM7l!)8W1a$oQB%L=_JGjf6Wpyh4)l(7c%IivwuV3Y z#ocDQ1u%YCBbsO$2=jDyjrUJIuud&UVg=B| zhC&*6DpPiC2c~mRR2|L`@jC1hPMSdQF!;N(Wh&DuG0DXYeIRn@lYf-Ut+t&kc_V+O zOM@f(!~LH&{nQQzLF{>e%S-7m()x1`Y1Ll~cl;WVhgwSaA608}!{4TmAAq6iOeUFM z4N?q6hjf@rBL!IbDv2}_w%!^c@p>A_dtMm2VmwyHJNlcxzOG1Mp>R!wD+Fbmj-i^u z*ArYHBi}SX2;Pi{DuIB+sC%DZih40nCAFy0L znyRO9IkEm&R*UdE>Ojp+6JItdO>T}kaGyAu3l6q*fRFHERCW|+`z;uF@AIV36lVUQ zS4L+}jiaWF>gPom;THrbEeQ~dj+_^;j0GaqF*F5>#Che4g4|&sQRz$MYFU&v@!?CJ zOV!X#3dX0X&mYK1^zMQUyP}f3NqdRb#^FUnjF_hp#;eOw?r}}pVZ?=|h zH~Ch0&Y-q`h)K*^eztcyet* z5JfRW>T)mkYf7i6_fQ3PAPo`5OmMVUNp7P}A5yrT;}Rk3a_+x8EiaD*yPjxfcZQI4 zz&(I6k;(=+daE%pV0Jod%+!HcD>sL>0MaMZ5j7wWTGyh)c&NxyI;=4%kSZD+4 zPewUA)1>*Eeh6q&tgX74xL*Op)iDFF?Lx;J=xDzC&3-*j+tvvi;*Pjo(oS6^-HoqN zaj5LrBj!|{DMyqh0y(GZ9hR`0=TNCUa@P8l#qOqv^*^XGN`6r5Zb7$OJTu)TX4-uilHAm$P7Uf zKYhbc%7s|6fJ@r3FtHm>=n+sNQeA*5I*UVfEut?&{=UKZW&CZkyyM3vilzng*odvB zujRx3e3N<>q8=JAQBK|hxl+*vUa4{epj_*H=n3SwX(!%`22V~+8K+~tDr6}>^=)v~ z0MtmwH0s3aWrdLG?c{3fw5DzdlUgda0(Lujr9++a_TJjpdV0T?n4)T5fBcmX{{L1K z>ltFxnB4FSZAGd@Z46qAJtk^v%_+3P;Du#=kkK|~VC%qO@3TeLZ^N8J1+y|B;h70%JJ0i<&{FB}BOT%1+@6kcS#RcsQ9awgFZyg#s<6YXJzyFuWgG&gigT91zO)Sa z1C&pYW4}KcV64~B(yukmsd|=?3bie(lJ2|E`?enKpPcE&8}pj`ZAHh4(=g|Ey=_cjo@_*%RTud^2>ErpX5DVD@8)3^V!{>CHKAiiH%@CU@iNArrMuN9!iN7 z$!d4vxnwpvYkezX}cPwG2Jw3w8X9ep#knaX|Y%VPJQ)){RCB 
zAeL_b!VhH+ww_w@@RNA<3NjxtB7T>Np}wuQHxB@A81F1!i-668U?(q034|460at;& zoSN4-n20uq7Z@xd@HFr+B-qSiR^J_~3vDGl>W=CaDxy|_-O^*_IPL!19w(PL>jrD| z&|^T4DXoTKfuc;S+BXZ#^#mL&nhh=)Q8PE2BUP58ltzBnW28e-X+nD8)QdBKNCU(a zODcelL)7^Wn#v@65Eu#cZBZ&5i9}NS{ca~sMyz=noxuoM`mY(5$Vu`3_%v-gxjOHO z-Q&f1Xg<&|#GroGK8KmDxw?co?*uKwh;fdm;-hYPO%1EPgr2}Wbodh}+@7{07AOUP z+eB&v6K)`?W8igB)MBD$FV{YN?1@o+FM%(i(5*))vHDbhNzbqIr3yUqFok*JlmGJ; zGOmd-GKQ9eIF2hJ5a#NQBN*;DGlhdi!OZU30`$a^?ZZQ2l*icKF2ls1M6oh{pSNN{qOMlDg;}o%iBoNb^ z#Q@M8=+~kN6Ib?H0+EUOHmmw}L8#k|w#BT0?PyTZI_ZK5YMP&{sTQiWI%-yD8}hRh z+Fm`C;p^32|peFH$?2-UL)Vyu(K{)U9g~QYiwc~$5 z?IPEyt6$&hU+K$rrPfU{^#fv_UXTzzXM_yoC}Rqe4_jxI)F%Zrrz-fC>^3KGX6QBy zEW0|zy7S^M-P59y#6|^BGiw?@@HF<8m(`@0{kQ`9mUp(X*8ixUn6wd#X-JBq#B3Kw zL<_IcYAAr-8Pfx=8B?}P917W?YkC1`mC)WrkMwTn542*!l$-e({n=pwq#B}la};R( zkW*%JpZhDL5P#ESHXgR=_l$l_YW^7zy(155w3~(SoekfSR5Bsbk0A)0Ru4h3QCKTE z&Gfar)5FsUeYE57GWr=s5vPkxrr96;9NUyG)`W`ocYaIOo16ToCzzKEosm8 z=xlXuaFpo&$(&%@Ftq&nJZvbyPS?)3?sRk$=}%hZ^Zh>A;5O>hI4QQTtlb#bh7X)B zWKMeG=PWkoU$A?-=Sb<;tpA1Q08x?B#Gkyq2p>!b8`i(e40gDf-%pXDsy>bVJX1m1 zlsEsXI5ko;OE}N7&6KTaB{W{=WIb`@{!00w+86C%Q{ON|By7cN`LT;9;YeX%#2yw# zto6*&oDwHPZ!ak**KQ^a(w2^ku$kPX0AGvJtEvDvKy8xqKb2ixgIU$)#bhR^dv`@x zORk^htNK0&~u0*ys@y)TX7UD&7PVaEQ!Z7Gv#-oDjS~Xb08kTVq^XN^&JNa*Y!L7EBqLM z^)kI=IImTg&K#uIVQ~>+z_!4_yhH?R?~CTrbsqR_d~!Yu#rYLRm+dcV5}`U`U#ESB zOTpbAmgTw~baaR$V|(|a?XK;Ca{eHX$-pZ2r4JE}$LEmW%ry1Cg-qMA(>NLlOA&f^ zClc|5B}mk@+n&T}-idTD1h0HLsfN){eZQ)RBe+-6s&LzQtkr)=g+IIwWXdz$v*sV} zvK!F`E&Olk(DQatO;}kQ?P!e+P79McftiYfZ=;%;4Xk&V_TWdAxFwqWcC-2;zkImV zfbm8-&AkmpK5px|4rlS-CM>6ZLRx+P|rvS*$evx2HhRl@SGxUPE zVa(dx>=WPBQ5T8)Yu z#fu86xqApL!T?M)#WIU*J|Y)v@Ie*678@INF{BySr35*3|7C6bxozGelIkPZh?(1A z>!$OR6j_k76|W++Qw+S9LM0zfP;V%6$s0v3oan3=xgG*gYm}Qy+b+SZ>pbDCkXvSf2*!XI| zHIHm&E!v$=(^=EtOZZvJi9Og|{BVhfV`1~*h?wy@n-MyCo{r?d*@TaNHjhl32HkQ6 zRi877503Ike)`gU5TU@EtoynebyHBssb^1-PuBV9a_xIA(JTA4|ozioA6~a4%U?2%n zqT?CwF+gBVl+)zD8H&+QWMWH^rD1a(b@iLZJ< zfj4cQK2fqSmTYYTr%@~g&JQv4zqTg)bXPk&Y~agNy(KOf&g~4v_7)994jUr1)6X($ar_7@^Js%wfnq#NXLiOiS=xM(Ua9_PgzWA7SEL?Kr(ZL znt?@6M$+Z-H(gBB9>`b^XtbzStCoVu@i^f9-nSns-J7ewu=nV#;kAca*8N#gpCja5 zfa<&~R|F-D(&;x&5z%L|HB77otILOhGbP&30KEU6|LMRGd`z0jiOT5Kc zgQy%|yXCVs7+FTiWrPtMG)z2y?v-)m@bY$Kp5Hc0_Q~zrZY?P~TtgZ5bhDOHUVLPO z-z`2JNe8uB-8s%9qx_~*G50`r8lZ%D^g(8J%7PNH>C*>yW+)ZR%Xy)-oKQIp=23AE z*J4e8Xka&G!7MPqjoJz!}_2aeN)pNHUEJNDbT=KL2 z)(Gf40TeD6;!-=#cM+|)6;UFH;`oVd1I$Xu1>GR^+~mSG@=N(iRNB;x!d_WuoT<)% z)Y)cI@*xMn`~bhFKoZCoIMb&0hlo|OcH$*~1-2n=&#R4?;D0!wi^Cj2xWDL zFFOR`w48(F+31f#`L*gHKc~l}J_Ad6e)gf_zfG9^%@LLFIHV440~#Xi(XXXi!JS(( zu@FqqD=O4R+RB5SuVbsf3rck_Edxq+{@2|?2k(eU3AA&llQvknZ|mtl`}LU(6vCO{mc4^JsTFp8Ur@}o|#Gd97BT|hMbx79-Q-aYc95j}05yQk~-+(Q#;+9-f3YcJ1B;(Q4(RlX>GWpZ}|ncp-xDi%QyT z<01G;XsFcl5Y4t1aS(B_2DJ#Iv$aQ?Ri){VHrbP9nWV00Rh~Tr@Ukz3YBc|MP=6WG z5wJ21wQvy-y_`7_*TZfuhq^ZSiNSA;s8Vlotrrxo3@%bh)JPjgW0e}yEdPo-ybw+} z@SD8~8<@Nj=V&!oWB|-qP+FU$bxcZ5Cugp-Ta16}Vxu$tue7BE9g!c8zf_Y-}*LWz$lc?it}wK-CrEgdEPdmDhG$%@3<0WnbrSRmIS0j1DK3m&6l zhv3@BHSv(q-w`FeC?>bDb@>wE4z-uFGo_<|k@_pBaX5(oqumDDK>kE}I}Lv1f@Ec? 
z<@t%ZO3S^F2M~)_YCmJ;SCzk=t?8+@j2-Y>NB*~5Zr~q{)vpSDfnV(8K}Dw8+F=-_ zFFk0~rx8}E1yq*}9#@#3`z?nEu7tKovY^C&j|XTPV4ezw84t0QIUDo@V=|H4wm?Sm z?LYi}p<&Zj=T57F{VcCrn$RZ~G?pcI zLo`F*DPL-7M#lWx(J~)Gv=SEzcsQVjb0p5Buc}8AN@vs_4-42o*ZSsCsDlrXWU9c& zY=)5+|E}2z8zC8e^9`kO*q7gpY z&Y?{jxF9Lcjw!zo5?Si1lRop!wNR&1HH<$cPYK1QqwUnr7pOh>d(Pu62*H@iyYJ1y zw{e#wE4?86JQT`oAZ=DGjKzYGS#KtqV_nWUj^G5fK8O3sDT< ze-uCrs45vkExiYPch=g=OJ^^v9{pPu1!CjAWy;fl3IVGHP< zm+-HAbNoHpd9%?JZ@VA$5u%IXuwnV>=UmXGIX)ue(YI9v)tOUEo-2kNzXetPyYS$T zeTbds-B)O;U8VWX5GQ~kPePfeH+NpwV%r)snaY_rm$aRBw@tNJ{oi+P z0p$7-2LV#pV5z8K0%?>xRUDMLNX-sHl-3oTb${M8Q(e~YEKL8e9Z*gy?(d%^D|vN( zb41nEIpB>`C#_HFjm6Bj4Aea4=vt%Bg`^#jiXHG5ywaK63X1)~^$NXxux7#lDKmu& z+JI!4vhbmU%Lc2~C1ncNzC!vKRGhY)uJq8LqK0-F`9ZT=(eyt#3%HQVF;)O^ymEIm zZ((lLm*ZqiSt#V~AX;ramqZ^g+-=&d*;1_w$QNH2qO4610*&a~hKS{x(sHYyUF9B?rJ*@{#4&-YGDkZru4^Ha^ zXlQ@6(+g?Dnknaopjjq#7xnjbIm|t1(|l8V3g#1EE|r?OAg*FYICF2bPIAueMAB!W z1s@u|Ql{S3>b;U;hnCaA%J}XJ*{A<+3EN228;P_*s2wPM391&HxkOx%l!wPl`-3Na zL~|z&7}Gr!NvW~xT+wat(_tY!ZBdCh@DZ110rN&GIQ5PP_$WX@NrqfU`M*-A{ePu^ zbmJ;T_yAz}1onxAH@`-)n5U0;@Fs(-Qh#2@$>=`T7}maL4{-Jvc{T420@ ziGl7lIv(VE(bXc+za9m!W7(iLU1@`q_I(KopcNZUr$WLGz}l1~{WOG~p8Se29Z{^H z_zlkGQc#=rO8E(GE^VHA=hT_&-}5y{RR%m@`RaO98*W0f`~UM@>k1mha*YfO(`qPl z$S~H#5SHGnA6aG3i=5rGd}v!rl|MY`JUCLW=!TT+jB=*`_OdNfp9 z?<+8!LCTc}4L`wRmS4&2EVaJsE8?$YXu20tT=heE$?rFgrbYif){)RRmF@93nY~@< z?TZ|_UOfk2Gq^FQ`Q7}tnkK9QJ3z8R!^8^O3bHzKV&QkIWEEzV?ZWSiXOt;|h@C-l zM!(dl9FelK_XjH~F5iqqwBgLbaQ>;9>No8eav)?m!Mz+m{SK%zCTI)O-CRlQxnlQ_ z-V)IsCQ!WQ95X?ccSq;2wp-%-H$&UU)uV9z?20 z%8%X4cXX#q8#Lx+f!I!NWuX-j6lbRBAIHZPaK@zRy&{Rr`gTJLvjg4w>JjWD{`-&( z?(gKeE3alz%wu&$kT&)I>F(VBp-lTYo|cT2Otcwc(19L8$mGS2MrAB!Q3$&t$;vVa zuUUu8EXJuRE1Iz65Gi3C8|x796xLzWIt<34nIy((NRm^}cebYb7xtMSuh+b;`?~J$ zb-3^A^ZCB-wa5shWtT=_^(n&3%H?8OS&he^+T4rOJ=t64x`!D_rU;2?f0!iKTHZ^p z-9A6#j#DrT<&521xOetbh!b%~3-r*)(>ZvgNR#`n*X`lANCcY!SyF+f;l$(Z`+oLl zUVQ#&>vdWt-?XEyqFB?a?ljTzzxd~rjFE}ONrHFu*#$Z;rm^&eW1}z(D_3w91%X!V z3e^bL*8cJa#^cd8si~9cc#oEy3217ZQ5J6(s?F0S{TaQg7qQ-iF7;CmDr^numwf*R zR^TmD`LNv3Rc(_Ci)WT*e!G9>c4~xlJ8LB#D}xTm8$sz^H=!QU7<5{I5v1H|LP5VzYES=|rP@b?Ajuu4kNuzML;D1Q5tj*;t?$SWKSpqUfB`MH$dkDEgTyTU8sA| zW351?7jwPYmN>hR3a$V(r;E8UQ9#tv9Nk6U1T|h-@p2QWN-WR16w)@}JB1#8 zH=ZJX6wK185#Hnr2{z^A3(k#vo!By_aI-;f4OK}h%3lPfM!(f?2%ulA7@}(kCx!?} zpR%3%@WH;vrt(vc|7QYFWS)sNGXrRf>6>e!ft_i(3kCej8&|>sXjNInsBPcv5@5*1_e2}?+KN^t*P1hSrSiV6 zuq%AoNSE~X1hwDWc3_um0f3!NY}{DZznh5349EfGU*At@?y>wePPweLaiw+JjWa=hmCBmy84=t1D8f~@D;mcj93XH75JM>` zdk%7R6uEp;fPtI569Hz7ICjNK+;|W$5GDI2fCyu{+X3UC)ZAEDV0G)8tsi;|)}irs z`()Saf26?Rb&^Z6jeI-HW--UeYl*Yp=}4#tcW6})03*{|8x_`S(r_8%)ZE@6Zi4z; z#YVKW34)^j!jVf%v_q=Km$CMnjW6}sjJ>QUt*EZE-ESJ;g>5CPbkpK_e==PBS?n{6 zq%-PTBjNK8NlVYn<76y+SK~95vs73pSwOmWU4m|wG=v{-wG=kH4!qR;4YTZ@ii3(n@&)t40XRckbv~$1C zI!(2hD2yx>hZb~qFLA%}Q66!Bf`9!29>veBAI1*i1JPH<2g*H8h-WxCWQimCdeaZ| zI2Ml_nwHyqg6zi#J91n_S~L*yrLP#P<;NI~+gpC8AihJow?Zd!`!^lfXX!*W6%6;9T)lWEpi4e|MN)!`jQSg<~f*E$#^mV%-Cr2GVsv+JH z0bX4loKQDGKSB?D4=f9sQpQW*>HGEN;74!Q;}e1WiGcP1%ac*plhGW3#Xcf#wa<93 zq{Xq-;Q#vw8y5dn5+iI}DB%A-v~XaDyLB{N?T!E6 zzm_7_3qt-+?7woDP+mEiF(=wg5VUS#0K?{VN@C_KQhbz_JsK`^6Ao z-h`RU?vq@N2|C;tU8(rc8(rQt5QK@#GJ25M)+&FR2Eg7)d$I8$F7hAkXOlv!5u+Yk ze)_9%q8;t!$LIK$55B{Ub!tjA{H0^4Y&AtY(B}f`9Nf*0Hwtc*D^ZDhHQBI1Q}C=! 
zw3Wu{e21Cv0>mw(SNq{_D*A(BJVY;<}n>j;pR;cO)AS3?BY|(w~_Md6R(E*?I5N<#(Qq>?6 ztA2dxXmpQc?U}fjN7ECNC0|S6eBk2= zb6|cRD<)LX#o2WG|E{&b4T&}`jGRx!>aa?>d3BC6k22&ar@^4f!RT8SAL0J`I%oZA zd;C#r$)F6W0J8Iz!c+w+HPl{6Dmnw3&o{fHQWlfXG?sV76_O3La92bf=OMAcO$L4)B%p8wK&Lmk7RMd-P z7lIF#onZREC=mBVk{a*+I)1vOZS{RsDu(QAh#~Qd(39ichQ~!_yYrX zf2v>J?pv9JwTSd%Mg4=KW5s_~nSq4`a@MW~(ztiO0vuNDnbB?I@0+EY2vTGa8f$I0 z9U66C@j6TSU%f|T{aBS8bCcfn`af3*1$@$im$Wsr9rv_>Ej={#leW5hf7^P}HWUia zjnFGD^4yp*oXio1Wdy%nBRvHv?>Cv}W&Th7)F^Q~v`S9E_jNYdE&qOefNef2S8xY6 zxg0<$^7P>s9I&F=J=vUWq`Ffl66U!{e95%w=g5-Ge`%vY5DO_POnNFV^7>bVwEvA~ zERu?dwpG`CGCD~tP9lG=yalG<;Noy~UG&IN>QPp@uF(JAuc!>*VXe(TPa0P9#>PaI zb5+u7A*S14QfUH1mL-|g$G12PXk@pzz9o&5_ob&4Adxij>^{3P0z$X5xs}PD@uq=aI?OiDQDzg7H=Hy2frR` zpY1sXao=T0oigq3Qk@WetW*X$U9$6uFE1KfkEF4Zwv?ZjgnZvWl1U_cQ`7ZOzB;pL zoSXcwuZy!F%`ExxA$$R|xr>I8D=l_7gHwoT3v!RR(5h_)V9$%dVgyqU_jBszV^Z-^DhVz8+N=2j2{22$ z)5$}s`wUmvS_U*rD0{30Ukov1`>gpf?6p#1oOC^&6bC)tNZ{?HV!ss)(^f+LMnF9Em{17Pwba(G+OfeQN-&(!0M7i6OjUy@pY}oGPm&Jt=7KOMfJjqVK zhy9WgGUOX(dpoy_wo_5yys3|y>*Foq{Z^7$zNVA?l4%rB!5{>`+S$#P(CB?nI3b0{ zC}%zNbc7wnIVm1vJn3g8h5u#;EOywYMhd*fu%O@6gvP&?U2{7R?Mq@C?XZLPo@^VC zyJPC*Tv|4wFYdOIMi}w~0RbniCCZ=8j)``E9xXQP6s4Oazb4S@27fW0Z@2wG?)`~= zF7&!M+aPqE7Nn!{%PbOzQNrhE7sCRFA~TFd^QG@UGz<%4|mk%cM{M4{# zW~=vtbU*j z{!QM#5lwoKxG24}z=+@l`u4=-UCC+O_Nj0-BiDy_H{CQmsO?6#);D_9G}t+!*{@!y z=yr|0@uwu%8H#n(YuQ^4F!YACq1o<)J2O z)O;{UQci_u>k^cAW^?Dqy80#oy>gteT#Mmp)dQS;LC94g%ukK*7%)8Uu^IK8kFu zg+Y|ZsF@ML_dA3=qBmHMx5*PKPjKFrX*v^fb%Z;2!|Gllf5Z4E4D4(R#dc%ZYs5oJ zI$Zjg>tBS#{A!ixWii)?b;D-}Y6`#hO9}K24e7~+1i>%0KXO=E^36GG%G4Y~ioU9^ z-10wGw2-8Q%=REs9^v;!V?Js-hNm>BiN%S3AsPlj9x8m;F-$V?W{E}ut06l7rc*|Y zFb6}-3$EJkus9Ngn}&%#L6;UKEjwD^PaTxHTAUdXB}oqJpn(oX_&G7K+NYO4qFkeYc!v-e@}7`|+I%ZIBTPwO>DVGK+EKQvJ#{kAE!I z`;~AGI<>!Pyi^Ik+**Wju=~Ndp$~@*au{$LYy8`2_YIeta?Wp4?3EJsopZ4_nVzW` zD8J~Aan~lWyn{r6QAigP=j2Uqnu7~7pEu*#JEU{ABtEgu`8&r~b~X==X0l-%ooxV) zmtL#;?_1l2?10(Io>SYK`zfQ4qmUd^lVW~KDA?3{p*J$6o^K5-FZACInd?Z4f5sdX zwDsF)irMCLm|^sr!0;YbZR3%&GSox%K0N6~Z_)IC!bU zh&jwcGdaw|(^KOuotGLprO5Agvt373gpRk3ZGCW}M{#*`ix<7-jXN#pWF|fmarOp; zqLW)yZl(9UdpxM9Q3u4%WTrfWFhQ0Y-4sfn|{a!6v$u~<3V=IN7Bg2vU96IS~4jITRsbu+>0K0;uo!}`J9od23k_hx2S%G_bBYu9Bi1LcdYW_{pOi=3r zNS$eUD}W0}(t!M6ravA3ST>)hE!X%zjY-t~Tw?<-TL%30x80kf6T43AJ$5W_tGa%I z2K{>@15vK^H*fy!h_xtSQ#&Z|tl*L%6a^LzY`@ivv z>K+Lm-2I?Tmow9T+QwpXyUt#}83)icqUMDZhDg_o*+K{F9Rgsgj=MY(TZftxn=dfe zTfw_iu`2gQw`_1f=zkLf79ngHckSEA?q}XFNrYlIF*HUj`dlE<2RoxNmXp@V@3gc! 
[GIT binary patch data omitted (literal 47228)]

diff --git a/doc/design/images/graph_construction_example_forward_only.png b/doc/design/images/graph_construction_example_forward_only.png
index 14805df11fc09f64d6bc17f5e969f1400d615148..25d19088cbf0b5f68cf734f2ff21eba8af4a2860 100644
GIT binary patch
literal 32270
[binary PNG patch data omitted]
z&(4Q!t$IHmkJJ^HH5D1866Hij8pxgVj4>(Z<5!PyzcXVqnbVHEKkv6KFMGyf%#goT zm0POdl2s`jS-uUYuL`NeXc+1LeEOdKKybI)sq~X-P`-IKEcDEKDA+dxX{J;GDOTN9 zy+J5M8kIhI6pok6R@I~ISWE2ldQedL>}bsgF*I{!0nn-VPXd;n_*!HP|G+~~2-;s) zAVmCFZNexgfURn=J<{V!fuEr5Cad&qA?zcRnq4net~iG3&R9B+=|TLlc4ThtS6l04 zKmGYC?CsL}9u1HPEVADmfi*)-dsjDW)C*#HU7(~v?M5h7M#22xL4YzRYr9SJF?%J( z$5h)M6T*90lhJPpZh8E5m1j*0x!8w-Dc_-uG>z38;Z?XYJTC^{)OOG|end~%d)qXY znzN!P|8HtJ1Ay#tQSC9+^7jp2d}bD(*@)3xd}h>AzPi*|uV~0{?ZitJo-Vq& zNHVq7w2~Yns|$KRor{7MF%*~_2& zN{s?iAV4ir)&u!dm}AaI^S?c8*kF;b1cnB(Gjw%G$lX%IbjM#44GWW1yfEYSzWN~v zn}=Fsx+m1Mhxtz@z0j7`Z4lP(ezRDdux=lk$%!6*OJB|@9Ge?1rHrw0IgXTeF#u-4Unfy#x(STcliVNsh zniR(bIheCp6zEe^XdPWrBQhaBFHh42%rX))6M_5fxDnE^oA*7~7Oiz6vpSkatGqaH zH&Hyoi?vAER}V?wpOtZWbRUatt-{yB-iD#g7IBja!O^4)&a#a#7Ph>;JhD5VyK>Dl zqND9VRgS5roM{MOB=fpewdj+$-jzA*jV0_P>8{F5xFg6TSU3HmBzU|_8R42hf!7vQ zV>}e5326CiO=7U1Dty#~Ej3dph-76IW)l!h1lPTh;qF+eEKTE{nkYJ}{kE84so+=4 z2x&1r#)fqOtJPwSlnP@VL+!LePyv*#_kHeyagsJm|3XRt*=2xqzM1oy*X;=V@K06@ zDwz?jjvy44+4JG`?_R%hlXi7#GTP5x1a7qqHEQ0ocY&BW)+{5O`fgNez_6$MeXGoA z+oN^$f=R0JiAM%{@mG9FC~8-xMLCC{FOY=&!FxSA@Bhig!l8o6r_C$`s6mDc zB}q^B`OHaL22zK!ee{(9%-Ny@m$z%zw6qs-M-e@0;nx1;{;}>$R*cBhLx<|7c&s)$ z{^60i5XVeD4|F3qDqhya7KLn?UN`Es`D4`_*GdG6BMK-*d?q(X3zYDKA|Yk(Da<^( zEW;CXe**|Zf8c(CP|8z|PFe63{+w7Q~reI%4(6SE&Lr#Nq!5)*5x2e7T zTr4w=IvO`#FO6WVWS}_DozEWYsqI3vyjW91TA#6~+vvK_^tVtEL>D6gAgk#4Q(`dP zygc^8D&QPWkg?C?48(|rdA@qt3OBuIwli}bf572Cno$*@ltN3@m{L!fnu+bIodM&Q zaBJoNpfcnuI=nOp`I`i1;he6Eddo?RqET*w@aHH@0v{I6uG81o7drdnS|DBdQ7l@T zN7=mA8L;_i)1~!Lljpen0hqt{f9fy=_<*vGNi@LiQUHI-U1c!wfx%$x{dL=8thtHZ z97PKSStTqA=7JSR|0&;%8v0#!F8!iz?sD@Y``@!=U7>{tgOK&Q>bF`hdrTnSrW$G+ z?WK#j-(aS4@NsrUzj7KK*TgxDX_(-}xU8Z#KH10gVqa!^+sz0)(lO>-bN>%=PoO~q zmG%6?HqJbktbpKTfAM$Nf@x2gk|;%IWa?5W+w-Ul2Iv|XaPn)a&*F}5+jYy?=dgI(H_5Y zqcS}XOMyw+yCq$kR#?A`6;&HuD zR(10O#+$-8WE(vgbZ$+-guuxLyX_>IW0;zDg(W8qyZ_|#4qZ$b1T$zdDh!M29Hr9} zMM1<HEA{UE;GxF_5f?IMnYuVU|( zQTnW$@myB=U#;aSL}~}dDTbqL+HV6qPMwD6qm=S9XfH0uJJFpUqpNYRpf?-&pDG5B zIYL^Vss=t2%#;%HHN6Kc>IT~VEd9G9UmzMEV3oS!qauRo^+_{uFO$jsdPWA4i1l*- zG*s^pkxPRZU@wi_{^=YTFzEMVqCmcqV&AS8nDwkoui&2czltp&(ANc#9+}*(!4hULrGY>2OUb(9G7C7eXp`x}^ZQ?fWeHl-qnXE5&09Z^48x3KK;(b|Sf24jUW&@u`p1fq| zAGG!lDFFfRTG|1tT{A=rMCB#Dm^TEnnl!vu6D6X!xZeYQ3TkkzVrn6^pVJ1)3LH;% z!tNcs-k}$iw$De`YCY1uwEd*}uawXA^{rLS6Rqd~XS@NYIj-IWBWNTh`pLpL_}N)) z7in3|q3?MM&rXYGW8Y-vtE$Jbxi;Zr@}!3$1G}uAE+k`aj*(8o9q5+r8#egQyF;-j ztBljA_iO(#ZZQ-{-99KZQeuJ3dtPRn*$>1YSX5#U>mGDd_Mwfg77bBj^t-i8%I9>7 zecaqlYT#RLGsKb@fpL_fXt(ugFk!>ngM;MnZ88QXz60TiwpEy8%jJ!c<@Chg|M>Aw zMk@0JQw_7%X;!?6-QDXSCm2En@7Eu~O=U~$Nf?@Htrvsbu3JwCl+LHQ zUEYqb5(Va$;xPaYyA899O<%ZbHI|D0bv`*n`1k+-(km9QXHGXsDCr9UCdd^{4u+jL z?V9$F9d3}zy+x$QR@*KvU-o(a&B)Y}pKV2txgm*){edBnBBav47izwmv?f&{w)|$z zS86c{uLq)_4PaW$6W{tir}}Sn6+;D3eD8=wFb}1ej-lhLPoJHJgO<}vcs=+e`-SLF zWH|^E6sNBEfKObap?A3W?C!}$o`q=1b=y{_{ZFyzNG`{MNYAUb(~(2IXOaR%UQPaSvFjQ_2jj=K)Y;chp%kOqQhd`7zuQXe{Xx0>B0R#qC;A2X=5G@jodWT=o$B zz7bGD$$EBtGx=~A1~IEDyw$-?cvb$Vl5P5Zcx>Vl1^7uPD08d0j}R|(KQIBo{x5%0 zoEQ#(BEQ3&z07gNS7XWOx4!X~uZ#Zbn^evo5iI%cM&_FM$`razs#)){#l74RB(`{;wqNgK4H7H zC=wAJ5>subJqW%4`;R2h=zcFDL?$>LGrW=Ig9SQ3o1oX4Agd;SDHOK@TKUZO3*LG*LYa1*xnzoSj*l@UK#6 ze+z1JAeV4np;fz9Tb~RfP>`!a|9j!iAGW?HQc#?>Vh9xZ$Ub?Piws%YkyqMZ)AmP{ z3t$^cjSTWug&uL!;ZOXq3Q~zF=7OqbCRLM%^3;fedGkm594sxm3zkJB={fA(I zViol^I$o~g2AJA=!m@Rw(8Z7eXS(P+|5w`&Xw>2`iPmzWWmhW^8e6VtzWlxgFtfh} z@<$56f^k>Mjl1}UAQ9W`gZZCcs4w{BlLhi;lCPqH5S-D3G+-M`P=$}rBe20t;cc|% zS&QOT$WgCewtArBhW2X%+Xr_DdzJln6S=p71|yb zC~jB5@VK>0lW2kO0PWzZ+jhE^7pwkbsJ{)MONjKEa2^uMK0(#DvnqHKle+wJLFMKM z58FsXh8g~-rCMyOkBuaBr^V&;TlCM?4RWwo{({=rDx&z4XnKML53uGVIj|S>{J$ko 
zV9rZ(M4~FS#)=iI0C|o`{Ss=wc8KJyE!NW2sMd>wf+LIkk7DCL#?=D?KddIm4d!}% zUJ8jB>Ag}d6`s9UFpvQgP*p^YRpnhd25Vl`_4~ICzn$>^Ks9-?i2=3HqBG9n<7M@2 ze^gpCsD(M}Rjuv4OpCfn6O!_^?vLxy0>apuQed{LI58LyzO}L$uDX)1+4X7tr>IDV z_o6&(7Bbo+mMC{daH@^-5{`~qwnIR0Lt@T98HgJ#nOk`~R?_}3Y>G!dt;uP4rS0JW zC`y#j%&?(LUZlP*-Jo&Z@br}o==G?=7n6H{%%QYXOUv@pMqLjwhVRBUOreefd&lW= zRM{=E4URAnj82z*@`@Jdu!y)D@XsP@zWmeDfg{EhExp43;}YWJ65X! z5V5SMxovA6`qEO(J)KN(7}Q9KD|j@F6#jusZByNpuNqPxvOEg@ICeZLphU~G{OfLg zTBTw2sBoeNt&i+QyX0pO%+SkoF|8zZoojRPJAa6~OW&+_WYpz6C)ATwxsRnj*`n({ zuf-`_L+_$3zx++2JAobSOPq-6(xo>}y)`+QYKanH4rUT_;G0F*dQWaFAwca`E*8zi zJ+NWOy8I;)0I4H4^Aj#vySzSQV9`R<&X_^*PmVH$R?8(P_RupLDyK8-DQn)fx!!UC z$2mc6+bpqa6BI3OlW>M7h^90P5XTnFFemi-vL3opC@Lh&1kBx@F4oIoW&f_t%-SD? z?D#=^&g~e8UonXQMa&5ooy$wefQ-!^_OsKmu*bR6)RbaH_1d4ch)DemX;edTAiod< zn#`Z+xGXWOHto;+@!F@4zQVFri%O-a3sScW`3KIrb@5V!^Ol0`&$A-$1i^j81)Ay+ zMmg*I_g`=hN{(2rJf6{ffoHRtz|(dnA>626QzG6^^4qC4!a6?MU}`32C($9L34ZH0 zRxB<%NUW$#E*AFh@VZrYO7oAFy5o8a`cG9%fPNw>`vv31M78}SfhW1fqGLR6atR@j z2$$CmRt!}prrWtl(gZ|rAthM>&kzRTLI&T8X()v`_orLs;Ep-naBwRJT<3=JK9(TG zMkhGsCM?61q;T?{`ARZL8u&Lli15FtZG$^ZV5;#vo?}tWXqZ9dW>GT7>+zs0Z|Tq< zA{1#q*piWc_>->ZM!t)g*J8omA48f(1<@#`2?*P=!=f})a;#ulo}c)*W2s3kQJ47g z==*_VJLfPlS@pn>$UbFBxoPHcJXNABR}jea#xj39w()RAbp>vC1Kh20;7ZTKUi@aG zat7y8#m#pf(Cu1WN=pTG##o*Hi#qa%_+CqjGg*p#09(W-mg}JTRBOK)3dMFK9g)Fn zPK#$n6CQN3vH~6L)siO0Vs}^?7&UG6*9d|jg0jc?i!HCaQNt~pt#)k2+j3<=6zdO8 zbD?htC|L|Y=T0IFs6AD|(0*htl#sWS&jQy!5_mLnkJlJrf+i-4s9JLRRP9Xcqx;MK zn0wRn4KQHd*ANf3J|VOxM;yqpN^L*R2b5$w21Evlx}-Q#SJ<>{U{b-Z zccVBY{w$~#su1?xO9$p>9GH-qX8gk3eKJ0O*vKfW-W|o@+QsDn`BHXBBa38Fb}EDi zrojQC=%dWY!dA<)_7r;O>c(h}JBcbLjF5RWX z9^zqigu`A{!Fg(23soeN+6Elmm*3&^sn#c=07`6G^0G+v*Wp){BbZuJU1gbfg($Z0 z5HheRS39f0=>Xg~Zgjy~Qw|f&!hQOgvJV##a0N*sdw#153QUr)T>)c+C^hQJ~?S7BaU#Ja| zg>(hofc75?hy78T*t7T7yqmk(+DEBBsa8E_-7X*#s5WkBrby>9= zbLR8|b5kBP-*bo^zru^7UckUHan?r*oD{&cjLIfwPvkfscmbzhAL|vwWwx?vZ;{h3183=^HP}BiYaz~{by-#O*Zr{sY zWocd#k`L-9g?+_XTklfviS*rJ_)&a(Ik0t<6xE%oVz4^MZYn*kTn@+H%WJjP`4L1%HSDpTj~PDs<|c_vvrE&^OZ8)EA&;|?u&xfrgxW>5U= zdwQy7_Bb(n<183Yxn{)NnNv4(l~=`!;E`L@XP^|73pD$`PmoB-N-zM|++(r$Pp zr7r^Cc*G>N@iD%xyse2jn8;+L!LG06<{R_n?#$|idUeG25+T5>O{?(;Lp`p;2=fV^ z69isA?2hq7{#u;jgMWU& z>(7K-&Q*r98pAYYq9jBpFAJ&8&6PS*$u#ma692L#MRvT zI?m6!dr%y?U_xNHo4J+@gZ)0pE{!|BDW2xVo2KH;P;se=)ZSQNf@le8sMLxes)m};hn30J7l7Y^$Qd#eF^Vb1a%5tjqjeo6a#58Pk{#?4 zuL;BkfwtGSgM=UyCkZE~#Kj`hyT(9M2nlG%L~Mp-7D5#mlE9hJ>M<5rio{T23}Vdo z58<1cyQ@=8{h<)26qu?JcqOsg9#(dkd7Ov1S{6f?hVHk{eZGv^%S9>`Or@__uTtap z*&W2*4a{S{}=)!wl1zdN!6lYz!0Cx{}M=Bz`y}w^JsLMiCL|9>pt0 z+@CZDCE?ugEuDb2ZW_c@F_|Pi zZ9k<}E7{**#kfW^z)Yro;VO9e-odi3|Es4blZg|t(&lXaiSPW`MBJz_H$vT}wMc?K zj0BbC-L+>gQyV_}lZ40d&SLWTqj8sD^HQzk3n=(-X>X@mRM!0D5_zyT8@bA zn;yt056>j+i1V3E4T6j{&nX zy)G$585cP9IONFPQwMtiSMfUA{Ewy|>(j;2$rOowwo(=HR>WqA zR=?&NTVCO}1-~;FnR~Q58VUNe9rr%P*Q4?EMcJbF8KAm&OX1oz*ul|fpQ*C=N%?98 zJB`k+?b40q`o*Jj^6PLpfkkf4*6UCHopk;Ar; z=F;NwG`YBZM&E~N3)$4QkbPI&(RHusvQm^l_+fyd_~8Io*6nn3D@Rm9p*WrB$1w)vRJm1$e0FP5qbyj735(B+zwHAnF zzj*!bBW%$Vg{3uj;?^K#0&JnoPx3wnRC`9yLgO;u~ZuZkXFV;a)$Xj+P=rX36V#*`^Jbs&D8T8PgeE z^IuAt_L?ee_@4~`rGHm1M>rNFM$4Ku%&=trxywjv-(q(1_znOq$9LBq=fqobq(Q*RD4X%8WDP-qjH z3%zOYTHu`|;%m++U#s^ydz#z2i{wS@0*P`IE8rhk6rGAgRJ83-@xZ!S?X zbO$$o)L*R0v4}mo4y}W5dAi%$Dh=K|J(EFvb@k9JpGAWfQ>)$x%01T7c*FnhO9w=( z%{tBQau8wm9nV!*j{uk>koDReG)BS}5MtlBioxz&2%S*J(M{9bI^QCi`)P{bSyYVg zFe%=6O^HRPvVbZuvU2js)FPJ3UOJRYxx;{{7>htzF`yXiDay6R99iUjR)!FbquK@A zD^O3h)hBR!@Gxu&ebO>TW7QTYErrPHh#5g@7rL1l?udSenkoDCE4W5oFbH2pTJtC$ z5O@taYBW7@&h%lkpsad40oJc1Y7S@ciM%|kZl{>q*FRny9+r#bNn(a#l_ohRG>|3LNni(F1cvfh-qe(qD<&z(TmO@xI#+Q 
zR@qK-*euKjZ^L2vZcj(s=RTt2SAO`_FY>A}ZvHuzwkBkflfJWllIXo}VLpswz3Xf5 zZsYfTylzqkBJh+G@~JY=eywU7#fWqjc7vyP=ur8tzA{udTh{#iyn(5a6@Ofh)A-^Z z!{s21tvyXNkrz2|P#n&C9Y27=gULy=Aq}3H0@H#(0-H2VbV_Wm$6af>n>kSuuXl=h zQiEKc9!O(LHth_oreh-uZG)7lY7~9u%qqlg+vwrxFyljhNQ~4x$2E$(9@?*29U0)c z;x(V}5BH~Kd8qs0gK;8F&ZQQi$O0MSBfpDSTvBl3uX~s-@NL6s6sOR3&6KblBaQ}j zlM2m?a%CjOYmw?4LmTky>GC%tVShQ=Hv9g?-KqjKCEgc-zFCDZ4nk@s%h z|6>jIH^EQ)!XQL5HZ)UIBVV* z3%$y`QMhZl2W24me-uL@F+S7V^G(#|NOb&_l%FxQs_}HIOAmQ%)GULt8);QiaQh-@ zagn;Co@R()THP8-@kX|m{@SNnNcPF7k(|%tre=(fE)|c}1kEOJI4KrkAj<%eIEay;`-=p>RCG zs)%Z zSs~xgRvL<0>{Oa$;O014#9*nGG)c9B37eW3(eFCbOqzKzPVKWgJRTP3YdBSTHJ9T8 zy0aP<43aSS{Y6&$CBRYoaf7q<&Z>i=VvWQ7waFSYMYHJwd67;A&ANE!UF0C`DVS7BC}eF{Sh~l_5V+nR4D$MkZXY`7kvl&!d!-p(bh+ zZAh;8!y*sWY`ME)soSrE;7q)=`@gv-W=&UVRf3U%sa5Hts-wXPT|?_a7yDvrK+Bns z^0&`#ug}iERk(LhNytAHi*lem6}LB?oFjnB+w5U;;8UDq*l%fuXLzos70DZ6YdXf# zv%OUBgW9!eMwwYe0G2UYNxb{uts&}F~%-pzdjWV@IF)Y-T_nZpgk zq+;V}zMHQ%$4kNP=~yagZqL`tKyXy+HQW=PuCqF6%kkNmhvxX8S>Zbs6(n)G+PVWC zcTG&SZlW=a+0hrQ=m3tyGvo^TA;1!I&uaObIPHo?Z2W)@ACqiWxe`>F1 zYJwwnC52*&+h?0q>%ORHUc7msz14`G`8}fNuvY!>HJ}IN^Y(bsVwMOp1LGa-9)l#) z@bYEiqN&#>l_V?hY&^D(tG67wm{X4ByMO@aqOonRH@iAN0`}QB&7g|e< zSkC(soewFz3FoQncm&{i%=sM~@XHHC)v{J+E-DmI5%^kaAb?+0c2|1`g<@?X<&$B3 z$D#*YFh4}BO8JQ8r%+@~(swrcpNN-zUZ3rEQ%p^)pHy|d*rtci=qJl8E9tiID#y!Q zgtYx1U=j>9aic!V0i2htIPP~-ZHzX1LO-6*a;@)<3?I_*ta#pS1k=@JPuv{Nm2bLy zYLj~vde(D-x!;Noir2bzXe9Q^6i_er5WnW}M*uKg#LOz1`xJ}R*Dvxh-7?RTe|Njw z#e=EyrU-UW@f~(q3sltcrV-z2Pu9-Q9B-TGctJS)9(EBXSgQ{>uer|A^1k1>EGAky zEjW)ZC3F7{OWwiyB}H4M(9%|388O);sRukISv~@7y+83CPXrsr@!{%Mis4!nnSpIp z#+o}0vGctCMfMJca~@)kb~?Hf7(NNO6( zw91%kv{*&`K@>@tp$B{-PCt6uiAS$b;=i3m3mTU+KEF9wcv6k(LyT)L-Bk!%8z#Nz zBd{?tv3c`u1Qu1`>1-2;dQ(_ztzz4E3`{-tp7o-wW+nl(!lo+$2%$N4<`#-{2Fv3QyESX{Bb0|6} zXb-^-e@UoEti&;xSp**fV!p%XkF>VGBkoZ zrKIn>uYp~yOk2tfnaYv#f-%l;`^rpNA_F7X+S9HMW+YL#_p?Trr{T8Xo}~DtM#lMW zmq!p`AJs%Qc|952ulCUgdk8K4ZV~3*X#VIDIC7^{){(yJFY92LdbCll21E-j(K@rA zA)$UI{Teq{>HGj5z3m-yKdNDU_nohXOw*5j*JhlbvhdWo3g3(*>zz7NUqtS;1%czu z&63UdNJf24rI;E^FUzn3n&h&YMfcmxhfaJhRu&r_wbEi;E;Uw}dPvyH79ICrxhzHl zUmvhAFVx$gcM;#%A~w2O7&}u?M!tgQL8&v@i=TR zPKDvbsq_F|y3S`b6KQzE-&T#dfeEEA6bxwgk7PpddV^$DZ@uETlJY9|CW~L&akc5ZEnQnADYfk5LT%c=H487DBT4sx91EOBD~IHsU?;nF;lpo2UAS9PQ>!=?Af3menX~#lQiaE4SdLLo9j3j9fmE&ko>sS7g>F7{HQ`- z$P!XblwqlM1$m&MEuiMEWQ6^4@ZNw6<$)>fGzW$P5wR0I${w@PN2yIxMlwQj**LF1 zGm7)%QlBETeh`Vv44nD^>qIXlE;)TiwZi`!qn^DPOQ)MHQh(y=B}CwZ7! z8RkZd2@(;!X=E_9IS6iGbc~*nK83BgNKt75n*?iK$g2a&Jo3l)LH@_1IGRK7eB2oQ zDYT~mCy2VimK7cLwj$tRul-BITo!s(Pane_nVO;ICX3s}_M7rI7I3__B+y#>Nz{pq z{x|o&0+Jc@;j!CLw2)&t^u?46cI%mr1lJF}WN3Gi&!UnHCpo|RmL{R9*l`X`lubA$N`eHOFHcj{m(+3|%!`HIBU^8PVi>=!35o&4lXc9#HL7UpZg zKf1CGkQEpeVTCWo3YZ#NAvj@51d>K}qrQ;@^C#wC-Y*XHuna*qwO#LlQkcnaEgyUd z0v+4*!J{$SEHavMFaYC3dwHYpZ}lM8Shrg`mjyaoF>651pYJaa$`tnn=nRe2#1+;_ zF00xfPnvh+QbprIz7@veNMvPVnwI@}13hq0W`uF!MnS!^o?vFZV36#D! 
z6n>}|YO+h~S~RkYt{57t8qN}Czw1HKQ}gP63&8#;9e$yWh4pgX3z3fS#@|Tznt>|f z(!5vT#8RKBwdZguIhuc{wQ7{o{APv2Me~#Q@8SABe(JuUqxbBLg_WSF$m=%tM5WcQ z2E}PS#$guYJg3_yi`G1bQwzJ+mpgL>@_jXCahj$0)Bs>c3f}ulbV}*&Kj6|=3`|qw#|us_i~S14OLPJRa)6{ z_M~0;8in*aPmqUG!U0g#HkP{F?R&a2Yz?P&{jsxRx_Uh;VZJsVDHTo3_htz|Hq(Hd zX?zrxF3sTOkH_O&QYoX`M_bcL)+f^u>9ox}&Ds5>t#Tj@W4<8sKWWDTnb8>ZRwLyAy0KrnoHuD543R4P{K)3OuUVb5%X zi3@qX`W9C7UGb#VrFUGeO|96a)7}y@#Bb@a`8?S$Lfz|nxw`#zHTiy*b$8Xnp}zmV zSe40;edWU#-GdJUPxObwauQiSug*1%uOl?zCYo%vMWaO8-JAl}$}s|fx-4QC+#Hh* z+#N|Vq<6|%>|&T1f0)<~70e)1uR{`G_xMqo%I+LhVlL?e8kLm>XtEri}#^ z{LWq5bWMZ%d$%uhW65zcLwqkM&FKJ(we--zDcNQZ{mGo3qV<`;HP`OzK_ZtPMF!oo zdOQe+5{dJgwgctOzIJnD$MACac^HCb3B3qyQpHO6H0N*3vjrC3cVxtzn6FxC;T=A% zM}%sw=!yPTTnq-?^vYH5NLiND`q^6)cG%b2)yof3)bu`5K4x+^8%Xxa&O z>6RGQST+@^m5(iltrJR*)IOdJ|0V}KA;d(Vbl3gE4)`Am<^?Z+;q(fXG&fA^mY;}+ z5U2w7NO3&Kg<7&AgW|;1%~-knf#RD!)AFLmel-~JL}NPM?jvdt1I=RK!dzR&$34O~ zcgXbZ#Z!_^-`2O!x@-h?a+VN8TsxSRKhb`)6RXNS(b+NLziOp(Wav)i^Rxk5dM!b1 z08}vTo9q=KyLP*_mK;Uo@C#~seA0Tfvj0J8s>lhCV=OsOI#PRfaHW1Ls?V~;?u32K z=eVtz`LHxeQ?h`zRFeRzm)tCceUxDw886mRI<}QR{###Uq&96tJ+b%(>K^fGCh~}N ziJ@>yB%^I@&KZ*&*2*>wfIf+paeYa?>|$J?&$6wNV2Od@gUTPeM-IM$oPD834K8!p z!#~&JE=QbeWqUSnSC`cUm-Y3jPs_*a#BQ8#FEK*B=u+MLWg$5bn`L*1&cOn2+UH_> z`MB-HuODgrx~$pWf>YmbDPxD}tB2r7dq;{p|FzNx&ds*HO}mv;wMpN*+#aexfw3HZ zvB|qfU(oD#XK28V{!T*VPW-I5Ck?UPNu#x;&vEUYX8s+4BHUA)qF!mY{%6Riqw3{U zqdQyE(ub=l@b^70I>?%uJztJ5e^W0?RrfevwrYC;9zaFOE}(pwp5YGrcCu1zeOpNc z9s+L>zFdMBcRwtBE;_ev<|;mGK7CLwFR<|cbZ$S)Wm@<8#QdYpsPh8L$1t|1T`zzC z^C_C%Wc##2pJinrbBR$x+_HBw>jy1+ar;u0r%0N&uiM~h&-R@p4)$r;lgO?cne>PS z{ov(e{^u?-xZvtw;-OMyJ>-_lvEPCLUg3PtKWS#F4^oR(KNyY1jPX#qXeaiZxbDu5 z-kqhe{o(O+zI-hHYQTl)-p4YGyY&doKwpvtwE)iiPvexb9 zltoa(C_gRP^8iLgJ!NJ&efu2fj6FE^KVzYsS?zFIL)iFVMl8r{3-674ge zElp*<{}B4bviX9n4MBkg!!9tsNc8{#!PoNpw*b=UtG|?N-K#7#9I8!SuP0XW&Q;|y zqw>m4#r#?k3}fN~|M@u>;v;Y^V#B}y-?`wHp42Yg(Z8T33|TN@Q?EUbdb zK&uI^owb*^1rNt5+g3;49zUXsHY6!|xA1Jv*L~3+;eWil*1P=C31VDJO-DeFLrOIo z^4Q3z4Bce8ZIi}SWzk|)AsOSHl9zK{UBFF)@xts~>(}|oeIEj`RW2Q$pdQoBRj0Il zSf2h7akyCeVyBe)WyV<=+;dPqoa#@eNcs>o9lg-CF6#O&J{C-Lu40_J!0vqXPn!$~ zWYU#`sLb;^163BzN2pse@0G9`1TbfClI^K-?Qy6Jk{*8o4dC3hQHN8j^TX>`t{-Dd zj7t!?;hp1ge;}G)^Q=yNS5o;|IdB=S(KQlh^pEoHefH|RBpplOed z;f`wV(tW3iv?FZ!BU~c_$5?w?Y0-2p1ED0YxJn4O<9o_AQsud-O->zfyQGQ)k)G^r zX!=l);9C+P5ki*R1^}ZyX z0#xH?U>AcDUW>~;%k8Y6*U7t6`p!{d5JlLf6(sIH4Q_IgQaCQ2WPey;-O*58t&T7= zDlmxMCTK0G^LBf7+7k5t4lpe%9pZY#dL`t1lG7`E%RDFd7L|uS-!Fv(ZGbGc zy8Y{g!gYjgot}IzOJDcHw4(B`(QeR>54L(UUSN(7jNj5Pf~nYlB3yLyVezx3l;&7> z7oG&-6zOE_61AqA-h8%m(=B1-BekrVpEd69>qdA zzD3O>0Y_*5u1pS>|GDM|K9+9QhxcE)>r1S#CA2F;^z{9yQc3EHeoO?k>uDd)4F4pN zL%f6u^a{k%!VD569$X_BwTU<5UHG1ii5MREBaC(KGeP5f@%!2i@gi*=Lu{RSkyf!u zzck%?q4w`-UzyDojES)P9)k)9`4Q;IpkNyiq*K{X)M>Dob`&}@Df&D8SNM8({$G1% z`4Cn2#d{bA7Gx1`HXvP9;3@Fj{IgDkzh~{)>lSDmh z_m}D2pPZ7D{J<5PYi0e-jvv|cg8>Y$=v(aqm#&1Kc34~;^@~f7+fell7T46*#v@u4 zzv8gwb|^3=FlT<=dSY8q8KMf6BKs~+OUr~IVi=zVH}D=JZ-W}(nJ~$PA;tULZ~0Fy z@Ms?}VilEMl9gwSp79MY>qqT-CAbgjc|QC7HJ17X52r7G$k@rG;3Y2NdwEE|VZ6!) 
zry+GLy$K*QSvLZcpwNIs*s(T*mJhzMi+Thksyh06S_kGJR<9=uZ z(aD3}7Ku~a2R;7!sibfuk>gC3c!Oj-g%V*xhd@(}-;6;zyF{zfgL-);PVPQW2pU`$ryGr-0YPln;TO*4; z9(HyqX{zp7_!Mec72or~BIxP+IM0{Mi@;UJ(fKOOd5-=FxY+3D=C)r6hM{2uk+hwc zb=l(T4x%@gnAKVw-Zp-B@xPdjcy-Ze{LAc=|+VpM^c^wl2m#qFnDm6GQA9=5_Dj+)e0{RuD}t_>OtT|WK3M?Te_$Km%e>9aQ6<@?Pg zqS&l)La$9z>FpO>_nK=@HPzp_Yex8ZOlVjukJeT6m7d)F{BS4)T zbVzNycqKOPg0Ue;qRZrQ(T0NbYD2aKCep`BW5iotEMw-5jeaK(o#XLmYnXfLp#Zw$ zv0a&7edu&N2urbCA<@8qlSk(&TzQkF$W07w>u0UsW z>1CXk6n0TBZYTkQpA*&_g33rbAFfiLKN%v6xm+5)!edaab_r-%#O6R6FbZDVMgswn zq#L7hX)%d-?=$xoJ+?7Eyu*+4e$u5MmNnMgl#*#+P$09>*v~}_uyHgv^3Yz$sT}=~ zhB3UXtyUgph_DUeO;~%h27qetg+`kAxLW2|lg7s}O%j4Ju{kS@;#7*$d@e6@VDLJW;`;L7>j+>wHfdywDQ^gUF-(}|4>@b6yE^&W!zQMLoOh*4<1<^Wh zuU3>bQmF(~*aoKB^HLvy&k6Tw01Y2s&H+jyz#sHd=Ugj>Ox__#C zs(z|+7L*1S$|H;<}N!70Eg&%-!7cbjTgT`MUw?6k7S?;Lf$? zlBRjkS*XfmnGPi}b{(?Lb6&+(2Z6qiKHOSM{7{qlgIhGMwqP3t1`2z)of_yey#kT4 zR22$nhf+re*>t1%u7tEps_33}j*%1C2sItT*TuR}u>!w8k`T)*<(bNCgeom3n`YU; z+g)+jg)S-A>vtpxJN4DS7$w!-lUpo8+DYVqU(N`7)I`DbuxCd=8JMTNqI(Lfo-D8LHnqnI?m;MNqlGP zNrD4wshD&oayd=jg%JbW&@Bdl(}4XbVV~5jd<=a8VRdPTAG=f10{tti{JyeGR5Vp@ z`;~pE7dg+6`|V}QOkV;yjE~FlyUcjStLRIqdx*$C^eg&q|0da{c~j`CZ!?_`nXIvU ze-Zbo-%cA5`h;Zg0VlqN`z-0Hs~e;Ho55L>mABemwTR85Vcs#KB zO@X}b@_qF7cjqC2Sy3!Hs3>NDHlvNL7iK(xjlT6Y!WM4i;L(?0g@@Y2Oe-&D3|?-=n~5MxhieC&df~7$r~DP_U{ga z-sNevK`IrUWWAG|y$^Vp2*ZshbsmcH_13%CTHU49Zh?q=G2}(~ejgani5aZ5FZlr5 zWR7J|3Nu>-GB#RK!km~|9MU2c#NH+=R=!}t4G>CF&s>OhTg^jRpupJ!b*-Ycty#Xj zEsJ=oj-%?CA`uAPMtPA5%IA`1>yz>aYH!g4%{5b>EU}*zG4O%N!zDAPOuyJ$xSM>k zI~Tg#kX?H=MJ%}n@yOj2C1g+H(6jaaP)+}uCRTtck1Yg%a~+5nxZyDQ@L3bm5HnV5 z`@aHFIPwe_s2E&xd|>57uhtff<+JT*XXL=g7AiYcG`>0|l^Hec=?{c}5n9LDP<={f zUGq#^4UtoZ1e7+XG`oNby6lmaRvq$vciLG}wKT=|rAQ*qEb4eF!!05!ue*!3El?qm zITVp6j63EIj5jgz-hA<)!cxBq|m)hSr1;u4)#;-Mpa|*`I&&tkSW=?b*(l4y&ZsfD8))VWRO)zgl+b z?sWHIm6sUVdw>?Hd2XGh^?^w520H0l0966Te90o|s4H2>gXFQ&%Ka8kfmy{x z&10d%fi?E;ahMMSQ0r9uPOtYpVG6=)D+)1okx2uuBZ3nLLqbDffLK0>f>blvh!mxL zfL!nC*FS&K`d!e{Kzg)!k!S<)z?AzKTXuQOg5zvU7?depUPQDR5pV{h(Uwp$t9QP7|o z!YLjpM-0Nv<8z+}h)HM!d~0OV1!+$9)4tE=N)L8kys>(nRU)Z^1o?vkVV^EHhu+4K zTi0QwZ~I~j1~ylI5VhDAWDzuuE|duxPzoAW|H*GY7uZg&=2CvrbXg!b?%qQ`RK^`1 zQEtBDtDN`2@p>}Ds>sJ+liTq#ky3p1!c6f|W=DaF7$A*QrDM&;bQ^XNbq3Pb zN2v=vowZu0e)j63Kv@X%>s(};gK%^|tEVk`-re~Q#!j-Bb@@y|Tjiz&U?OKcDI3R9pfJyMbsOun|m zu|e*-A8;Ntp9RvU9`zNVt07YTRk2}!@KFrmV8pbHqWy?OGqn`~lXjH~mP%f0A2qLI zF+OQxgjfVrp~#W=zF7W-fy)w^NJMW_3Zu63lLIN~;kt`P@_k1vTp%^Bc6bTf7M4cH zAkh$HW-59i(K(cxg88k>`RlEqi@G`cwXm>iVYrtWe~~V_c#=4iUx!GiLDV=9k$f@C zrjYL+n^k;X;&z#@)ylRuUYHR{b)ZK&h2f>HmiuH$UuwnIP!8j%E*A?6mCeqI_L0tN z7FjbV0E!frT)~Plr*=vLAcHwyps4;i0EDjY9cA5^|2Q@VVg^rvKu#!xld`BMJ$r=( zUGgzJ+P~Q^p3H=74c(;L{gYI_HX(!~!aGLR?0Kq$U7XNSINgiDgj<@Vwz%vr0O zMh(3>!fSK+j-JQrb5F&vfQ`{DuTjSXIVMgF1`#UDCGpp+>%~Xa^x=;wQx2w>;vXj$ zz6ydrFIOgQ<7i!liE!Zi=5X|LZDY(3D{!o-1x!9SljVyybd*R=bP&}21)M249 zb5Wm+8TCSHTGWkLejb~-?wvf7Roctz%hw0$VcNboL=Ad7FCVXeR&>o6A~4b!!~#P z$lU9_!})T~Oy9_C4z}e+)O&AAFh$rBJZpeRIS`S=k!xt@Ef;EnF8?->YftdbRVF@V zsffoeE7YoIDcpV-$8ynKO0$~d;nQ~mP2+5OX``6V4m=Lmca+aDyVuS? 
zU=uEFg=CC1pQTfUQA91cb|0c)J@(Ttmhxt(AiY!9o#y2KUTe77eVu)?)QS_9dW^_Z zGFA%t!2!<|M?}p3%pER0Q{Q^3XosP2>lvdKVLGf3$rBS?UaTB#U&ft#X5G zcDBFiA@LhfcgZUMI@R2C6&a5+ME|9eoI4HSdVpeg>cf21%RDsW*G}HcG%DXj$GLK_ zJS3A@4Ms_hQ20Tc`nvRic&uVD8R`NlU)&1r%N|hX`v6kz0O@A%`qMI*U?=6ZPsxbopWrlb`kbKVG@xZuzA~o`WTo zppY?U*os$ea<`74)&0z}%h6<%+-W%2Z?lAS5O8HNWaEb5@@vurRO=b6B(4Gf%yjMF!LtSA3G)39Tj(a=OX%Cb``?;Mn zPa&MIKHJ5AyjQ0;W&R|VBSKAZ%3uPAaLRJ4X^TuRUWAVlu{3nlOSSq(_S-|X=XVG~ zn5dh0&}rz@G(c@eCQQ6|Y8q{LIbybmcFkZTe6ob6I0+=S-IoN8Qk|Y)m#$?FOxjex zsjiB!)koNz6PmXbno(q1(RV$Q#Q}|x8uaba?40}6Tbi#vJHg7L-ldK|h{vD@UdMOo zXKeoi)PzS$hzxEp?FR0FgD(M^Ast_q!7hE0=v{F6NPcW+{RlLaKn45VvAQ1T(WqRB z5XtoIjZ{zWZbg)A9HaPoDa)NXX%SoQ7cL)kJ;o*oGEFLaQ0@KtkYk?(GPjc-SC=zZ zIevo>`3m{VJ6K>pv=UqCo$U;4cJ5>{w~ADET#FR(y7iIbRb>4H{YhYJX+ln> zIc$1OK&v9flOZHjDiZ~7c3sp>ucT!pQ!sk+f-yi+@BBbHRXl*~(&giR)#LQ*T z`vnl!KAw@)-8e-zogU?=bx!(K-N zhQG_s6HZwRuD?pD&T-~A{~F}q5_*Jf^_uZ1Fg@ep$pSoyAPUs;DpXMdZV#dS;8J)V zcfyn8HD1Jul#W@(uWR-aC?#(fzx&BHGjRUIm1fhS%3a_>gvtZ|!4-E@2@x_&;KM3d z%$C-CUwjfq1VA|A$j;}l?=CF}#o?JfM3}BuyEmWVFN38hOezH0pIo;S1S)B|nWTO! z0BEjU0$BT|@rnR5@^ut#@$gyQ1gs!SzoDM3A3K1+4t%xA@6QyvzUogM;Z8>)yM`DH4M%t6X#r*G%QD(xz=@4XZ7!IuEo7f<48&3rhz+}IcK5q zt3QOBvoCa1KC?udJn6rJ^gF(ByJOi_w46T3O!ni!3Y^{lZX#gQ&QK(|D~h2N8D!0 zPP+TBQ-B%~;*OT~k*l%v{zMp)c1LTsQp8-!3*bDMD81XSm;06bB~>c2_faA9S_0`# z(^+Z-YcC2AB{ZztNryt7K2E`R-GQYROyW&=ET!2IO}D3{w6QBCcXx1$(z8>lzQeQ2Xyo>RcRmp6c~>{ zq5eZ7tla^v9cL*k@_}7Duw$VDGdn@H!&{LW&o~AJ@F`u!ISYIE~^wKu|i7A zS?avNYCmHR%YIpB#+?(?^4&@qePS~G$46EW76a1DyX|tu0%kkxG$rK6p-v3b8Td{Z%xM6QAck_g9Dx74}F5LL|+1cJhaw-ESaFNPuTkuQO^q2Esh z=U7(YTvbc~`jpT!#ID=n)k-r=+#nzCR~0^lVVqKS~Aw=Whp3TXM* zkev)p?9=w8GcW1Ybr{R%?1zZ1yJ?lQGd)ZUy;W~5Zc2Yvw268B`C6n5tHN$M z1uk!TW^EdHw2M-q1OmH0=+CWz7}7ZzX4SQ;;~_NlxUE?m=lzz-)s(KwLQqPV?oR=B z5DlHz+WJap+Iaj1_~4*4f~cL;ViRDpxF5dwIuoamY}6GLMy0MnG#k_|tMe+BDlvn_ zR=5h;>g-Tzi^26pE1m)<7hbyuB4Cmyv6~954gV44cUGe#26qc9| z=AH2u6F7f{m|bocV_RNcDQaav9)+Mp==sh2U|zN1M5IV#Yq6m3&WxvL7C7(A7Ajfc z8PL&>O=bp$yiQ(;;jn#Z$dmQ@<*rJCj_3v&U!ucaYLjXUJLs0K^yZZ7CwVXmL`K>v z@6B_&;Mea98RdR;1f5E)R3nMFpy*`uYtOM%n{Q_>(4*MMER*sI9|ZzXoqNTy8dw$| zRrkpF((s_Y&YS!;L90FvkgaM~)fsiyJA|fKZ^h{+tiC6P5k-mNvO^{&;S?EIM#PCr z$noYT$YV!U64Wn*8$Tp|m35~+>Gz9u7=6SA(2g!Oi6`TF8J|+pf^j&KYge!-15Q2< zXhQlpdPxKviRxCc$+JHK=UpN}(7Po3=N4h3jPZT7l!xK9vL9(Tf1M7y3Gr4PL6G{dU?*b;il@|eeH zv5G}^q6Q<%FJk4Gd?H1*Si}%^?vP=BQ2+7Js5`YdnRxAu_kjn!5j_c>8!EtJ0T@}o zuHY6KkT?zyg86&Pf`)wc`~+THM`R~kD0+O(ouxsPK9ZxuvHj?YanOf!eY)i-H!;`n z(3b$)=;Hf@a7N2dFBEo0mOMoqmxLsQp;g*r2|pQT`pk^(gMr6N9d^819gJ1W82%`% zf2?gkwO#f15r%l2f|Ext20sOEph=o2$&djwtJqGx?W^-98F)Vwu4luz*nQcGM{;-V zekn);_)_7RrcL)Kx9dQpH)hE7iTa>zsfzQ@H}}^}tBNkc#}7KHja@4Q=pUf=dn#f3 z3SZZ%s)rx3X;#Cav>=xJuNFhU-QVe@nc{zL4WxZ%6MiPBHkHR&IxMoCM>1X&hV}GI ztb&tW{&;esz}KwBlK(-mmPEnUb)B5jWDPq`ZCEfqD|8=n9(?T%x6bISsAmaQO41Bo zce0}xqniaRpd2rhLwV7QuWb9gW>nZ*l{BF9_LMi`E07#1<2dpCcrWGB!RHYol8qMO z@R$xAMb|f~`s`&RHUM}9N)JNFW8f8Bp*tbhQ3>Z3JH z->4RdFH@=eJvkB>$rnKzHurEdFsdQn>Aqgg(jvlrslvJ@?KonngaWPaS1%VYc|w$B zA(qC6tgnRn-l~`-I}0QG<}bZoe-v|5-4M8lZQH5NoFgpRyFRX{={gpEkHcY?SF!n~ z@h#-VbIYU@9%B2~owWBPzg+H0CV(Z#;MHkA#u7a&>YQ97FRJrZ>%|NB*qH5kso&AG zRZUxeiZN=tk7N>;Un@C{p2|}YM+2W-K7ddw_WT)%5(n(#MGpNS!;H}opEbMm95-DK z;{I0O?6gN@;?IEJ9@1ht68m&@>bPHE6zO=Vhchly!x#u>Fk-Ch-;Vza!UjzSK#V}h zsya8ddfT}%jx4JU8xidoi6>!>;}YDoxYJ&oDXr8uV)fO_r|55tqvj3p@n>ZG&k6QD+~Us#El$R_a)0GwBu1}8lBZ0ezRM{krN@dzk| z0VU1fw5oG|c*=8_x~_%>GeY) zZALZfk91uRE~->ENh@n3s`DC;c*s+`zh6?)%I$Jrsc&)ZG-!B zqrZ8(-e5v~_?%H>fU4KA9bd20-g`u_M=F47c>NBzB$~8iMiZlNO9R8hPg$!5?-g#&0{e#*2 z7lZ{&(1^RMR~C7IpZR~QkMallzJ~sW`ri<}zsYNpbz=gXT12&J5uC(0N4{-HzYX 
z0hDck{NWMpIH4h3J!OcK{M0<=&-NsD#oZ5S+IS$rf(T#L%w5yRlNf(8&AgeL`gT14 zueoirXf_BKo(=|103v2J4E#xd1u;ZKiMU_(@D2+x05RzbzmI`b7}(f*jEUMX2I+{R zJk(hQzGd*{R9W_Y%z8PXfs;Gin>V=^zoAx21mYj{v42;g{;qdlFF9&@A|Th`JRaC> zI$N+mn|EHPi8|Q^(shuMsHG1Zv6lp(Gto33VNI78ws0hn}Oe3B1jmMV7qKej(8a1}v{`&K7wSrW8ajBxyeF zlu@&@uN%m$+%>u$+xLyo0)0<07+&#i@T>Fwf_)EtAfS%EOH+F7dWzj!Yqx5Ad$B&e zGXp3UbL0}alJ{A%fJ725->8`~dYDoy0cfp*;&0b3*ld;*4v-+G(*!`M#{(pO6*WzB z#?8eR&)-u!qK%kINd;et@1d6!i+^!Up&$?--7Tt)lOodq@&QrCOnVOTuN#}8A@qUc z88vOY&Ve#SxC&s}jE~?9l%o2r0?JTUwGkkbM?qg}7X_v8M~6=ElPi9cPBfe?K#G?1 zK{r=E4an+4FjSWJ?n~l_15eQikxMf`4q<|`J8H!W2mB!thZr>_fR48!wRE2iLz>1_ zazgvVn`A&BqV4blxKHV4h!iTH0kZxqdQh^7Rbg)61H-Tf@TX&Y=|IACdNdyA%eddE z3ly4*%KO+xOLPS50DA+euDFM%-3K0b8=Il?Qx#sc9?v(re$a4P_AlfYe{mH74C&^x zs0jt&rwyf-0;sH`&K5mIKg_>aaFGDU3AgvEW~@}M`Oj~Fh`*s*SG_3%7WERmRLxZ} z@WiM5atP4EJVm{|DC?$4so*^KKElZ*#Rif1%sb7#X}&({AK1%D)YY7+1TL#JcRh!M z_ZyET)n6C*vycj^c)yW)Va^ClJ#@0*2*JJDFmm4X0Y2XY!KnPVx$l}+{^ZZ&j6LzF}0fIz!eCSOq#1Gml(jHZ=e}@xg_Ly5`PTE7gE!zjSRFP4u1)f+Fs)Wka? z__@d`0sM(^j!A0jfF*$oPj2&-2AKERF+c(!x$!^24uMnBj})jDzLo^B2#jDfU+oFD z0?CjnDKy^a=NMm~AKG8-*5DB0cZE?ha03GC;#~V3AUlsHVM5^q@gEfK`$@Ln`iCq) zWrxQ=pgyL751o2cyy-Um)>^NUc{KlNf%HSiI5>0d@6Q&$+HklBg4Z-R*b;S|G~&mj zngG~eX%CFgnUtX;Qpd8a^T`j~r4E2911t?cCrvet?p+!}RE#=c*5TzfySLLb-Q*qk zea(u(AdW-RZaM4OYAdEsWKU7@z#YNGGKkyzBqfE^<>P8vCe;Qy#i3Dkh0xs$E|ZP2 zUOHgedQ!Ws3jD!HG_VgMhWFh-UfFZMVc3!YK7;LU4^tc`Zo$Dmg&(l1xgSeC_R?c0 zF}`4N6;$PZB2~Iqy2xh!55JA^moKpWzJfXKV)3}OG6o@|xC*8RF za&vPZ-+lP~rTI5bowbck_r=9UZGHWW2kx#LVUJsXdSGCnoxS~&lJDQU-6Cc6z>sT= zrM*4t0X@{DRl8f}>;C@!UFNAlXTl^}PC?HzH;=(DBSW5|dJ7u*9=~>$mzQg6YblN; zBz|kn4BqWtKb*SQpSyNw=z!#naL~I5?QZ4N1{E)`1KS4OMefeDd&ExShTL zm0slQ9y{B(bH3HLInF-5YcaM-9)>+b1<}Iwj{{IrB0Z^7)UnS*2ChZ>FAQTZVpCRE zSJhiwFS*jqutXYMEkc+OA_GYcm4SB z<1{yaKf0uMT{!(N?KWBT8ho1e>&6HRiYP(e-rk-bPbOZ0V~7#okk`+=6Te@`XaY9l z&TT1qjoe3;6xl|~d7+?!zz`^+-V3$i55^iZ{O=EnUL(D9G?r-+|9(tY>VS&ow29ZJ z@NfBKpnQ3oFxOw-ri1Hmf-rVQsp7u;TmA`9-o*ct_I>4aa4+O2I_^}?3l`CT%bNh@ z`>@Yy|9*_(or6PBDrckU@o)J)Fz!@5>7E_d{kMTi<;Z#T6BjbB|1BSnMN!JmKDSEr z_hWDZmp+oKN$ld^^6bPs`s!-6-HiV-1NF9+QqlQ~$9PDrg=k?JPP<_>ZL^eqN|lbn^cg8pYch6|K5~78qauJQqiL;X#@I z_o6jksI&8t4*#o-ULT1O6#MUsW@sV3cK4(O{#P56M<4CKX8-qZ-}k2fI{E#u_-}0g zHFx|srQJ`d|IOrotrY()VD}5?f9v*NUdDg!%DrFrpO^NRPx=3ob9zodvkvPO&55=H Q61=0xNXbiR literal 29192 zcmeGEbyHm16E+Md1cF1500|mAxVyUscZVJ z>z9)d`hsPLv-M@3R%z_cBQ9=|w3yUUH|qO5Or(dSlo)_lOHzrPV&xGB&oRQxpQ*_A z*WA;fVz+s*bD5nTjG4-DS!LP(*d-((p=6C}ZmadLQLlnW5vJHj5+il$I`^o+&vzkXNG z`kXpCYTWq$5`OX5^Uu=xUjILCN4T*<1);Y!o8fVG}KM3AUykQI~uUty2W5(zjCRe%338f`i7cI9R#!;FZv66Sf_ zG!Gu3&#Ar`Wn+2Vh@?CUMy2>3Ng4F}gTO`@TWYDvKcC*DIy)btT-yItvc&4Qn`HyG z&M*P591DSZnd>?%n^(MYSQ~#zffwznJimTvyG<%M^-{-X&9!yfo>%%6-tIW z6a?PXn*kqZ;jqiezO~ymptG9jb*f|M_1w*>|7qF@$*(%$(~fDcwLtIo`#)7QOd5m# z#H5U(`J!!klYvd%tk;rbb0284D3Zd?_XF%^$vI-m7xBA=%JTfTI~GQxeup*9M;(_F zAcU3kV$k)_^c~m0q%tnH2Bzi%bQRI-8St#G(ZU;>TnqPc&PZ%>p+?o5AyQ|Ntsi)i z4jtz|`0QpXLN4CQ-!W(;-iM*Y97nrMR5l>b^wg)^MoZ@;KsM=U@tte~Epkx^gCI@d$Ts}|kZb?`NKkjN;N zqvdwh-*G&XbFSUn`C+~rZmsn}&^mGscs;AkQ7T2w6;1hf|7v!|kmX=yX`A_V} zNq+XfCr}fyu&-?eD2I#R@6+^mMs3)^So936)6zXRGH&weus~a53m_XcBadIQnC*1u z)rnCwNACioCBl${VCV9}_xmMAN*~bz9*&!v*}I^9bBOhj`V3iipLAsJhPhef+{}S~ z+1GXIww$wk??LjK(U5;V+IIefv`ibUK1cj_bA4h#qLGW`K(dv32fg@06VDNFvzKK< zVFY}Oro$9ruI@!Vl;%&zSCq%DXg~Q5e-HmHvW0ie%h7p-+z^{tN{HZiXkhx$PxW<{&!0eoe`(Uh_Vl&UU- zL3sPq%rklz)J{!L;{v`;R{q8hv`Jbs`US^x59jYAihiYz~Gv~C|DTue>Rrf z=V1rfQBsDf-v}b8g@g~QK}_jpr=XB=s=?%KcPX*7!R>1kWNKQ8Y&6=~7XOWKVy5)f zRX?wLgv}Ta5&Q+4A;}#Ugp_(~|C|(Ua-U~BpbIpmh5g=egynVmpZq7I#o^HYp!|L;D3ze;CKdo8VGty$`s`<1q5Fg|g!JUtBb 
zrn=)|G@J)_QtY96gYpjH(6`g>P0c|op7XrLgHii5I>>$4+r6liETwrp^*PPjQU6N` z5;tfwm(^4dvgf(uKluWnXHu^whm&X=BgwrOk^lVsc9Z}5;j|lAbiSIKg?RqSGHm(oYV(BRdqPW{NQy)w|1yev>60&T1bR@Wg^0k;&Z4B9mlMKlXyfmX#1W@UtGS;7G4xa0PP)+q zimp1bfRCPtmpqZ&NIA{WEcoy4PNYN1ao8jSnxx^1!+w%`{kCu_j-qE;ZhxU%jQQ2N z+Rz~pFahD6Hx7671tSMton+bM+b(qF+sGCswwZSNwW2fR{zQ3Rd{Ub7!AFhr1Bb+0L;{ZhYm)^a~LUP{fHI z7pVAJ5+j!C>qleECXK?*` zNz^y~UStAorf*tf-_A9Ziau=2e4zd*h}66E4T6Y!9d9RRQQ^(ssEd#lc0&h#|s{ynL&Hs=2- zyPO`!<}B+2gXalOD^p)L6Q1t&0QPk&U^Y#rC9Yq|^z`4?Lje7eHs!%qL)j)lzTiKp zH5@pR(e8ztTXP)LQ(Y$b0!*s9v3N^b1+MfEm`pp#bZcMg$5wU z=}%3XBAMFIKXvFQ5hM|Y#3Y(W8{H?Hk3U(fe6-pBTafu7>a6O>WH3VhAA!H+)8Ca; z6)}AO?-Eg~<03qLtv3H)rJBQyoW`lj{ny063BLlurwe@(EV?LHzSSkgJ^1F+<(

P@lx;iK>UN@Gx%J1E1Hi`KO2ZWQrsmkr^i*bbV7F z@7O@E$po6C`KG2tpzk6H65Op=m2o1Jj?^`)r-g}0 z`o|0D!tCcrB{I75jiuVVdoXQfZYgu{!Z?x1Otr|j1X5X|z>i!Zs1x$z3(!oemEF&>_ zeULaXe_-B}+y7D(RM+h^M(`ChZaG(1z&}JFGgYN0mL)s;rv}VLDm*8D6K)N-lf#?S zDEP>Ppo={_x3Lv&3S9Fk=1_E4*Jb-7vP(YtR9LG33OEO*$<)izk|9|X(v#u@T|AI+PI zZk={<`#(5>=FM)q(dpmC^)gjuI=0#JJqk6f+7tv>@;zSF@MB!)CTAQa$v2p~&#L|J zprU$@gea834<|J*znuTz7T&@F<#~Vj_L5xq!`HM?TrrOJO$^W&+vQv%Is=c9pKTr4 zW@$7&.MP>6zif(_&8M5j(hBcVNa)m4U@n*rs)ItiKT90aHTTz{QQhe=b&ZrYV zO6p5-=aiD^`|7l|!c2nE$0zvSYgqd!-xj}gtC6!K7{b?JP6V<$6+RN=AK{PC^qmj{ z2p|1iH8o`a-x-@oj5OYMlK_P7Av^9G1Z!CSsRSufr6}!iPiB(4$1cS2XRC6CFCZE! z@c9qG7>iYe<$^}9L#&(cl4A`9lNLF-PvNby#HF)t>&DB;i-Yn`q^QYNIZ|J`jw;89 z!%*C86Tiv@-uX<6m1TD|kcsy-8=47g_rh&A)Dlt4-|}NlNSX%RU)kR1Rt5gEs^ss! zFWRjp_KT5(p0t+J_%pOS7G476NlZ;_$P1u%E8dXWu0=K5$UEZ1JPk%$axt{aNoVjA zJZPT`C8o%QkOhWZ*Ul!4Rs0QI=_;Dp4+XzFwmkoWfzI(q18I1VXU{Y4RGNEC=A|eG zzpr|xZwB$Q+hei$9XyBh13llTLROaS^8RiZkN9ULRYinA+zjgjZi4htvmj34`%j>a ze#mat&=GVCGH1*kqyj~!@GWgNt*wt(0K&H(Ikv>z0_3TFZ=LmbxWsWiKt@iS z(&&0Yqap?9cI|)F(WdX=exoP89m5{59Qu^j`BcfA#>fQ~j3>20bd*8+jdLzn!l%$@ zDSHLCO0Py=1=9}xTANncN~0p2*-7UTn~_bXAHw?_?JCJ_5*tfE+TCgLWM^ivVPawDQ68 zvP{wQu*s6aM7I7d`nk5k>U*`>*BFxoz822IxeM~;OTSy1@5_FmmMsO#2SzU6PdB!%WZLL~!#vS9dP|yem;L&3zT1TQ64k zJ;d9wXGnla?hc8tyus!r>Dm}tA)C5-PNwCDLqry zlX31mBfuN8iE5CL_k`fYs6uPOSX>(J;SsqK5Ebpx9wreyB%_2t@jA@51iiN`A~F4jA!)gD{tOzZW$ zH=^&&MNIOZ^6bkN2yfkrH_blCbjucUJwLhiKRq^sYn%|7Ew^sPyPt#;C%3A(uy5*< zrC4d9V&Rd{76%dO&?lZ~E|R=8yXX^lEkn$OI;4Dm$)*YCToZiQUxr-@-}bynYd-ih zg)8pi6vGNbkDooMs#{DS9{HTiV=nRFjOuv%0Wh+aAUt1{p-0609$z1qsCe-|R)8*W}+|V#cNJp^~nGr3Qnjq%pbi2}NO>|=9e7szeK=1BeJ9s!mnb&@y z^mNe^XAfS;MGQr24T|l_*=syE3y6Hln}4B+z`naz)t#IUk*LRuEwxa_RbVJsz9GEdCEc4dnQx^K%@y9w9Mx?G`+GW$1l|hrZ^goq z>qHppnI;C(iOj= zfczuY)oE7eQPk^hC$Ie&>|pw|oKivRx$P^FSLt6814Vtz(q`zoN^bsk+-kE#!W^K|y9YuHZB3SHrj0_bZ()~@F4($S)u$kU#Q$Q}zFa82>c_mqivQ$rn_!oL zjU*Ctb`F4M`U6Y4phVN9*p17lsdX5pM-9XzyT&SJ=_Sc@5uRo7kc=>hflw8BX;Ff-oFQ;#wwt?N)z*&Ywi-r;W-}oa<7C zm_+Z)>UAc&_Ms0dgD&$5sVUlauKSl&&lJndOIqv`tdlo>ONJ^X47jmmAfM2_*)cJU zxdEQz2kTtK@hdeCM!rTpZVdHOXVe&1#8^A(^Of>{BS~c_j!6wS*hiUtcZf`K*NcrQ z%gnkZmmK?v#<2|2DC&s!bmLE@Tb8`x7(zaZa=6onX=8*g^oQ{@R5yPq#)AthltDBU zUAIhWbbgT7|8UWLGEHk@&N;;WyV7QkAibfPpY8Ev9;f4F~_jnzC{+?x63v zrN0b_zDiRw*0il)CsvD3UHw=_n8V4Q)jR#7*EM`L05SdT3Y_M>Hj4AX|^Jak+&I zK7!Xcp^Y-I!!1gY4_it7IF|>{M_$)g`4TmpRMFJg^=yv}E-n>7)FE6-Zko6tS zpQ>rNjL8jJKEo9#Z2W*ljKp6VHYKB&l3XcLzJ;$0J)l>%&p0;QzOp2F%JWt_95JFw zI#wK|&+)P6sZ6+Ffr?Bsl*rL3OTt7?8=(wKOG1V==p1n_r>hgkZT2?GpYk8gpT#I? zF%QiuojcPR;Xa)Zg?3y-F9@GzIyNZUZe*C+@$KaTn5)&>M5?-fP_8*t)r_-sFCW!) 
z8``%b+D`6C2d{_Ynz2oo*Ipjr30A%kMgXSRX__C;_vv#utTD>q?Q3qD-On-R0eA^I`7I4T96`r)Tuz6`b9KzwqsZ8LNIFfBix^hm*`1%!uHHr zw5`iJug7&LD~>;Z;g`Hk=hyRj+?U@!D{4~1Yf29{C}T8zpB@1zssQkB%bK)au=n*W zAQZKk%|6U|<~9^}oFb^D5zJiYASb*?y_H$KeAUa1T;Qpw>HLoGXII`$!@sdV+C3Z;%>j&opUqu z>2>Huf#myr(_U~`DYjZe>=;x8Nc3>jx|a{?#})6e683gET(IF^vcv?H#M`Z$H;j#g zmnM#uoRwL>)ELQJ9{-Yuq?^ngLHHpR@{EjGh-m?zrA$jHe3F0Fhh{&rkS6-$U-Wjb z04S!;f&?j6NXO-=`#V$rzCF?N@&0=4>i}Z9zkmQl^%|`o&79jI4(k#iHSufRF_%eM zVqWQ1k?R>3MUjHjE_=SiYQVgSO+>i%COX#JZFv$3e-=~fO|vX#O&Bw!i84rGJymNw zUS#TMD*+N>R7*>BHmwGjR<-GMUvoI^+>%X3fp?>(mqk+e4LfyS>l!&W%{s6~JZPrg zK!+?VWrA72np1F83Xa2-CB#6YxX2Zu`b(u1FRxi0Jy|XDI8F8S3Sj+DF~YrhGj`Lr zkGF`qSB}S&X86Ut8N}x-!-)M*^!_S1iU7q;{fv;5EYp(M)MuloK_u(?bZu=l=;65U zq9(-k2_>}o^BX{BQQ2?n7sodtm~4!U=SR~9y^q1~V%@4}_kP5%dx2-}Z~NaO@-E=8 zTI)&7_qW@UBZLCDd^D@K2$c zSOq2IJwHD&rtd`icV1`uY(~8(NZrC1-7Wya6~F6R z%%i_3hlJva{oY03UJqLd;tnC`^xqM$oWa@5OXSYT*(mqESYPNFkkI(YQH-QX#G|3ye`0l74kM%P3)X~AxorJcu+jagP~f;>jLnr7q_s1*NtqNqkC>r&AO)7jQEMv-+O1T%ul>Lr zHyKn?+f%XyvZv;qc3n!p+L#OOvJQOs;aoX@FEnqwq4CJTqA{T{-bRh6UfnHjpDu)S zY4txR#jtm^e0u+QA1V5HH4R3f{245#;|WR_w8sO$Yeb2xWf}NzHq}2Y&W4h#iIl6Q z%Wq~LRTFV9Y7F=NzvQJF(gzCmW++4m@@oU z9+GkqA+%d5UwNNHyfw|AVYRumWkatuE*miIsshF zzE?K&<_9rbn^0a7R0K&Ay8E?~WD=jr9QrEo|S^M%isf z8W{Dta^sNTwj2?k30xR6Pz6(o^v#1qo_>&fFXOSXF=iP#wOWsz)&>;8caHkJUUKT! zD;8=Ieq6M*5|)94XbcOQsbLo&m9Pd?M%*oF#H0edLPWan6 z%QgjpNB4(y&40NFg{6N1;c$pL#{){g=JHZ;dP`lJz@0pVI_~1zd4h2z*6WaqWgrW? z84=1C1Z2xG%IqGLF>u!WZx z8YaE6%xH@FW8T8hR*=7du@t}EF1JL$RA26s!O1j(aWafyROC58)c>XtLHDS4IjJSv zuDB;|P1hu935(sVP}29xh6(P~SD2UemeHgRg`BfdRNLZk4no8os>DEQpEbS-)DonM z8|M#ahcqDCFU;30QMgZe{0SO_a`k*@DxXQ6S}YRveC$a19P}-65UBd9Y0%vcl1M%% z1)1Lw?qeXfn`woS4j*~JtOCt7pRw=x{5-hMYrUGhzK4FI{c0*SQUpw8dZGOF&K z6qRB<^5mpcqsMszW7i}C@eSvHH@a!jlSABRq_2$+rgJdTXQRp78SC@Z2-JkH(Uh^u z%8lS#M>H#@o+X5Kyw*b=n@Gc(Z6e%%{{GqJ)T>A0s>g7WAxA*?mS& z7Lpf&EU7lRu5yX~tO{M}&wF{PJ`&1%_BBlJcCQ6Jpq#LrzBQnI716ndb>YQHmj87C zA`Sr`{3fUAY3jRs)l>+x$YUX@ePR0T)pD*Wwx$G2(~Ra|&hNMO)fz5Kt^36>7-JeF z^+()(vg#An!39K0pXvwhLWpjg@QI%+#o}O{Z=Z(KM(w0C^@~O$g}0uKhWDK5rWV~6 z@p2(-JXMmO-e{{369fo0GaB?o=O7W01~C3*l!S<@2*?cgr|&>bJbN*VE?j9da zw}V_#fPx-r@mS{pFZs;@dz5^Y7glFY6!xwlQG@Inee6SyxDyu$!k3d5a4`}N*c6Uf z(_6&ne`;JR-8g?BlQ#rjT+A5-VNA7rOMVT*?Kk^+C}T6r#g z=o`D?lP#%AnLYF!1*v2_#gwjR2^Dsg#9A@HZ6#|G9!0w%%XcO!_=0E%&f0&opdRDWu6qm7Z<2pwk3xgc9QacI_il2*g}Q&Qyqb;gSEi#~VnG#y>^ zuc;!S@5&V_KX{Q|5d2P!`$??VJhryAzP6)eUHaJ&Mx)2%JJO4!Q!9~NMn4)u^Kr~0 zF86nlxhvIrFo6hD(Q4`Sd+?D}=Rkv8*PuX|rr6L~Ch$R7-pz*$aK7u8POLp-Qu$^gl2CYKW*d?XjY$s&de#1%0SQCfNIt%4S^~ zh$|j`5c=KAjRoKmqiNpvg(!A2xdrO|wQCJ7dKJ@^f>JpmzJE4s-MW*z@u-@dNe2)U?b@qznJ zo*a}bD;wCe?!CljG9enOTXMb~0&c|#DAkz&%w@fUskcJb8Hu6vulaB^g<7csA8)8P;{^#4&js^@t?J7|E zaKku?H3mJG-}AfiI7wFSql0s|B8AJZQah>TzAk@NG{!_IZ0w`@gAMy@P9~8u1YqGk{XQG!Rk>6Im)v zh!s+m8j+-iONtW-Ts0G^AnkElv79YAm=Nvp#}kZ87*LnKp($`!H(F5-aTXBt z8e&VU+KCgr;`lg6G~PPmozBO1qN6qRIr#Kgjc_(_wTr_GGi zaoc!ZNrM`SvI%bQ9kGX0w#h|yLb6oL>i2_vZZO%??rtAj*jm(8zL()%NMfl6zfnp**2d&zWB(c{wgpdpC>BHE*FA%I9hc z-SNu;$StPl{%pXnd>=4{%(NC&mM8YY5Sg?3GVq|8fi+t zAs5Xe`9S|MXeVvyM9`(sU72mc%Xe8>VUXvS%tW56+(8oBOqPe{9*%g2hwogF=xwh1 zR@`353}G2BjWh~>Obi>7N&Yr_^?^(%frNJL8NERQZGx;&IaP{+1!JY$nGLlL{*w&H zA(g5G)$pX|R5p62(ug4+~zu(<6URDGN9EAUU!+AL|Li zSrM#EW|z7yL%(WK7=>jB6+;LW;mnS2sReA-;6-vUG8?^LaPp- zT{4qiD%qtLy-`LglEe_J-bQ-7o@e=a<+)yiLph^SqI5XWDskZzl0P~bGsT+n7@qUIf4${x zsw!8KgDkb!TXs_9{PQ{BPss&K(`D^*_OC^!)arT9XmF6<+#K#LgC=#?YJbiBBwCbN z`edam3uC@jIZ~pF#0I=!V*JREHv`7k|DY~3b%R$FUn*>9Lr$!6sc$%$VRkbLcuBv? 
zI1~5fYA3@vCp|I-FJ}E~4%kuJHYWLQaEVN5{CuccboG5XlsLNBpmWhFbqe1!3`guY zLk2E@-d$#azv1j+uDOnKwfnI4sJZmb-4?h{x&aSr^Ms{=EsYdwTGLYO4WtGs(I7iS zc(Om0o-{t!V_Td-BhdN+^ksi}Z@eUX&LX&h>^vHDR1e;H+7pLA|KQ=`3-1FP$#;iU z^3c`LJ`*^PU?fZxN?N zv~l4$4TBJxT#4dM$x|%&^s9`=gHGDuW!TJgb4PN?fKBlsvej%l&Z*R0+S8G) zhnt|=&$z^8<^5#Ua+|eRw}wqa=FgXl$zEsX@(NMF_ca5lN)ZC{hGVTjSV_aqUba1~ z-@oVCUgoI~`o>3r?N@a_H+49E8!bzeU6hQ3LPlAl?p zZqt0nO}4v{;|^rM$*8iB0wwAUe;^9T?=qe zEgRAz@+_^L$!g=@Z0vvc=KPj6q*mp_FnY^=@x=LD{Hbe3275`Mq4>x4V2^T-1fZ|Qd=Ss(Mpv%+*{c1bHTPN!$2`T1V3lP)iQx^28#C+Z{ z6DOoO&(bF83AVYM5Z5S6{veelQ~6Q<{PA|li&U4&3B2r~aqpup_ny%Rfg0DOKO+dm8q7~u`tR-7}4M)_mJ%9Z1_)|HN1fjCsC_7A& z&$Xg{9&l5EJ3rnu&zHbuFu^khJEx?_l=g=m@XNg=Q8f?wqRbX`i_(@`6toxmOfH+; z>r8sIMHx>O@{z(ew$Vi;UX{E8vws<3CSTQd8t{xn$&`bDz@abndw|&CcRmj^Np2Cw zdZ#=uL+e98OJI#!#K$-Eroh0u>IuPeca;QPMfCx`-#rD;v2^hG{bE&3CF78xYxJstZ#y|Suou(wLl=O$dM2~9+N#j#*>oy z&s%$_?<{j?^`J%u&sc1TxV4A2-Fe*=mrhl{L=w68)h#Ux?CHU*rl#owvXJ~|VsCn~ z-tV95BYq)xB3Hfj9jO-qv4)H(4d^{P|4*b)JPRAh>CwSBVR_;?PI%+Nwi}>Uf*CvwsJI=XbaYTsulP8c#O?AM4pYaG>eo`)4;}qB)B{1_B+aHh7_ki}3MKI140(;E<%a@y0^Y=1xus{rO z^LKo4F~3$k0V_JknNEL=@L zbRT@>&hgL$<%=3k8r`fL-{0AQ>bkfd_GzAOdr<-G-3~XKF#z_?k2lxzfKIrbx4vjp zjHv`_4iQo#BNQ0K18!qkH60D&MK-yd##;ALec{xhZak+QfO%uBHPyUG0S0tj2Ew19 zNvFV`42pMIW>2?3fIOK6xc4R8vLN7MbSUu8dyq0aZ~KW5kXxeHz&u!&E^m$T4IrVK|Q=)kOl6{QPxlu9|9jkNBzNUfxB-&d0t#wDGfN{%hhTH zbJvPNq@s&Pff@?m4dI7oO$if5gb#%rmMc3E7mv55&oO+9f1AISYweYanCc5{$^MJT zvbFnQa_Be-X9Zas1+TkbQum=T_3nKZ?2V$R-7X;32|Qm|{zw26*?o8V zCl-0;8g`kN<7Ieirciv_i2rM|N_N|iMBr?{L_?#>1g9$nN{Ig*^xIPGAn;gp(QGW> z5SpVw@7`cTchU8jUEYoolxM75DxI8)-!l|t{(OdSVx9%VQ{+mW^5^FQPG2(+Uk!k$ zwerIVo;kfp?YjWFF&iJ{TA~F}{5K9hFW0<`ybej^At74SMp9v1c$OlsqQ27|cxZEH zzY3N^=AY;@#zNM8l{EJz^3iTa0Z&TPjlz(7VxNpb*B3r!_aN7q`>U30}(hUNrRAwAM{kTAL+WM+1CJW#OH_0pj~#9MKbf!yYiFiMdBYO4FYCe){#ifdkXb zz9#vOXjvEZUPkkzDWe(Yx{2SZ@`sR+hQ;z4EoCYN4K_d!h-`@H{Z%=F6HdP73WU?H zZ6^)%MYzSfWzF@k!yf{_5l|E}2KGnX$()!(-TPnbP$p5**IK`oNF5QV0bM;5N}4JH z;xAh?X$O5xX-)Xhn>B7qqM>SSa1F|Y45o%xF05g|G`C&3z~$O>`L}FSMXif2T@yO*#aa4p83(R#q zh=H(+B3K-Kv-dQ&!l+gY)^om9o9g>+iB;qHZPv5f*8$A^pDB=h&srG->Pz64apOgr zD^XTd4HV)^u+2;QEIp<%VPw=zj*9I#{($?}go;SQeLeV2-vb}OFEd^Rr>FL5pG&qyss|a@`}0Xdj}Iza)Qj8ku;QSfl45@fCRe;J_avJ5wmk*sU*bUn#BTHhpF2QCqup;u z7y|F<7l-eZiC&~O$|X;VUEUHP{cdSKFCM)!bLNnsY0~k8P&|*N+hMQBjU+ZU2OonM zs+=`>sm;+tQ4cP44d^OM0EYtD<=OyDvmtxwHUc)0saHwxaFHYtEco5z)!Nw?rLRGQ zNQhl1%xaN8&*$X3Q9VUBkSP!1WwS+cw?$MR=&0}0y=r9Knv*v}QJ?d7Ovt2p z*StJ9Rn-=`+Z+v7@TWE*)lg{nJZS}pj1L<$|B_qvQ#4bSp<+WSy*0A00he)O2sjxF z(YQL=rySv%>MB|&fK&Q$ly80;2xX^^ruv0$ywJoxsk+PaRz^>DPKOuEP4pGYI`cSuKo zlSrD7F{{cC*~66Cx-zz!OHDOKCHpA^BL^dmJ&8T|WYoDBP>6B!9# z`D?ENHt&TFXq{`ub>p_Gu*o8Dt}zfd`1g#8!WvRXp%YDQ%Jd2mv};%zT-J2B5)p%M zzR{w)k+^7k2MewyA|KFB+sKhd_@H8)JvW46c}Nu3+A;dKn~#+b(e zbU7m4oi`1vL?pd3wailD$&IQuSgi7*Rr~xoSBd@8OxR&;R?!7D<1b@9V|QAb$@WiB z;S!r?bIL5vBG7Kw6zUHSX^dZUwleWHgj(&heVS2aFAuT%H{{7SB3~}8)Q|8BoJokI z?3#Gr4L<^$*tE(xBSqlNv&h{RFarDc`Lj2Wd$))mzP~oDH-k~f0ofx)ob?}Xj&rUT ztAY$@d#cSo8dTCD2XnFeaARgEyu>==K0j`E=5B6;gAuol3Uru^xWt9JGN3)@d;{v_ zM5u)0ymP~VGZm)7MM1`N#sD_ODrs$sUz;-D_jE`a-o@dZ5bL|{WtICXz8dapEIKyW zKa(9`|CSZbg1a%s6fA1|LZU@Y*z2M++}MA%R6T;-+qFamn10)kR&@! 
zWHjF0$h(DU>%S*ecD=&PI@56U${a0&6pqag6)aH`hU|Tjsi#Gq^IGy9TIVp+_p-~c z$*u-xtCfeIl3~1#ro(ICU(sIRXEbU`=;B*`hPWOvSspLW{@yNIWB)Kk?Iv&>(}1^k z=&tMGTa9D>4l;fs>&D~lqiDDWZp1L+wn2Br#pXAE#mg&iGS=RLhUByAO(7}hwg|PJ z6*mTZ1&P<0H3es}@9VvZt2~ypyrT5rKn3|Q+@YrV`wo9Bc`UMht!VRJTF#nMp@)n`w})oJ4c;Pnu(NX}c8W%*3wqJ?lpX`kn!j~-9e zpKflv8UqpkgF}gBYl%QI{`wfa1aqN~#4v_3X4=;t)M!FrFBcY0BrB;d^4}PEBsn-Ud#T-a}P-0s)bZ^bS%*K!nhxsz?(=si7!Ms-gFe29PcY(n0A6 zND)Cq>C$_LZ^v`a=ed8uojc=9h9Nu2E^D{-?sxs3S31S4E07^Tt13R^ufd2uUM)8@ z6cUP1cgNfKZXOs;pg{MQDyAU(fpxk&qg3hLU4sWV?h?dU`g1E020xfThJQ3XK^U42 zn(Rxp6^`$}&>L@;Iaw~X%T@KhKn;(@FqB)NT_KvB+D8!1XDERi5jkrG-|>}Yl>()7 zs?^>G?xNL<*h5o|lK#3R$2PRSQgv&)`qBBRuek>A74#7S#Zq_^T!XVK{qRP&uwLe- z*d6)Cm*>UNR@+Xy?bK7<(GLhev5V!N9KQ5(LhZb$>E{Y@H*Q0hy zyPCp zYIP69D5mOm(gzknQ5|W^=)XxW=IHev`A02jVu+TR>O<%SbajIkh<8XVhw#6 zrqdpNSym$a-ND?xB)ul<<$xq&^rz;`G>C_1Mnd(BO+=yUad?xxu0JPqj6Ui01-S ztt=;p531d+T=9*$E|X-Qg!+JAll)8`nc>?Gj|sk^mkK1qUwm8}5;{MF^G)3><-o0u z9ni(;^?mdS4;p}+Q1Ric2G~{gyh;(GtL7QR#g_-Pj z@04rEZq$x6A_WOQfMps98%gLkZt`;s1l2s!8*$)lJZ0#}QR3>5#I~7uy>klJgT-?G$oQ6*`Psv|^v=-Kbu_pg5J0Cj z*D*O5$*1gM<@>%)D^smP$v;t$HwBU8Bg7L(Os2pZ;X`pdVwv>Qd5~Y_$0=!ksqv+- z{t?JNcLX81?4m`Xl%dAsy}C0mmPgOlMP+xuBn2Y%=-BG#{y1c(EU3ko$tSbaEWX;> zYG*oX$(zrVhfgfK5Oln@h{XB}A(=kp=YZU^>;0B@GU_IDI76mKGjd&wTnVZLrI*z$ zOtfgc-O_F~LC^}#M%>X_ulXn&sWbD_efDyjtCmieKki7)rxXW@X4kNFp>EsFZzj&3 zMV$OEAyhxMnRrrsocDGt#uwA8-!!wj%)W@sgr;p1*N{vmtMPMTGQqezGP>UbAr#2# zb&>Q#)H&8DTsm?C54us@cUYLrZ?YCO$So* zq$00*8mV|ZWPK%Y(q>5Qw=R9)<&C#=!j%uDwGEEAbh~ps!cRKzn-6r~&%#*tOcT?q zDz{J)e}KwC26x|m6%(9^C`7!$yy@#&+0Y~P7N&0+>f6*=rR8=xuxEZJb`ty%dE$yKB-{6^ zpY&amEAOw_XgU|2u%$a!{1zJE3o&sj4;ptL^JF>bkL_H~(Bs*j*thqjBe+9oTgMlO zPQnm0l^R;?>TW2P3rUCUQia<|lTBSH2{#{flRVI-_Krs^AND)6h?u7O!Dy!lNKJQk{YvWaeY@7)lWP1drN zy5mn?(RTH^9t$q}VQq&clT+2b`)l83wQY^fCw*b*9DuOkHd&QOpbz(N4XZF(Sr4*( zmP(7tIB*Fjn*R~*O|0&zFFvZro2bkzq_3UAIYdnv)UegPbQGMeWj)^Vgs0f#Ha&xh z0-sFqh9_Z6sJtNVi6Dz-`?;VJg$CEy6%I0>1mS|n54Q80=q2NHOFCVgU5Doz=MwCj z@QxxeOWu)Ij~W*MwJRfi4;ny5*=!h#e_I^V!R5Ro{Z?my9yLRLh0yshJCpM)BM(ux zqvw2^ZEPWi4aK}w<%8NV$<4UrFo3MoT4|`+KWr6(QwA7A2#(tDpX}UQ&uI;3s})kG z{19;qgw&ItVQyb%9;3vm0Y`=P&7#j;V2I!b@Xds2xo4&0y9@pnX&Q*Am5lKkZ1DJSd$b{#@;pHUAj>Egx zO%ev%;}L(sJGhL|wF5PiLQx#bCYb@L_&s0 zill!vWW@ab_{GOPxpP4d2CvZ|N;*4I4*P}|yhTRyJo!RB4Y_r5891^U(Xv0Ua%f%X zdSr=+Ln4x?BISIi$&<|^EbCx4=cj$zQhnr#t}`lLaBKGWF%rMLuAn@?{aQBk;`KZ>rC zkLSW;r(nW`s!{9LzfG`%G5} zy4@3O->v#YO&67*6&Y;hy8dC$5K=WiSEp#XA(s5YkMat9DIAPCwvL_WFl2{);2hf0 zo?Uo`!u4^^3HnXYp z(HZ84lWsbB3|~#w|JOnk&KMVJh|YY`Vo#hL zpHyhV^R8d|wM!IR%jkUe@WvjkDfxLK-kuQVFAlSxoTKBqcaG&nDFW08H*h#;h}tB3 zhn4K1c=&4iQ57NNoaB|hdtp-xVcBxUf+WWeR|Kx=<&mz~Sk8{w5cP(z>|s(RjUpaX z2k=ogUwFE62livZ>-!fQTE<;WN%^(k*6nSy<{(}g=B>;9VR*RJbB3Ze!=%0`jgiiXnRZ`y+Nxjc0hi-mU8p(r*f15ge zD}_d436_b9yz`;>$med*ZNr~z`4v}os!9n&C~)I207#&ucR)ia+CL$Q5#6}jicf^U zhsb%~W#kF;bRK#1+;qkC>I*?in7jsLFu+&A@oXMW!y_i6>JKbiYz$sUid~yAb`$}y zHn}*t#|{;uTuL8+i06fB6*9>*u$gU5qSu%+EE+6T2cHXVC_8xr&Q2R{(s4iUm!0_N z9MLqm_Rf0_Ez$9gCxmG}WcS+FDv<^PiSM0so;!o6y{r=Ems6*@JjXkOWrhv)Rc2d| z&VG5;ugj&WX1;I6->n}#2qLg$kH4g{L8?-&CHb0SoKL5F^Omj6#g^5ed)}O~&11Ky z_yN0TbL;jJd(3^jSZ(|2HZl|~iyp4vU4Ai3-bf@L!?jX4Nphy^^P0ywSqa30C5SRD z+m0I<_Xb2q1+3+QJ3L%C;fQ#~BkkwaQJ-k=ki+Vv!SU$CGG>`x1BA=}2LU$|?S@+e4G~h%I;-WCz zFbL}wUU(Bvcfq7NT7o)HpR>(Kl`!xVJbu>GX94$o#KNu?Z2H&L1e@YyoF zEgb0r7$KZVS?>2P5zhdM!@ZHi`EB7thc~w;zQeZG1<5@SF!Q?wOmY1h1~k0My(SsY zI`xtvwoDvOc*J~*Zp}HS{X*fH)BZM7(K2>`{)no6<-+^`HKTd(^~uArLdQcyyqccU zG?L@W1%_ZxZw4|h)z>g{079CXD;iSC`c9a2@Pa)w8y*}RI(c0eHb*G7;%d3LI_2Fr z<@4$I)(<)Z=17LMHYFT6%>vH|lRW(oH;{8&>Ns;~404mDeZe)^*2l^@(7Sax(sFu# 
zYJ2H9Ps8wgBa7=7=y%l=F|aV7*HA5&W2g`8S>@rcHxiVc26k|pt=ZTjEX zmC7sJV85fPVVgfSRAI;FG8aEmDtsLpCE1Pt13qqEj8V^{v0A(|SBuVaph9%sBj!L% zM#DCNf6BmO63+{mKR7{K@9B!ZHMd>N_4&d2@o7|=_iWViNKu7o0RB{N^wDbTY3^A& zV>iV+JtCDG26MInH~|vmGWy4LvvZCEtm}dKiMJowbp?D+Sy{}_elAq!qXK!z1^PF7ky3dgK>;=FY z&9E1{eqxswxExDc#TCIi-KNEfKeP&CL&RUEelNtwW@~hK`s^DF%_7*kQu40XE{Uxq zw`kH8^!~4yrcikmJY^SQjvK1a@EO{i4y12Ly&eZQp=D017*KcA7E`;P;^(73f?NI0 z${P6#S?7I9{zf*6avGCb-898N61H-%x0!ogZ?E@QZk{onPL*aFeazUrxGb>4R4rG` z;#Rm#C2gbjRyk80*f1G-pGg(Mn1<68*Co}#O9KKm+-{C;z2s+hiyMY!S>dtI6Y$Mt1RBl zNnV}lS28ISoaOOWOTu>9>*Vt0CmUuLyP1~ru=`6n2|fJ#hsWJ_Pr{DGd2T`~6#Xq9 zaw1s$=AM!takJD;d4v&P^+rmFeL+ky&h)4mb3n&m&84~ZKAsv+lyu3;tO_s+fh}R% z&Fsx&1WAbkgg4Z^*Gr7F51;Pvun6kV7{vK06$Rn{iOkN5HWx9Ez*B zP6OEjsrZyLNWyZfe(6evN)vsmAqv5`u_nL#H<3*wMT3}^*HBW!fgd=^#?0+B`OS=- zOGGENkM^#bO%XH!a>Pr*R+_t8UGEI6eYD_%0Ixz!&L=7Y-HjR;-oJ7( z!zkpl9SWAwS+enn~&b7HG z^YH#o8wVFDLf*bPjQ^qkwE-)PyOkSGTb%OL`>`N zv=|vMSS!c43SayNxrtl`+ZySZZDHB(wEw;8$y6A^N7S&!Dx^LDA^VO%u`wcc;N8~? z?hXQCt3%gg5E0!dv)wDa2cVE9`)gkv0p7C;OFgY~U$$fM*$B8=GwBMSSPlX0;c__# zK}`UdYkbhMTLDPKpFL(G4QW1}pY1obiqbfhO?fRFm6|sY$@v|xioIK@=vfJ(GcQZF zOToB&-Cg@S(Ri|@Y2vk%zY2Jyd_Ls+ulH(!vhPbf^-WX9HwiE0FY=-${S7<~XTQ%4hkUv;P+x2xpJ@W`?C_60OT1iX273*=BgpJ0mAw$noxV6t)po) zjBH-00R9>a|4I|!mVPd6I{V@p=?84IH+<{nI1marviR=k4(jfT5<_e-y^8#=$UxKF zsheBh4(n;s@G{m+c@;UuB=6m z9q25t7R9bNQZo_4vQY zu!+Au)4^g4f5HTo@?s6Xx<2PFC{rSrsVX~L%#G$F@wCPii`2^igfdqp8 z-`VFDaCOa;E#0A%WWmE~^z{XkHfkXX01yaPV0!~$3wkJsLzeWL+n7R|Q4b*gcV z429KMU)8vOD0!-loyj|&mpg%-1St-F7L~4m9IAq7jXPxzXQ`z&+o_3_oPjpmdnA{X zK`2jv#N7R_*MRnhplDX?BxYob9AOPZ#*TIkHzYgvAZvd$d#Dus
From 1b797468899f54cc637a8a343e8b6d50dd81b974 Mon Sep 17 00:00:00 2001
From: Yi Wang
Date: Mon, 4 Sep 2017 14:59:47 -0700
Subject: [PATCH 46/71] typo

---
 doc/design/var_desc.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md
index db6ea2b2e4..545ff161ef 100644
--- a/doc/design/var_desc.md
+++ b/doc/design/var_desc.md
@@ -57,7 +57,7 @@ fc2 = layer.fc(input=fc1, output_size=20)
 ```
 ### what should class `Variable` Have
 1. `name`. A name of string type is used to mark the value of the Variable.
-1. `initializer`. Since our Tensor does not have value. we will always use some Operator to fullfill it when run. So we should have a inialize method to help add the init operator.
+1. `initializer`. Since our Tensor does not have a value, we will always use some Operator to fill it when run. So we should have an initialize method to help add the init operator.
 1. `operator`. Variable should record which operator produces itself. The reason is:
     - we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable.

From a266a222c74f2bc32df740b8a7978cd79c420a0f Mon Sep 17 00:00:00 2001
From: Yi Wang
Date: Mon, 4 Sep 2017 15:05:21 -0700
Subject: [PATCH 47/71] Add initialization operators

---
 doc/design/graph.md | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/design/graph.md b/doc/design/graph.md
index e59d04e1f5..51b7f87638 100644
--- a/doc/design/graph.md
+++ b/doc/design/graph.md
@@ -1,4 +1,4 @@
-# Design Doc: Computations as Graphs
+# Design Doc: Computations as a Graph

A primary goal of the refactorization of PaddlePaddle is a more flexible representation of deep learning computation, in particular, a graph of operators and variables, instead of sequences of layers as before.

This document explains the construction of a graph as three steps:

- construct the backward part
- construct the optimization part

+## The Construction of a Graph
+
Let us take the problem of image classification as a simple example. The application program that trains the model looks like:

```python
x = layer.data("images")
l = layer.data("label")
y = layer.fc(x)
cost = layer.mse(y, l)
optimize(cost)
train(cost, reader=mnist.train())
```

@@ -51,3 +53,18 @@ According to the chain rule of gradient computation, `ConstructBackwardGraph` wo

For each parameter, like W and b created by `layer.fc`, marked as double circles in above graphs, `ConstructOptimizationGraph` creates an optimization operator to apply its gradient.
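To make these construction steps concrete, here is a minimal sketch of how the three parts could be appended to one operator list. The operator names, the `_grad` suffix, and the `@GRAD` convention are illustrative assumptions, not PaddlePaddle's settled API:

```python
# Illustrative three-step construction over a single operator list.
forward_ops = ["mul", "add", "mse"]  # recorded while building the forward pass

def construct_backward_graph(ops):
    # Chain rule: one gradient operator per forward operator, emitted in
    # reverse order of the forward pass.
    return [op + "_grad" for op in reversed(ops)]

def construct_optimization_graph(parameters):
    # One update operator per trainable parameter, e.g. W and b of layer.fc.
    return [("sgd", p, p + "@GRAD") for p in parameters]

ops = (forward_ops
       + construct_backward_graph(forward_ops)
       + construct_optimization_graph(["W", "b"]))
```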
Here results in the complete graph: ![](images/graph_construction_example_all.png) + +## Block and Graph + +The word block and graph are interchangable in the desgin of PaddlePaddle. A [Block[(https://github.com/PaddlePaddle/Paddle/pull/3708) is a metaphore of the code and local variables in a pair of curly braces in programming languages, where operators are like statements or instructions. A graph of operators and variables is a representation of the block. + +A Block keeps operators in an array `BlockDesc::ops` + +```protobuf +message BlockDesc { + repeated OpDesc ops = 1; + repeated VarDesc vars = 2; +} +``` + +in the order that there appear in user programs, like the Python program at the beginning of this article. We can imagine that in `ops`, we have some forward operators, followed by some gradient operators, and then some optimization operators. From d1fe87582f73ac99056b3accef20bcc0ae0665a0 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 4 Sep 2017 22:10:10 -0700 Subject: [PATCH 48/71] use block in demo code --- doc/design/var_desc.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index 545ff161ef..9175d15193 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -61,24 +61,26 @@ fc2 = layer.fc(input=fc1, output_size=20) 1. `operator`. Variable should record which operator produce itself. The reaon is: - we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable. +In PaddlePaddle, we use Block to describe Computation Graph, so in the code we will use Block but not Graph. + ```python import VarDesc import LoDTensorDesc import framework def AddInitialOperator(variable, initializer): - # add an initialize Operator to graph to init this Variable + # add an initialize Operator to block to init this Variable class Variable(object): def __init__(self, name, dims, type, initializer): - self._graph = get_default_graph() + self._block = get_default_block() self._name = name self.op = None tensor_desc = LoDTensorDesc(data_type=type, dims=dims) _var_desc = VarDesc(name=name, lod_tensor=tensor_desc) self._var = framework.CreateVar(_var_desc) - self._graph.add_var(self) + self._block.add_var(self) # add initial op according to initializer if initializer is not None: @@ -117,6 +119,6 @@ x = Variable(dims=[-1, 640, 480]) y = layer.fc(x, output_size=100) z = layer.fc(y, output_size=200) -paddle.train(z, ...) -print(y) +paddle.eval(targets=[z], ...) +print(z) ``` From 9b36536a0b2a8293b8936b175168dd047faef770 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 4 Sep 2017 22:13:23 -0700 Subject: [PATCH 49/71] typo --- doc/design/var_desc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index 9175d15193..668f067984 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -36,7 +36,7 @@ message LoDTensorDesc { INT64 = 3; FP16 = 4; FP32 = 5; - FP64 = 6 + FP64 = 6; } Type data_type = 1; From efd40b66cc4bb898fe152337aeda9f65ced33767 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 5 Sep 2017 14:17:13 +0800 Subject: [PATCH 50/71] update the doc for how to write the operators. 
From efd40b66cc4bb898fe152337aeda9f65ced33767 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Tue, 5 Sep 2017 14:17:13 +0800
Subject: [PATCH 50/71] update the doc for how to write the operators.

---
 doc/howto/dev/new_op_cn.md | 141 +++++++++++++++++--------------------
 1 file changed, 64 insertions(+), 77 deletions(-)

diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md
index 3e71a0a592..6bd54137ba 100644
--- a/doc/howto/dev/new_op_cn.md
+++ b/doc/howto/dev/new_op_cn.md
@@ -23,19 +23,20 @@
    - `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。
    - `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成

-依据是否包含kernel,将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下:
-
-
- 内容 | 定义位置
--------------- | :----------------------
-OpProtoMake定义 | `.cc`文件,Backward Op不需要定义OpProtoMake
-Op定义 | `.cc`文件
-Kernel实现 | CPU、GPU共享Kernel在`.h`文件,否则,CPU可以在`.cc`文件,GPU可在`.cu`文件。
-注册Op | Op注册在`.cc`文件;Kernel注册CPU在`.cc`文件,GPU在`.cu`文件
-
-
-下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。
+依据是否包含kernel,可以将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下:
+
+
+ 内容 | 定义位置
+-------------- | :----------------------
+OpProtoMake定义 | `*_op.cc`文件,Backward Op不需要定义OpProtoMake
+Op定义 | `*_op.cc`文件
+Kernel实现 | CPU、GPU共享Kernel在`*_op.h`文件,否则,CPU可以在`*_op.cc`文件,GPU可在`*_op.cu`文件。
+注册Op | Op注册在`*_op.cc`文件;Kernel注册CPU在`*_op.cc`文件,GPU在`*_op.cu`文件

+实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)下,文件命名以`*_op.h`(如有)、`*_op.cc`、`*_op.cu`(如有)结尾。
+
+
+下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。

 ## 实现C++类

 ### 1. 定义ProtoMaker类

 矩阵乘的公式:$Out = X * Y$,可见该计算由两个输入、一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释:

-```
+```cpp
 class MulOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The first input of mul op");
     AddInput("Y", "The second input of mul op");
     AddOutput("Out", "The output of mul op");
     AddComment(R"DOC(
Two Element Mul Operator.
The equation is: Out = X * Y
)DOC");
   }
 };
 ```

-[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个:
+[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个参数:

   - `framework::OpProto` : 前者存储Op的输入输出和参数属性,将用于Python API接口的生成。
   - `framework::OpAttrChecker` :后者用于检查参数属性的合法性。

 构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加该Op的注释,这些函数会将对应内容添加到`OpProto`中。

-在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,该命名尽可能的规范。
+在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,命名请遵守命名规范。

 再举个[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)的例子:
-
-```
+
+```cpp
 template <typename AttrType>
 class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "The input tensor of scale operator.").NotInGradient();
     AddOutput("Out", "The output tensor of scale operator.").NotInGradient();
     AddComment(R"DOC(Scale operator
The equation is: Out = scale*X
)DOC");
     AddAttr<AttrType>("scale", "scale of scale operator.").SetDefault(1.0);
   }
 };
 ```

-在这个例子里,两处不同:
+这个例子有两处不同:

   - `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中。
   - `AddAttr<AttrType>("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。
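`ProtoMaker`中用`AddInput`/`AddOutput`声明的名字,就是Python端向该Op喂入数据时使用的键。下面是一个最小示意(沿用后文单元测试中的`create_op`接口;输入形状为任意假设值,并非框架规定):

```python
import numpy as np
from gradient_checker import create_op  # 见后文单元测试部分

# 按注册的类型名 "mul" 创建 Op
op = create_op("mul")
# 键 "X"、"Y" 对应 MulOpMaker 中 AddInput 声明的输入名
inputs = {
    "X": np.random.random((2, 3)).astype("float32"),
    "Y": np.random.random((3, 4)).astype("float32"),
}
```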
### 2. 定义Operator类

-```c++
+```cpp
 class MulOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;

  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
     auto dim0 = ctx.Input<Tensor>("X")->dims();
     auto dim1 = ctx.Input<Tensor>("Y")->dims();
     PADDLE_ENFORCE_EQ(
         dim0[1], dim1[0],
         "First matrix's width must be equal with second matrix's height.");
     ctx.Output<Tensor>("Out")->Resize({dim0[0], dim1[1]});
   }
 };
 ```

 [`MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L22)继承自`OperatorWithKernel`。`public`成员:

-```c++
+```cpp
 using framework::OperatorWithKernel::OperatorWithKernel;
 ```

 这句表示使用基类`OperatorWithKernel`的构造函数,也可写成:

-```c++
+```cpp
 MulOp(const std::string &type, const framework::VariableNameMap &inputs,
       const framework::VariableNameMap &outputs,
       const framework::AttributeMap &attrs)
     : OperatorWithKernel(type, inputs, outputs, attrs) {}
 ```

 还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是:

   - 1). 做检查,尽早报错:检查输入数据维度、类型等是否合法。
   - 2). 设置输出Tensor的形状。

### 3. 定义OpKernel类

-```C++
+```cpp
 template <typename Place, typename T>
 class MulKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* X = context.Input<Tensor>("X");
     auto* Y = context.Input<Tensor>("Y");
     auto* Z = context.Output<Tensor>("Out");
     Z->mutable_data<T>(context.GetPlace());
     ...
   }
 };
 ```

`MulKernel`继承自`framework::OpKernel`,带有模板参数:

  - `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。

  - `typename T` : 表示数据类型,如`float`, `double`等。

`MulKernel`需要重写`Compute`接口,该接口参数为`const framework::ExecutionContext& context`,`ExecutionContext`相比`InferShapeContext`增加了设备类型,同样可获取到输入输出和属性参数,具体实现写在`Compute`函数里。

注意,不同设备(CPU、GPU)共享一个Op定义,是否共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。`MulOp`的CPU、GPU实现共享同一个`Kernel`,`OpKernel`不共享的例子可以参考[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。

到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。

### 4. 注册Operator

在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。

-```c++
+```cpp
 namespace ops = paddle::operators;
 REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad);
 REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel<paddle::platform::CPUPlace, float>);
 REGISTER_OP_CPU_KERNEL(mul_grad,
                        ops::MulGradKernel<paddle::platform::CPUPlace, float>);
 ```

  - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`;同时注册`ops::MulOpGrad`类,类型名为`mul_grad`。
  - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。
  - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulGradKernel`类。

在 `.cu`文件中注册GPU Kernel。

-```c++
+```cpp
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<paddle::platform::GPUPlace, float>);
 REGISTER_OP_GPU_KERNEL(mul_grad,
                        ops::MulGradKernel<paddle::platform::GPUPlace, float>);
 ```
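上面注册的`mul_grad` Kernel所要完成的计算,可以用NumPy直观地验证(仅为数学示意,并非框架实现):

```python
import numpy as np

X = np.random.random((2, 3)).astype("float32")
Y = np.random.random((3, 4)).astype("float32")
Out = X.dot(Y)            # 前向:MulKernel 计算 Out = X * Y

dOut = np.ones_like(Out)  # 假设上游传来的梯度全为 1
dX = dOut.dot(Y.T)        # 反向:dX = dOut * Y^T,形状与 X 相同
dY = X.T.dot(dOut)        # 反向:dY = X^T * dOut,形状与 Y 相同
assert dX.shape == X.shape and dY.shape == Y.shape
```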
编译 -在[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件中添加编译。 - -``` -op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) -``` - -下面命令可以编译: - +无需修改[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动加入编译。 + +直接执行下面命令可进行编译: + ``` make mul_op ``` ## 绑定Python -- 绑定Python - - 在 [`paddle/pybind/pybind.cc +- 绑定Python + + 在 [`paddle/pybind/pybind.cc `](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc)文件中添加该类: ``` USE_OP(mul); ``` 如果只实现了CPU版本,则使用`USE_CPU_ONLY_OP`: - + ``` USE_CPU_ONLY_OP(gather); ``` - + 如果OP不带Kernel,则使用`USE_NO_KENREL_OP`: - + ``` USE_NO_KENREL_OP(recurrent); ``` - + 使用`USE_OP`告知编译器需要链接该Op的目标文件,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 - - + + - 生成库 - 在 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件添加类到`DEPS`中,使得该Op可以链接到生成的lib库中。 - - ``` - if(WITH_PYTHON) - cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python backward - mul_op - minus_op) - endif(WITH_PYTHON) - ``` + 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动被添加链接至生成的lib库中。 ## 实现单元测试 @@ -258,7 +245,7 @@ make mul_op 前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 -``` +```python import unittest import numpy as np from gradient_checker import GradientChecker, create_op @@ -276,17 +263,17 @@ class TestMulOp(unittest.TestCase): self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} ``` 首先需要`import`必要的包,下面详细解释其他值: - + - `self.type = "mul" ` : 定义类型,和注册的类型一致。 - `self.inputs` : 定义输入,类型为Numpy.array,并初始化。 - `self.outputs` : 定义输出,并得到Python结算结果。 - + ### 反向Operator单测 反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 - ``` + ```python class MulGradOpTest(GradientChecker): def test_mul(self): op = create_op("mul") @@ -294,7 +281,7 @@ class TestMulOp(unittest.TestCase): 'X': np.random.random((32, 84)).astype("float32"), 'Y': np.random.random((84, 100)).astype("float32") } - self.compare_grad(op, inputs) + self.compare_grad(op, inputs) # mul op will enlarge the relative error self.check_grad( op, inputs, set(["X", "Y"]), "Out", max_relative_error=0.5) @@ -310,7 +297,7 @@ class TestMulOp(unittest.TestCase): - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` -### 编译和执行 +### 编译和执行 单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加编译: From b3ff125d5598da6e362f360220eef816cf0d7f42 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 5 Sep 2017 16:29:57 +0800 Subject: [PATCH 51/71] follow comments. 
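
The compile and test sections touched here assume the reader already sees the whole shape of a `*_op.cc` file. For reference, below is a condensed layout sketch assembled from the `MulOp` fragments the document quotes; the `InferShape` body, the gradient classes, and the kernels are elided, so it illustrates file layout only and is not a buildable operator:

```cpp
// Layout sketch only, condensed from the MulOp example in the doc.
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

// 1. The ProtoMaker declares inputs, outputs, and the comment that the
//    generated Python API exposes.
class MulOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The first input of mul op");
    AddInput("Y", "The second input of mul op");
    AddOutput("Out", "The output of mul op");
    AddComment("The equation is: Out = X * Y");
  }
};

// 2. The Operator checks inputs early and sets the output shape.
class MulOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    // dimension checks elided; finally something like:
    // ctx.Output<Tensor>("Out")->Resize({x_dims[0], y_dims[1]});
  }
};

}  // namespace operators
}  // namespace paddle

// 3. Registration lives in the same *_op.cc file.
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(mul, ops::MulOp, ops::MulOpMaker);
```

The real `mul_op.cc` registers the gradient op as well, via `REGISTER_OP(mul, ..., mul_grad, ...)` plus `REGISTER_OP_CPU_KERNEL`, as described in the doc.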
--- doc/howto/dev/new_op_cn.md | 42 +++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index 6bd54137ba..85c222163c 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -28,16 +28,18 @@ 内容 | 定义位置 -------------- | :---------------------- -OpProtoMake定义 | `*_op.cc`文件,Backward Op不需要定义OpProtoMake -Op定义 | `*_op.cc`文件 -Kernel实现 | CPU、GPU共享Kernel在`*_op.h`文件,否则,CPU可以在`*_op.cc`文件,GPU可在`*_op.cu`文件。 -注册Op | Op注册在`*_op.cc`文件;Kernel注册CPU在`*_op.cc`文件,GPU在`*_op.cu`文件 +OpProtoMake定义 | `.cc`文件,Backward Op不需要定义OpProtoMake +Op定义 | `.cc`文件 +Kernel实现 | CPU、GPU共享Kernel在`.h`文件,否则,CPU可以在`.cc`文件,GPU可在`.cu`文件。 +注册Op | Op注册在`.cc`文件;Kernel注册CPU在`.cc`文件,GPU在`.cu`文件 + 实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)下,文件命名以`*_op.h`(如有) 、 `*_op.cc` 、`*_op.cu`(如有)结尾。 下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 + ## 实现C++类 @@ -200,13 +202,18 @@ REGISTER_OP_GPU_KERNEL(mul_grad, ### 5. 编译 -无需修改[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动加入编译。 +- 简单**无特殊依赖**的OP无需修改CMakeList.txt文件。[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt) 会自动将 `paddle/operators` 目录下新增的 `*_op.cc` 文件加入编译。 +- 较为复杂、**有额外依赖** 的operator仍需要修改[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)。如,`mul_op` 依赖 `math_function`,需要在`CMakeLists.txt`中添加如下内容: -直接执行下面命令可进行编译: + ``` + op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) + + ``` -``` -make mul_op -``` +- 运行下面命令可以进行编译: + + ``` + make mul_op + ``` ## 绑定Python @@ -235,13 +242,13 @@ make mul_op - 生成库 - 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动被添加链接至生成的lib库中。 + 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会自动被添加链接到生成的lib库中。 ## 实现单元测试 单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单测](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 -### 前向Operator单测 +### 前向Operator单元测试 前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 @@ -269,7 +276,7 @@ class TestMulOp(unittest.TestCase): - `self.outputs` : 定义输出,并得到Python结算结果。 -### 反向Operator单测 +### 反向Operator单元测试 反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 @@ -297,21 +304,22 @@ class TestMulOp(unittest.TestCase): - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` -### 编译和执行 +### 编译和执行单元测试 -单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加编译: +单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加以下内容将单测加入工程中: ``` py_test(test_mul_op SRCS test_mul_op.py) ``` -编译时需要打开`WITH_TESTING`, 即 `cmake paddle_dir -DWITH_TESTING=ON`,编译成功之后执行单测命令为: +请注意,**不同于Op的编译测试,运行单元测试测时需要编译整个工程**,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir 
-DWITH_TESTING=ON`。编译成功后,执行下面的命令来运行单测: -``` +```bash make test ARGS="-R test_mul_op -V" ``` + 或者: -``` +```bash ctest -R test_mul_op ``` From 020e45f715b1a1cea5dcbee10dacb055b3889523 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 5 Sep 2017 16:54:12 +0800 Subject: [PATCH 52/71] follow comments to revert add_two_op to add_op --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/grad_op_builder_test.cc | 4 ++-- paddle/operators/{add_two_op.cc => add_op.cc} | 7 +++---- paddle/operators/{add_two_op.cu => add_op.cu} | 5 ++--- paddle/operators/{add_two_op.h => add_op.h} | 0 paddle/pybind/pybind.cc | 2 +- python/paddle/v2/framework/tests/test_add_two_op.py | 2 +- python/paddle/v2/framework/tests/test_gradient_checker.py | 2 +- python/paddle/v2/framework/tests/test_net.py | 4 ++-- python/paddle/v2/framework/tests/test_operator.py | 4 ++-- python/paddle/v2/framework/tests/test_recurrent_op.py | 2 +- 11 files changed, 16 insertions(+), 18 deletions(-) rename paddle/operators/{add_two_op.cc => add_op.cc} (88%) rename paddle/operators/{add_two_op.cu => add_op.cu} (82%) rename paddle/operators/{add_two_op.h => add_op.h} (100%) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index e138517b6b..c0838d9b75 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -25,7 +25,7 @@ cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_library(grad_op_builder SRCS grad_op_builder.cc DEPS operator) cc_library(op_registry SRCS op_registry.cc DEPS grad_op_builder) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) -cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_two_op) +cc_test(grad_op_builder_test SRCS grad_op_builder_test.cc DEPS grad_op_builder op_registry add_op) py_proto_compile(framework_py_proto SRCS framework.proto) # Generate an empty __init__.py to make framework_py_proto as a valid python module. diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index 902c2655e9..9e3ca563c6 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -3,7 +3,7 @@ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" -USE_OP(add_two); +USE_OP(add); namespace paddle { namespace framework { @@ -41,7 +41,7 @@ namespace f = paddle::framework; TEST(GradOpBuilder, AddTwo) { std::shared_ptr add_op(f::OpRegistry::CreateOp( - "add_two", {{"X", {"x"}}, {"Y", {"y"}}}, {{"Out", {"out"}}}, {})); + "add", {{"X", {"x"}}, {"Y", {"y"}}}, {{"Out", {"out"}}}, {})); std::shared_ptr grad_add_op = f::OpRegistry::CreateGradOp(*add_op); EXPECT_EQ(grad_add_op->Inputs().size(), 4UL); diff --git a/paddle/operators/add_two_op.cc b/paddle/operators/add_op.cc similarity index 88% rename from paddle/operators/add_two_op.cc rename to paddle/operators/add_op.cc index bc99e306e0..8dbd47cf0d 100644 --- a/paddle/operators/add_two_op.cc +++ b/paddle/operators/add_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/operators/add_two_op.h" +#include "paddle/operators/add_op.h" namespace paddle { namespace operators { @@ -57,7 +57,6 @@ class AddOpGrad : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker, add_two_grad, ops::AddOpGrad); +REGISTER_OP(add, ops::AddOp, ops::AddOpMaker, add_grad, ops::AddOpGrad); -REGISTER_OP_CPU_KERNEL(add_two, - ops::AddKernel); +REGISTER_OP_CPU_KERNEL(add, ops::AddKernel); diff --git a/paddle/operators/add_two_op.cu b/paddle/operators/add_op.cu similarity index 82% rename from paddle/operators/add_two_op.cu rename to paddle/operators/add_op.cu index acc03b2c8b..fdf2c66c7c 100644 --- a/paddle/operators/add_two_op.cu +++ b/paddle/operators/add_op.cu @@ -14,8 +14,7 @@ #define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" -#include "paddle/operators/add_two_op.h" +#include "paddle/operators/add_op.h" namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(add_two, - ops::AddKernel); +REGISTER_OP_GPU_KERNEL(add, ops::AddKernel); diff --git a/paddle/operators/add_two_op.h b/paddle/operators/add_op.h similarity index 100% rename from paddle/operators/add_two_op.h rename to paddle/operators/add_op.h diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 6896422617..dba3d8f285 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -30,7 +30,7 @@ limitations under the License. */ namespace py = pybind11; -USE_OP(add_two); +USE_OP(add); USE_OP(onehot_cross_entropy); USE_OP(sgd); USE_OP(mul); diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py index 0def484edd..a578e74eca 100644 --- a/python/paddle/v2/framework/tests/test_add_two_op.py +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -11,7 +11,7 @@ class TestAddOp(unittest.TestCase): __metaclass__ = OpTestMeta def setUp(self): - self.type = "add_two" + self.type = "add" self.inputs = { 'X': numpy.random.random((102, 105)).astype("float32"), 'Y': numpy.random.random((102, 105)).astype("float32") diff --git a/python/paddle/v2/framework/tests/test_gradient_checker.py b/python/paddle/v2/framework/tests/test_gradient_checker.py index e0b3151208..857427cdfb 100644 --- a/python/paddle/v2/framework/tests/test_gradient_checker.py +++ b/python/paddle/v2/framework/tests/test_gradient_checker.py @@ -7,7 +7,7 @@ from gradient_checker import get_numeric_gradient class GetNumericGradientTest(unittest.TestCase): def test_add_op(self): - add_op = Operator('add_two', X="X", Y="Y", Out="Z") + add_op = Operator('add', X="X", Y="Y", Out="Z") x = numpy.random.random((10, 1)).astype("float32") y = numpy.random.random((10, 1)).astype("float32") diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index 9339cf28da..e4b7cd480c 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -15,7 +15,7 @@ def fc(X, W, Y): class TestNet(unittest.TestCase): def test_net_all(self): net = core.Net.create() - op1 = Operator("add_two", X="X", Y="Y", Out="Out") + op1 = Operator("add", X="X", Y="Y", Out="Out") net.append_op(op1) net2 = core.Net.create() @@ -26,7 +26,7 @@ class TestNet(unittest.TestCase): expected = ''' Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}. - Op(add_two), inputs:{X[X], Y[Y]}, outputs:{Out[Out]}. + Op(add), inputs:{X[X], Y[Y]}, outputs:{Out[Out]}. 
Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. Op(plain_net), inputs:{all[W, X]}, outputs:{all[fc.out, pre_activation]}. Op(mul), inputs:{X[X], Y[W]}, outputs:{Out[pre_activation]}. diff --git a/python/paddle/v2/framework/tests/test_operator.py b/python/paddle/v2/framework/tests/test_operator.py index 1abc4eeb57..040556322d 100644 --- a/python/paddle/v2/framework/tests/test_operator.py +++ b/python/paddle/v2/framework/tests/test_operator.py @@ -193,10 +193,10 @@ class TestOpDescCreationMethod(unittest.TestCase): class TestOpCreations(unittest.TestCase): def test_all(self): - add_op = op.Operator("add_two", X="a", Y="b", Out="z") + add_op = op.Operator("add", X="a", Y="b", Out="z") self.assertIsNotNone(add_op) # Invoke C++ DebugString() - self.assertEqual('Op(add_two), inputs:{X[a], Y[b]}, outputs:{Out[z]}.', + self.assertEqual('Op(add), inputs:{X[a], Y[b]}, outputs:{Out[z]}.', str(add_op)) diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index d6000ab9f9..22e680fd78 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -146,7 +146,7 @@ class TestRecurrentOp(unittest.TestCase): stepnet = core.Net.create() x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx") h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") - sum_op = Operator("add_two", X="Wx", Y="Uh", Out="sum") + sum_op = Operator("add", X="Wx", Y="Uh", Out="sum") sig_op = Operator("sigmoid", X="sum", Y="h@alias") for op in [x_fc_op, h_fc_op, sum_op, sig_op]: From 2beaf3aa2a966980177ea0a0d309fd7a7a594a1c Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 5 Sep 2017 18:12:42 +0800 Subject: [PATCH 53/71] auto find .cc or .cu in operator/CMakeLists.txt --- paddle/operators/CMakeLists.txt | 54 +++++++++++++++------------------ 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 90185101c4..99b8e1967f 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -14,27 +14,31 @@ function(op_library TARGET) cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - foreach(src ${op_library_SRCS}) - if (${src} MATCHES ".*\\.cu$") - list(APPEND cu_srcs ${src}) - elseif(${src} MATCHES ".*\\.cc$") - list(APPEND cc_srcs ${src}) - else() - message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu") + list(LENGTH op_library_SRCS op_library_SRCS_len) + if (${op_library_SRCS_len} EQUAL 0) + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) + list(APPEND cc_srcs ${TARGET}.cc) endif() - endforeach() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) + list(APPEND cu_srcs ${TARGET}.cc) + endif() + else() + foreach(src ${op_library_SRCS}) + if (${src} MATCHES ".*\\.cu$") + list(APPEND cu_srcs ${src}) + elseif(${src} MATCHES ".*\\.cc$") + list(APPEND cc_srcs ${src}) + else() + message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu") + endif() + endforeach() + endif() list(LENGTH cc_srcs cc_srcs_len) if (${cc_srcs_len} EQUAL 0) message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file") endif() - list(LENGTH cu_srcs cu_srcs_len) - list(LENGTH op_library_DEPS dep_len) - if (${cu_srcs_len} EQUAL 0 AND ${dep_len} EQUAL 0) - message(WARNING "The op library ${TARGET} not support GPU!") - endif() - if (WITH_GPU) nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS 
${op_library_DEPS} ${op_common_deps}) @@ -46,32 +50,22 @@ endfunction() add_subdirectory(math) -set(ONLYCPU_OPS - net_op - gather_op - scatter_op) -foreach(src ${ONLYCPU_OPS}) - op_library(${src} SRCS ${src}.cc) -endforeach() - set(DEPS_OPS identity_op minus_op mul_op recurrent_op scale_op) -op_library(identity_op SRCS identity_op.cc DEPS scale_op) -op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) -op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) +op_library(identity_op DEPS scale_op) +op_library(minus_op DEPS scale_op) +op_library(mul_op DEPS math_function) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor operator net_op) -op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) +op_library(scale_op DEPS net_op) -list(REMOVE_ITEM GENERAL_OPS - ${ONLYCPU_OPS} - ${DEPS_OPS}) +list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) - op_library(${src} SRCS ${src}.cc ${src}.cu) + op_library(${src}) endforeach() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") From 42513496f30c137e42b54fab6ce275845349bccf Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Tue, 5 Sep 2017 22:17:24 +0800 Subject: [PATCH 54/71] fix a type error in operator/CMakeLists.txt --- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/add_op.cu | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 99b8e1967f..8a0ff1eb53 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -20,7 +20,7 @@ function(op_library TARGET) list(APPEND cc_srcs ${TARGET}.cc) endif() if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) - list(APPEND cu_srcs ${TARGET}.cc) + list(APPEND cu_srcs ${TARGET}.cu) endif() else() foreach(src ${op_library_SRCS}) diff --git a/paddle/operators/add_op.cu b/paddle/operators/add_op.cu index fdf2c66c7c..d9c6d20a6c 100644 --- a/paddle/operators/add_op.cu +++ b/paddle/operators/add_op.cu @@ -12,8 +12,6 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#define EIGEN_USE_GPU -#include "paddle/framework/op_registry.h" #include "paddle/operators/add_op.h" namespace ops = paddle::operators; From a7704b69f410b972a2dafd832efb018a029bc10c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 5 Sep 2017 09:46:57 -0700 Subject: [PATCH 55/71] move DataType outof LoDTensorDesc --- doc/design/var_desc.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index 668f067984..86a95c10d5 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -21,27 +21,27 @@ A VarDesc should have a name and value, in PaddlePaddle, the value will always b ```proto message VarDesc { required string name = 1; - optional LoDTesnorDesc lod_tensor = 2; // + optional LoDTensorDesc lod_tensor = 2; } ``` ## Definition of LodTensorDesc ```proto +enum DataType { + BOOL = 0; + INT16 = 1; + INT32 = 2; + INT64 = 3; + FP16 = 4; + FP32 = 5; + FP64 = 6; +} + message LoDTensorDesc { - enum Type { - BOOL = 0; - INT16 = 1; - INT32 = 2; - INT64 = 3; - FP16 = 4; - FP32 = 5; - FP64 = 6; - } - - Type data_type = 1; - repeated int dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] - optional int lod_level [default=0] = 3; + required DataType data_type = 1; + repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] + optional int32 lod_level = 3 [default=0]; } ``` From 9de6a4b375270f2e066147bfa92c84b84916ea56 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 5 Sep 2017 22:03:51 -0700 Subject: [PATCH 56/71] Change `Op::GetAttr` to `Op::Attr` Fix #3902 --- paddle/framework/op_registry_test.cc | 6 +++--- paddle/framework/operator.h | 6 +++--- paddle/operators/gaussian_random_op.cc | 8 ++++---- paddle/operators/gaussian_random_op.cu | 4 ++-- paddle/operators/rnn/recurrent_op_utils.cc | 8 ++++---- paddle/operators/scale_op.cc | 2 +- paddle/operators/scale_op.h | 2 +- paddle/operators/sgd_op.h | 2 +- paddle/operators/uniform_random_op.cc | 10 +++++----- paddle/operators/uniform_random_op.cu | 4 ++-- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index b43f6a8cc5..0e2fb27b65 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -80,7 +80,7 @@ TEST(OpRegistry, CreateOp) { paddle::framework::Scope scope; paddle::platform::CPUDeviceContext dev_ctx; op->Run(scope, dev_ctx); - float scale_get = op->GetAttr("scale"); + float scale_get = op->Attr("scale"); ASSERT_EQ(scale_get, scale); } @@ -121,7 +121,7 @@ TEST(OpRegistry, DefaultValue) { paddle::framework::Scope scope; paddle::platform::CPUDeviceContext dev_ctx; op->Run(scope, dev_ctx); - ASSERT_EQ(op->GetAttr("scale"), 1.0); + ASSERT_EQ(op->Attr("scale"), 1.0); } TEST(OpRegistry, CustomChecker) { @@ -172,6 +172,6 @@ TEST(OpRegistry, CustomChecker) { paddle::platform::CPUDeviceContext dev_ctx; paddle::framework::Scope scope; op->Run(scope, dev_ctx); - int test_attr = op->GetAttr("test_attr"); + int test_attr = op->Attr("test_attr"); ASSERT_EQ(test_attr, 4); } \ No newline at end of file diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index da92220b04..9a98d4d3be 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -69,7 +69,7 @@ class OperatorBase { virtual ~OperatorBase() {} template - inline const T& GetAttr(const std::string& name) const { + inline const T& Attr(const std::string& name) const { PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be 
in AttributeMap", name); return boost::get(attrs_.at(name)); @@ -238,8 +238,8 @@ class InferShapeContext { const Scope& scope() const { return scope_; } template - inline const T& GetAttr(const std::string& name) const { - return op_.GetAttr(name); + inline const T& Attr(const std::string& name) const { + return op_.Attr(name); } size_t InputSize(const std::string& name) const { diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 056447901d..193b176c69 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -19,12 +19,12 @@ template class CPUGaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - float mean = context.GetAttr("mean"); - float std = context.GetAttr("std"); + float mean = context.Attr("mean"); + float std = context.Attr("std"); auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.GetAttr("seed")); + unsigned int seed = static_cast(context.Attr("seed")); std::minstd_rand engine; if (seed == 0) { seed = std::random_device()(); @@ -45,7 +45,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& context) const override { auto* tensor = context.Output("Out"); - auto dims = GetAttr>("dims"); + auto dims = Attr>("dims"); PADDLE_ENFORCE(dims.size() > 0UL, "dims can be one int or array. dims must be set."); tensor->Resize(framework::make_ddim(dims)); diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 833a82bbf2..9788d89408 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -47,8 +47,8 @@ class GPUGaussianRandomKernel : public framework::OpKernel { std::random_device rd; seed = rd(); } - T mean = static_cast(context.GetAttr("mean")); - T std = static_cast(context.GetAttr("std")); + T mean = static_cast(context.Attr("mean")); + T std = static_cast(context.Attr("std")); thrust::counting_iterator index_sequence_begin(0); ssize_t N = framework::product(tensor->dims()); thrust::transform(index_sequence_begin, index_sequence_begin + N, diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index a9b65c30f2..ddc963faa5 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -109,7 +109,7 @@ void InitArgument(const ArgumentName& name, Argument* arg, arg->step_scopes = op.Output(name.step_scopes); auto inlinks = op.Inputs(name.inlinks); - auto inlink_alias = op.GetAttr>(name.inlink_alias); + auto inlink_alias = op.Attr>(name.inlink_alias); PADDLE_ENFORCE(inlinks.size() == inlink_alias.size(), "the size of inlinks and inlink_alias don't match:%d,%d", inlinks.size(), inlink_alias.size()); @@ -121,7 +121,7 @@ void InitArgument(const ArgumentName& name, Argument* arg, } auto outlinks = op.Outputs(name.outlinks); - auto outlink_alias = op.GetAttr>(name.outlink_alias); + auto outlink_alias = op.Attr>(name.outlink_alias); PADDLE_ENFORCE(outlinks.size() == outlink_alias.size(), "the size of outlinks and outlink_alias don't match:%d,%d", outlinks.size(), outlink_alias.size()); @@ -135,8 +135,8 @@ void InitArgument(const ArgumentName& name, Argument* arg, auto boot_memories = op.Inputs(name.boot_memories); // attributes - auto memories = op.GetAttr>(name.memories); - auto pre_memories = 
op.GetAttr>(name.pre_memories); + auto memories = op.Attr>(name.memories); + auto pre_memories = op.Attr>(name.pre_memories); PADDLE_ENFORCE(memories.size() == boot_memories.size(), "the size of memories, boot_memories don't match:%d,%d", diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index 8e96a74c94..8d65e2754c 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -60,7 +60,7 @@ class ScaleGradOp : public NetOp { AppendOp(framework::OpRegistry::CreateOp( "scale", {{"X", {Input(framework::GradVarName("Out"))}}}, {{"Out", {Output(framework::GradVarName("X"))}}}, - {{"scale", GetAttr("scale")}})); + {{"scale", Attr("scale")}})); CompleteAddOp(false); } }; diff --git a/paddle/operators/scale_op.h b/paddle/operators/scale_op.h index 65fb77eefa..02fbdc52bb 100644 --- a/paddle/operators/scale_op.h +++ b/paddle/operators/scale_op.h @@ -27,7 +27,7 @@ class ScaleKernel : public framework::OpKernel { auto* in = context.Input("X"); tensor->mutable_data(in->place()); - auto scale = static_cast(context.GetAttr("scale")); + auto scale = static_cast(context.Attr("scale")); auto eigen_out = framework::EigenVector::Flatten(*tensor); auto eigen_in = framework::EigenVector::Flatten(*in); diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index 8422b622ee..f8888f9c36 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -31,7 +31,7 @@ class SGDOpKernel : public framework::OpKernel { auto param = ctx.Input("param"); auto grad = ctx.Input("grad"); auto param_out = ctx.Output("param_out"); - float lr = ctx.GetAttr("learning_rate"); + float lr = ctx.Attr("learning_rate"); param_out->mutable_data(ctx.GetPlace()); diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 2d943c4508..8dbc8b064c 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -26,15 +26,15 @@ class CPUUniformRandomKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.GetAttr("seed")); + unsigned int seed = static_cast(context.Attr("seed")); std::minstd_rand engine; if (seed == 0) { seed = std::random_device()(); } engine.seed(seed); std::uniform_real_distribution dist( - static_cast(context.GetAttr("min")), - static_cast(context.GetAttr("max"))); + static_cast(context.Attr("min")), + static_cast(context.Attr("max"))); ssize_t size = framework::product(tensor->dims()); for (ssize_t i = 0; i < size; ++i) { data[i] = dist(engine); @@ -48,10 +48,10 @@ class UniformRandomOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE(GetAttr("min") < GetAttr("max"), + PADDLE_ENFORCE(Attr("min") < Attr("max"), "uniform_random's min must less then max"); auto* tensor = ctx.Output("Out"); - auto dims = GetAttr>("dims"); + auto dims = Attr>("dims"); tensor->Resize(framework::make_ddim(dims)); } }; diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index df993c0779..fc6f856fa3 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -50,8 +50,8 @@ class GPUUniformRandomKernel : public framework::OpKernel { std::random_device rd; seed = rd(); } - T min = static_cast(context.GetAttr("min")); - T max = static_cast(context.GetAttr("max")); + T 
min = static_cast(context.Attr("min")); + T max = static_cast(context.Attr("max")); thrust::counting_iterator index_sequence_begin(0); ssize_t N = framework::product(tensor->dims()); thrust::transform(index_sequence_begin, index_sequence_begin + N, From 1e419d9ed3610c95b518982a026666ea3f699f27 Mon Sep 17 00:00:00 2001 From: qijun Date: Wed, 6 Sep 2017 17:24:47 +0800 Subject: [PATCH 57/71] fix clang build and run error --- paddle/operators/cos_sim_op.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 9e3ff26815..9e2bcebe3b 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -23,6 +23,9 @@ using Tensor = framework::Tensor; template using EigenMatrix = framework::EigenMatrix; +template +using EigenVector = framework::EigenVector; template class CosSimKernel : public framework::OpKernel { @@ -43,14 +46,14 @@ class CosSimKernel : public framework::OpKernel { auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); auto x = EigenMatrix::From(*input_x, new_dims); auto y = EigenMatrix::From(*input_y, new_dims); - auto z = EigenMatrix::From(*output_z); - auto x_norm = EigenMatrix::From(*output_x_norm); - auto y_norm = EigenMatrix::From(*output_y_norm); + auto z = EigenVector::Flatten(*output_z); + auto x_norm = EigenVector::Flatten(*output_x_norm); + auto y_norm = EigenVector::Flatten(*output_y_norm); auto place = context.GetEigenDevice(); - auto xy = (x * y).sum(Eigen::array({1})); - x_norm.device(place) = x.square().sum(Eigen::array({1})).sqrt(); - y_norm.device(place) = y.square().sum(Eigen::array({1})).sqrt(); + auto xy = (x * y).sum(Eigen::array({{1}})); + x_norm.device(place) = x.square().sum(Eigen::array({{1}})).sqrt(); + y_norm.device(place) = y.square().sum(Eigen::array({{1}})).sqrt(); z.device(place) = xy / x_norm / y_norm; } }; From a377b4197cf7e5f3b7f8edb271eb67039ede16eb Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 6 Sep 2017 17:41:03 +0800 Subject: [PATCH 58/71] Follow GLOG enforcing style. 
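
The motivation for the GLOG-style forms (`PADDLE_ENFORCE_GE`, `PADDLE_ENFORCE_EQ`, `PADDLE_ENFORCE_NOT_NULL`) is that a comparison macro receives the two operands separately, so it can report both values when the check fails, which the plain boolean `PADDLE_ENFORCE(a >= b, ...)` cannot. A standalone toy sketch of the pattern follows; the `TOY_*` macros are illustrative stand-ins, not Paddle's actual implementation:

```cpp
#include <iostream>
#include <sstream>
#include <stdexcept>

// Boolean form: the failure message carries no operand values.
#define TOY_ENFORCE(COND, MSG)                  \
  do {                                          \
    if (!(COND)) throw std::runtime_error(MSG); \
  } while (0)

// GLOG-style comparison form: stringizes and prints both sides on failure.
#define TOY_ENFORCE_GE(A, B, MSG)                                       \
  do {                                                                  \
    if (!((A) >= (B))) {                                                \
      std::ostringstream os;                                            \
      os << MSG << " [" << #A << " = " << (A) << " vs. " << #B << " = " \
         << (B) << "]";                                                 \
      throw std::runtime_error(os.str());                               \
    }                                                                   \
  } while (0)

int main() {
  int rank = 1;
  try {
    TOY_ENFORCE(rank >= 2, "Tensor rank should be at least equal to 2.");
  } catch (const std::runtime_error& e) {
    std::cout << e.what() << "\n";  // no hint about the actual rank
  }
  try {
    TOY_ENFORCE_GE(rank, 2, "Tensor rank should be at least equal to 2.");
  } catch (const std::runtime_error& e) {
    std::cout << e.what() << "\n";  // "... [rank = 1 vs. 2 = 2]"
  }
  return 0;
}
```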
--- paddle/operators/squared_l2_distance_op.cc | 3 +-- paddle/operators/squared_l2_distance_op.h | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index 694b00e493..dc30644a5e 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -40,7 +40,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { "inputs must be same."); int rank = framework::arity(x_dims); - PADDLE_ENFORCE(rank >= 2, "Tensor rank should be at least equal to 2."); + PADDLE_ENFORCE_GE(rank, 2, "Tensor rank should be at least equal to 2."); PADDLE_ENFORCE_EQ(framework::product(x_dims) / x_dims[0], framework::product(y_dims) / y_dims[0], "Product of dimensions expcet the first dimension of " @@ -87,7 +87,6 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Gradient of Out should not be null"); - // check out grad dimensions auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h index 1015513bdf..77c5a0a5c9 100644 --- a/paddle/operators/squared_l2_distance_op.h +++ b/paddle/operators/squared_l2_distance_op.h @@ -101,9 +101,9 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { auto y_grad = EigenMatrix::From(*y_g, framework::make_ddim({y_dims[0], cols})); - PADDLE_ENFORCE(sub_result.dimensions()[0] >= y_dims[0], - "First dimension of gradient must be greater or " - "equal than first dimension of target"); + PADDLE_ENFORCE_GE(sub_result.dimensions()[0], y_dims[0], + "First dimension of gradient must be greater or " + "equal than first dimension of target."); if (sub_result.dimensions()[0] == y_dims[0]) { y_grad.device(eigen_place) = -1 * grad_mat; From 4373a4a74d0d30c2980752b10e7bce336d434f74 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 6 Sep 2017 18:24:08 +0800 Subject: [PATCH 59/71] fix redeclaration of 'paddle::StatPtr __stat' --- paddle/gserver/layers/Conv3DLayer.cpp | 6 ++---- paddle/gserver/layers/DeConv3DLayer.cpp | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/gserver/layers/Conv3DLayer.cpp index 3887aa58b2..9deda2de98 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/gserver/layers/Conv3DLayer.cpp @@ -83,8 +83,8 @@ void Conv3DLayer::forward(PassType passType) { int outWidth = getSize(); resetOutput(batchSize, outWidth); + REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("FwdConv3D", getName().c_str()); const MatrixPtr &inMat = getInputValue(i); const MatrixPtr &outMat = getOutputValue(); int M = M_[i]; @@ -120,7 +120,6 @@ void Conv3DLayer::forward(PassType passType) { } } if (nullptr != this->biasParameter_) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); this->addBias(); } forwardActivation(); @@ -134,15 +133,14 @@ void Conv3DLayer::backward(const UpdateCallback &callback) { biases_->getParameterPtr()->incUpdate(callback); } + REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("BwdConv3D", getName().c_str()); if 
(weights_[i]->getWGrad()) { bpropWeights(i); } if (getInputGrad(i)) { bpropData(i); } - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); weights_[i]->getParameterPtr()->incUpdate(callback); } } diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 2838980a97..1b59ed60c5 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -84,8 +84,8 @@ void DeConv3DLayer::forward(PassType passType) { resetOutput(batchSize, outWidth); const MatrixPtr outMat = getOutputValue(); + REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); for (size_t i = 0; i != inputLayers_.size(); ++i) { - REGISTER_TIMER_INFO("FwdDeConv3D", getName().c_str()); const MatrixPtr &inMat = getInputValue(i); int M = M_[i]; int N = N_[i]; @@ -120,7 +120,6 @@ void DeConv3DLayer::forward(PassType passType) { } } if (nullptr != this->biasParameter_) { - REGISTER_TIMER_INFO("FwBiasTimer", getName().c_str()); this->addBias(); } forwardActivation(); @@ -133,12 +132,12 @@ void DeConv3DLayer::backward(const UpdateCallback &callback) { bpropBiases(); biases_->getParameterPtr()->incUpdate(callback); } + REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); for (size_t i = 0; i < inputLayers_.size(); ++i) { if (weights_[i]->getWGrad() || this->needGradient_) { int M = M_[i]; int N = N_[i]; int K = K_[i]; - REGISTER_TIMER_INFO("BwdDeConv3D", getName().c_str()); Matrix::resizeOrCreate(colBuf_, K * groups_[i], N, false, useGpu_); const MatrixPtr &inMat = getInputValue(i); for (int n = 0; n < batchSize; ++n) { @@ -182,7 +181,6 @@ void DeConv3DLayer::backward(const UpdateCallback &callback) { } } } - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); weights_[i]->getParameterPtr()->incUpdate(callback); } } From 57f9723d36f1740bc306a8e5022ac3cf01595c2f Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 6 Sep 2017 18:43:33 +0800 Subject: [PATCH 60/71] Using EigenVector to replace EigenMatrix for some variables. 
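
Background for this change: results such as the squared-distance output and the column-summed gradient are rank-1 reductions, so viewing their buffers through a rank-1 `EigenVector::Flatten` map matches the rank of the expression, whereas a rank-2 `EigenMatrix::From` view forces an artificial matrix shape. A standalone Eigen sketch of the same pattern (plain Eigen, not Paddle code; buffer names are illustrative):

```cpp
#include <iostream>
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  // Stand-in for the broadcasted difference X - Y (3 rows, 4 columns).
  Eigen::Tensor<float, 2> sub_result(3, 4);
  sub_result.setConstant(2.0f);

  // Raw output buffer, as a framework tensor would own. The row-wise
  // squared sum is rank-1, so map it as a vector (the analogue of
  // EigenVector::Flatten), not as a one-column matrix.
  float out[3];
  Eigen::TensorMap<Eigen::Tensor<float, 1>> z(out, 3);
  z = (sub_result * sub_result).sum(Eigen::array<int, 1>({{1}}));

  std::cout << z(0) << "\n";  // 16 = 4 columns * 2^2
  return 0;
}
```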
--- paddle/operators/squared_l2_distance_op.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h index 77c5a0a5c9..ad3347a0b3 100644 --- a/paddle/operators/squared_l2_distance_op.h +++ b/paddle/operators/squared_l2_distance_op.h @@ -20,6 +20,9 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; template using EigenMatrix = framework::EigenMatrix; @@ -46,7 +49,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel { out0->mutable_data(context.GetPlace()); out1->mutable_data(context.GetPlace()); auto sub_result = EigenMatrix::From(*out0); - auto z = EigenMatrix::From(*out1); + auto z = EigenVector::Flatten(*out1); auto place = context.GetEigenDevice(); auto x_dims = x.dimensions(); @@ -55,13 +58,12 @@ class SquaredL2DistanceKernel : public framework::OpKernel { if (y_dims[0] == 1 && x_dims[0] > y_dims[0]) { sub_result.device(place) = x - - y.broadcast(Eigen::array({static_cast(x_dims[0]), 1})); + y.broadcast(Eigen::array({{static_cast(x_dims[0]), 1}})); } else { sub_result.device(place) = x - y; } auto sub_res_pow2 = sub_result * sub_result; - // z is TensorMap, no need reshape - z.device(place) = sub_res_pow2.sum(Eigen::array({1})); + z.device(place) = sub_res_pow2.sum(Eigen::array({{1}})); } }; @@ -82,8 +84,9 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { int cols = framework::product(x_dims) / x_dims[0]; // calculate gradient - auto grad_mat = - 2 * (out_grad.broadcast(Eigen::array({1, cols}))) * sub_result; + auto grad_mat = 2 * + (out_grad.broadcast(Eigen::array({{1, cols}}))) * + sub_result; // propagate back to input auto eigen_place = context.GetEigenDevice(); @@ -98,18 +101,18 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { if (y_g) { y_g->mutable_data(context.GetPlace()); - auto y_grad = - EigenMatrix::From(*y_g, framework::make_ddim({y_dims[0], cols})); PADDLE_ENFORCE_GE(sub_result.dimensions()[0], y_dims[0], "First dimension of gradient must be greater or " "equal than first dimension of target."); if (sub_result.dimensions()[0] == y_dims[0]) { + auto y_grad = + EigenMatrix::From(*y_g, framework::make_ddim({y_dims[0], cols})); y_grad.device(eigen_place) = -1 * grad_mat; } else { - auto col_sum_res = -1 * (grad_mat.sum(Eigen::array({0}))); - // y_grad is TensorMap, no need reshape + auto col_sum_res = -1 * (grad_mat.sum(Eigen::array({{0}}))); + auto y_grad = EigenVector::Flatten(*y_g); y_grad.device(eigen_place) = col_sum_res; } } From db5e726d160789f05fb8323c6915991e5a276339 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Wed, 6 Sep 2017 18:14:27 +0800 Subject: [PATCH 61/71] update the how to write op doc. --- doc/howto/dev/new_op_cn.md | 183 +++++++++++++++++++++---------------- 1 file changed, 102 insertions(+), 81 deletions(-) diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index 0d29865447..58665e9f2b 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -45,7 +45,9 @@ Kernel实现 | CPU、GPU共享Kernel实现在`.h`文件中,否则,CPU ### 1. 
定义ProtoMaker类 -矩阵乘的公式:$Out = X * Y$, 可见该计算由两个输入,一个输出组成。首先定义`ProtoMaker`来描述该Op的输入、输出及注释: +矩阵乘法的公式:$Out = X * Y$, 可见该计算由两个输入,一个输出组成。 + +首先定义`ProtoMaker`来描述该Op的输入、输出,并添加注释: ```cpp class MulOpMaker : public framework::OpProtoAndCheckerMaker { @@ -63,17 +65,17 @@ The equation is: Out = X * Y }; ``` -[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数包括2个参数: +[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数含有2个参数: - `framework::OpProto` : 前者存储Op的输入输出和参数属性,将用于Python API接口的生成。 - `framework::OpAttrChecker` :后者用于检查参数属性的合法性。 -构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加该Op的注释,这些函数会将对应内容添加到`OpProto`中。 +构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加Op的注释。这些函数会将对应内容添加到`OpProto`中。 -在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,命名请遵守命名规范。 +上面的代码在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,命名请遵守命名规范。 -再举个[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)的例子: +再以[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)为例: ```cpp template @@ -91,14 +93,16 @@ The equation is: Out = scale*X }; ``` - 这个例子有两处不同: +这个例子有两处不同: + +- `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中,如果Op的某个输入不参与反向梯度的计算,请显示地调用`.NotInGradient()`进行设置。 - - `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中。 - - `AddAttr("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。 +- `AddAttr("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。 ### 2. 定义Operator类 +下面的点实现了MulOp的定义: ```cpp class MulOp : public framework::OperatorWithKernel { @@ -143,14 +147,27 @@ MulOp(const std::string &type, const framework::VariableNameMap &inputs, - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法。 - 2). 设置输出Tensor的形状。 -通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和要讲到的注册函数一起放在`.cc`中 +通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和下面将要介绍的注册函数一起放在`.cc`中 ### 3. 
定义OpKernel类 -```cpp -template -class MulKernel : public framework::OpKernel { - public: +`MulKernel`继承自`framework::OpKernel`,带有下面两个模板参数: + +- `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + +- `typename T` : 表示数据类型,如`float`, `double`等。 + +需要为`MulKernel`类重写`Compute`接口。 +- `Compute`接受一个输入参数:`const framework::ExecutionContext& context`。 +- 与`InferShapeContext`相比,`ExecutionContext`增加了设备类型,同样可获取到输入输出和属性参数。 +- `Compute`函数里实现`OpKernel`的具体计算逻辑。 + +下面是 `MulKernel` `Compute`的实现: + + ```cpp + template + class MulKernel : public framework::OpKernel { + public: void Compute(const framework::ExecutionContext& context) const override { auto* X = context.Input("X"); auto* Y = context.Input("Y"); @@ -160,50 +177,50 @@ class MulKernel : public framework::OpKernel { const_cast(context.device_context_); math::matmul(*X, false, *Y, false, 1, Z, 0, device_context); } -}; -``` + }; + ``` -`MulKernel`继承自`framework::OpKernel`,带有模板参数: +需要注意:**不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。** - - `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 +`MulOp`的CPU、GPU实现共享同一个`Kernel`。`OpKernel`不共享的例子可以参考:[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 - - `typename T` : 表示数据类型,如`float`, `double`等。 +为了使`OpKernel`的计算过程书写更加简单,并且CPU、GPU的代码可以复用,我们通常借助 Eigen unsupported Tensor模块来实现`Compute`接口。关于在PaddlePaddle中如何使用Eigen库,请参考[使用文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/use_eigen_cn.md)。 -`MulKernel`需要重写`Compute`接口,该接口参数为`const framework::ExecutionContext& context`, `ExecutionContext`相比`InferShapeContext`增加了设备类型,同样可获取到输入输出和属性参数,`Compute`函数里写具体实现时。 -注意,不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。`MulOp`的CPU、GPU实现共享同一个`Kernel`,`OpKernel`不共享的例子可以参考[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 +到此,前向Op实现完成。接下来,需要在`.cc`文件中注册该op和kernel。 +反向Op类的定义,反向OpKernel的定义与前向Op类似,这里不再赘述。**但需注意反向Op没有`ProtoMaker`**。 -为了使得`OpKernel`的计算过程书写较为简单,CPU、GPU的代码可以复用,我们通常借助Eigen unsupported Tensor模块来实现。关于在paddle中如何使用Eigen库,请参考对应的使用[文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/use_eigen_cn.md) +### 4. 注册Operator -到此前向Op实现完成,需要在`.cc`文件中注册该op和kernel。反向Op类的定义和Kernel定义与前向Op类似,这里不再重复。但注意,反向Op没有`ProtoMaker`。 +- 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 -### 4. 
注册Operator + ```cpp + namespace ops = paddle::operators; + REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); + REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_CPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` -在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 + 在上面的代码中: -```cpp -namespace ops = paddle::operators; -REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); -REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); -REGISTER_OP_CPU_KERNEL(mul_grad, - ops::MulGradKernel); -``` + - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`。 + - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 + - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。 - - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`, - - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 - - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。 -在 `.cu`文件中注册GPU Kernel。请注意,如果GPU Kernel的实现是基于Eigen unsupported模块,那么在 `.cu`的最前面请加上宏定义 `#define EIGEN_USE_GPU` +- 在 `.cu`文件中注册GPU Kernel。 + - 请注意,如果GPU Kernel的实现基于Eigen unsupported模块,那么在 `.cu`的开始请加上宏定义 `#define EIGEN_USE_GPU`,代码示例如下: -```cpp -// if use Eigen unsupported module before include head files -#define EIGEN_USE_GPU + ```cpp + // if use Eigen unsupported module before include head files + #define EIGEN_USE_GPU -namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); -REGISTER_OP_GPU_KERNEL(mul_grad, - ops::MulGradKernel); -``` + namespace ops = paddle::operators; + REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_GPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` ### 5. 
编译 @@ -225,7 +242,7 @@ REGISTER_OP_GPU_KERNEL(mul_grad, - 绑定Python 在 [`paddle/pybind/pybind.cc -`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc)文件中添加该类: +`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc) 使用`USE_OP`告知编译器需要链接的Op,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 ``` USE_OP(mul); @@ -242,50 +259,54 @@ REGISTER_OP_GPU_KERNEL(mul_grad, USE_NO_KENREL_OP(recurrent); ``` - 使用`USE_OP`告知编译器需要链接该Op的目标文件,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 - - 生成库 - 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会自动被添加链接到生成的lib库中。 + 无需修改 [`paddle/pybind/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/CMakeLists.txt)文件,`paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动添加链接到生成的lib库中。 ## 实现单元测试 -单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单测](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 +单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单元测试](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 ### 前向Operator单元测试 -前向Op单测继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`,具体单测流程在`OpTestMeta`里完成。需在`setUp`函数定义输入输出和属性参数,以及Python对比的输出值。 +前向Op单元测试继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`。各项更加具体的单元测试在`OpTestMeta`里完成。测试前向Operator,需要: -```python -import unittest -import numpy as np -from gradient_checker import GradientChecker, create_op -from op_test_util import OpTestMeta +1. 在`setUp`函数定义输入、输出,以及相关的属性参数。 +2. 生成随机的输入数据。 +3. 
在Python脚本中实现与前向operator相同的计算逻辑,得到输出值,与operator前向计算的输出进行对比。 -class TestMulOp(unittest.TestCase): - __metaclass__ = OpTestMeta - def setUp(self): - self.type = "mul" - self.inputs = { - 'X': np.random.random((32, 84)).astype("float32"), - 'Y': np.random.random((84, 100)).astype("float32") - } - self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} -``` - 首先需要`import`必要的包,下面详细解释其他值: + ```python + import unittest + import numpy as np + from gradient_checker import GradientChecker, create_op + from op_test_util import OpTestMeta + + class TestMulOp(unittest.TestCase): + __metaclass__ = OpTestMeta - - `self.type = "mul" ` : 定义类型,和注册的类型一致。 - - `self.inputs` : 定义输入,类型为Numpy.array,并初始化。 - - `self.outputs` : 定义输出,并得到Python结算结果。 + def setUp(self): + self.type = "mul" + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} + ``` + +上面的代码首先导入依赖的包,下面是对`setUp`函数中操作的重要变量的详细解释: + +- `self.type = "mul" ` : 定义类型,与operator注册时注册的类型一致。 +- `self.inputs` : 定义输入,类型为`numpy.array`,并初始化。 +- `self.outputs` : 定义输出,并在Python脚本中完成与operator同样的计算逻辑,返回Python端的计算结果。 ### 反向Operator单元测试 -反向Op单测继承自`GradientChecker`,而`GradientChecker`集成自`unittest.TestCase`,所以反向单测函数需要`test_`开头。 +反向Op单元测试继承自`GradientChecker`,而`GradientChecker`继承自`unittest.TestCase`,因此,**反向单元测试函数需要以`test_`开头**。 -```cpp +```python class TestMulGradOp(GradientChecker): def setUp(self): self.op = create_op("mul") @@ -319,27 +340,27 @@ class TestMulGradOp(GradientChecker): no_grad_set={"Y"}) ``` -下面解释一些关键的地方: +下面解释代码中一些关键的地方: - - 调用`create_op("mul")`创建反向Op对应的前向Op。 - - 调用`compare_grad`函数对比CPU、GPU计算结果。 - - `test_normal`中调用`check_grad`检查梯度稳定性,这里采用数值法检测梯度正确性。 - - 第一个参数`self.op` : 前向Op。 - - 第二个参数`self.inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。 - - 第三个参数`["X", "Y"]` : 指定对输入变量`X`、`Y`做梯度检测。 - - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` - - `test_ignore_x`和`test_ignore_y`分支测试只需要计算一个输入梯度的情况。 +- 调用`create_op("mul")`创建反向Op对应的前向Op。 +- 调用`compare_grad`函数对比CPU、GPU计算结果。 +- `test_normal`中调用`check_grad`使用数值法检测梯度正确性和稳定性。 + - 第一个参数`self.op` : 前向Op。 + - 第二个参数`self.inputs` : 输入词典,词典的Key和`ProtoMaker`定义保持一致。 + - 第三个参数`["X", "Y"]` : 指定对输入变量`X`、`Y`做梯度检测。 + - 第四个参数`"Out"` : 指定前向网络最终的输出目标变量`Out` +- `test_ignore_x`和`test_ignore_y`分支用来测试只需要计算一个输入梯度的情况。 ### 编译和执行单元测试 -单测完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)里添加以下内容将单测加入工程中: +单元测试编写完成之后,在[`python/paddle/v2/framework/tests/CMakeLists.txt`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/CMakeLists.txt)中添加以下内容,将单元测试加入工程: ``` py_test(test_mul_op SRCS test_mul_op.py) ``` -请注意,**不同于Op的编译测试,运行单元测试测时需要编译整个工程**,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir -DWITH_TESTING=ON`。编译成功后,执行下面的命令来运行单测: +请注意,**不同于Op的编译测试,运行单元测试测时需要编译整个工程**,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir -DWITH_TESTING=ON`。编译成功后,执行下面的命令来运行单元测试: ```bash make test ARGS="-R test_mul_op -V" From ed346f1dcd831fa76f69e680264b727b1a9df148 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 6 Sep 2017 11:24:44 -0700 Subject: [PATCH 62/71] Pass CI --- paddle/operators/gaussian_random_op.cu | 2 +- paddle/operators/uniform_random_op.cu | 2 +- paddle/platform/cudnn_helper.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 9788d89408..d9dbc1dcfe 100644 --- a/paddle/operators/gaussian_random_op.cu +++ 
b/paddle/operators/gaussian_random_op.cu @@ -42,7 +42,7 @@ class GPUGaussianRandomKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.GetAttr("seed")); + unsigned int seed = static_cast(context.Attr("seed")); if (seed == 0) { std::random_device rd; seed = rd(); diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index fc6f856fa3..c2c041b144 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -45,7 +45,7 @@ class GPUUniformRandomKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - unsigned int seed = static_cast(context.GetAttr("seed")); + unsigned int seed = static_cast(context.Attr("seed")); if (seed == 0) { std::random_device rd; seed = rd(); diff --git a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h index 24ddf3441c..2841d2a2db 100644 --- a/paddle/platform/cudnn_helper.h +++ b/paddle/platform/cudnn_helper.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/enforce.h" #include "paddle/platform/macros.h" From d578b25db8ab9afe7720ca3a2863de91bf069a5a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 6 Sep 2017 12:09:17 -0700 Subject: [PATCH 63/71] add var desc proto --- paddle/framework/framework.proto | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto index 368136a972..dfcb5fb621 100644 --- a/paddle/framework/framework.proto +++ b/paddle/framework/framework.proto @@ -87,3 +87,24 @@ message OpProto { repeated Attr attrs = 4; required string comment = 5; } + +enum DataType { + BOOL = 0; + INT16 = 1; + INT32 = 2; + INT64 = 3; + FP16 = 4; + FP32 = 5; + FP64 = 6; +} + +message LoDTensorDesc { + required DataType data_type = 1; + repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] + optional int32 lod_level = 3 [ default = 0 ]; +} + +message VarDesc { + required string name = 1; + optional LoDTensorDesc lod_tensor = 2; +} From 2282624223af05c686cfce472abee1c5447ff6ae Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Wed, 6 Sep 2017 18:16:36 -0400 Subject: [PATCH 64/71] add offset implementation to lod tensor design (#3934) * add offset implementation to lod tensor design --- paddle/framework/lod_tensor.md | 40 +++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/paddle/framework/lod_tensor.md b/paddle/framework/lod_tensor.md index 8dfe3ee823..769b61f175 100644 --- a/paddle/framework/lod_tensor.md +++ b/paddle/framework/lod_tensor.md @@ -94,7 +94,7 @@ Let's go on slicing this slice. 
Its <1,1>-slice is ||| ``` -### The General Slicing Algorithm +### The Slicing Algorithm The algorithm, with an over-simplified data structure, is defined as ```c++ struct LoDTensor { std::vector> lod_start_pos_; float* tensor_; }; -LoDTensor Slice(const LoDTensor& lodt, int level, int sequence) { +LoDTensor Slice(const LoDTensor& lodt, int level, int sequence); +``` + +Let us revisit the example above -} + +``` + 3 +3 1 2 +3 2 4 1 2 3 +||| || |||| | || ||| ``` -### Slicing the Top Level +Suppose that we want to retrieve the <1,2>-slice -Please be aware that an RNN operator only slices the top level of a LoD Tensor to get the step inputs. +``` +2 +2 3 +|| ||| +``` -```c++ -LoDTensor Slice(const LoDTensor& lodt, int sequence) { +we will need to find out the starting position of this slice by summing over all leaf nodes in `LoD` to the left of the slice, i.e., 3 + 2 + 4 + 1 = 10. + +To avoid the traversal of the LoD tree at slicing time, we can do it at the construction time -- instead of saving the lengths of the next level in the LoD tree, we can save the starting offset of the next level. For example, the above LoD Tensor can be transformed into + +``` + 0 +0 9 10 +0 3 5 9 10 12 +||| || |||| | || ||| +``` + +We don't really need the 0 on top, so the LoD Tensor could be -} + +``` +0 9 10 +0 3 5 9 10 12 +||| || |||| | || ||| ``` From eb3c774b8308e030407a113e8206f200899c7492 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 7 Sep 2017 07:12:18 +0800 Subject: [PATCH 65/71] Fix format error --- paddle/function/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 4076e20de2..4fd72d64a9 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -42,9 +42,9 @@ if(WITH_GPU) add_simple_unittest(MulOpTest) add_simple_unittest(CosSimOpTest) add_simple_unittest(RowConvOpTest) - add_simple_unittest(SwitchOpTest) add_simple_unittest(BlockExpandOpTest) add_simple_unittest(CropOpTest) + add_simple_unittest(SwitchOpTest) endif() add_simple_unittest(Im2ColTest) From 1f0341e19edcaf5a86908b8707901b17052797a1 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 6 Sep 2017 16:25:15 -0700 Subject: [PATCH 66/71] Rename `LargerThan` to `GreaterThan` --- doc/design/simple_op_design.md | 2 +- paddle/framework/attribute.h | 8 ++++---- paddle/framework/op_registry_test.cc | 2 +- paddle/framework/operator_test.cc | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/design/simple_op_design.md b/doc/design/simple_op_design.md index 5e07c29c56..fded4a6861 100644 --- a/doc/design/simple_op_design.md +++ b/doc/design/simple_op_design.md @@ -147,7 +147,7 @@ class CosineOp { struct CosineOpProtoMaker : public OpProtoMaker { CosineOpProtoMaker(OpProto* proto) : OpProtoMaker(proto) { AddInput("input", "input of cosine op"); - AddAttr("scale", "scale of cosine op", float).Default(1.0).LargerThan(0.0); + AddAttr("scale", "scale of cosine op", float).Default(1.0).GreaterThan(0.0); AddType("cos"); AddComment("This is cos op"); } diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h index 071879a9d4..cde3dfa1d3 100644 --- a/paddle/framework/attribute.h +++ b/paddle/framework/attribute.h @@ -41,9 +41,9 @@ Attribute GetAttrValue(const OpDesc::Attr& attr_desc); // check whether a value(attribute) fit a certain limit template -class LargerThanChecker { +class GreaterThanChecker { public: - explicit LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {} + explicit GreaterThanChecker(T 
From eb3c774b8308e030407a113e8206f200899c7492 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Thu, 7 Sep 2017 07:12:18 +0800
Subject: [PATCH 65/71] Fix format error

---
 paddle/function/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt
index 4076e20de2..4fd72d64a9 100644
--- a/paddle/function/CMakeLists.txt
+++ b/paddle/function/CMakeLists.txt
@@ -42,9 +42,9 @@ if(WITH_GPU)
     add_simple_unittest(MulOpTest)
     add_simple_unittest(CosSimOpTest)
     add_simple_unittest(RowConvOpTest)
-    add_simple_unittest(SwitchOpTest)
     add_simple_unittest(BlockExpandOpTest)
     add_simple_unittest(CropOpTest)
+    add_simple_unittest(SwitchOpTest)
 endif()

 add_simple_unittest(Im2ColTest)

From 1f0341e19edcaf5a86908b8707901b17052797a1 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 6 Sep 2017 16:25:15 -0700
Subject: [PATCH 66/71] Rename `LargerThan` to `GreaterThan`

---
 doc/design/simple_op_design.md       | 2 +-
 paddle/framework/attribute.h         | 8 ++++----
 paddle/framework/op_registry_test.cc | 2 +-
 paddle/framework/operator_test.cc    | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/doc/design/simple_op_design.md b/doc/design/simple_op_design.md
index 5e07c29c56..fded4a6861 100644
--- a/doc/design/simple_op_design.md
+++ b/doc/design/simple_op_design.md
@@ -147,7 +147,7 @@ class CosineOp {
 struct CosineOpProtoMaker : public OpProtoMaker {
   CosineOpProtoMaker(OpProto* proto) : OpProtoMaker(proto) {
     AddInput("input", "input of cosine op");
-    AddAttr("scale", "scale of cosine op", float).Default(1.0).LargerThan(0.0);
+    AddAttr("scale", "scale of cosine op", float).Default(1.0).GreaterThan(0.0);
     AddType("cos");
     AddComment("This is cos op");
   }
diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h
index 071879a9d4..cde3dfa1d3 100644
--- a/paddle/framework/attribute.h
+++ b/paddle/framework/attribute.h
@@ -41,9 +41,9 @@ Attribute GetAttrValue(const OpDesc::Attr& attr_desc);

 // check whether a value(attribute) fit a certain limit
 template <typename T>
-class LargerThanChecker {
+class GreaterThanChecker {
  public:
-  explicit LargerThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
+  explicit GreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
   void operator()(T& value) const {
     PADDLE_ENFORCE(value > lower_bound_, "larger_than check fail");
   }
@@ -110,8 +110,8 @@ class TypedAttrChecker {
     return *this;
   }

-  TypedAttrChecker& LargerThan(const T& lower_bound) {
-    value_checkers_.push_back(LargerThanChecker<T>(lower_bound));
+  TypedAttrChecker& GreaterThan(const T& lower_bound) {
+    value_checkers_.push_back(GreaterThanChecker<T>(lower_bound));
     return *this;
   }

diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc
index 0e2fb27b65..e00c6e8d90 100644
--- a/paddle/framework/op_registry_test.cc
+++ b/paddle/framework/op_registry_test.cc
@@ -21,7 +21,7 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
     AddOutput("output", "output of cosine op");
     AddAttr<float>("scale", "scale of cosine op")
         .SetDefault(1.0)
-        .LargerThan(0.0);
+        .GreaterThan(0.0);
     AddComment("This is cos op");
   }
 };
diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc
index 8a1970c7a8..20bbb11896 100644
--- a/paddle/framework/operator_test.cc
+++ b/paddle/framework/operator_test.cc
@@ -102,7 +102,7 @@ class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
     AddOutput("y", "output of test op");
     AddAttr<float>("scale", "scale of cosine op")
         .SetDefault(1.0)
-        .LargerThan(0.0);
+        .GreaterThan(0.0);
     AddComment("This is test op");
   }
 };
@@ -140,7 +140,7 @@ class OpKernelTestMultiInputsProtoAndCheckerMaker
     AddOutput("ys", "outputs of test op").AsDuplicable();
     AddAttr<float>("scale", "scale of cosine op")
         .SetDefault(1.0)
-        .LargerThan(0.0);
+        .GreaterThan(0.0);
     AddComment("This is test op");
   }
 };
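[Editor's note] To see what the renamed checker does at runtime: `GreaterThan` appends a functor to a list of callables that the attribute framework later runs against each attribute value. A stand-alone equivalent of that chain (editor's sketch, deliberately independent of Paddle's headers; the `main` driver is hypothetical):

```c++
#include <functional>
#include <iostream>
#include <vector>

template <typename T>
class GreaterThanChecker {
 public:
  explicit GreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
  void operator()(T& value) const {
    if (!(value > lower_bound_)) std::cerr << "greater_than check fail\n";
  }

 private:
  T lower_bound_;
};

int main() {
  // Mirrors TypedAttrChecker::GreaterThan pushing a checker functor.
  std::vector<std::function<void(float&)>> value_checkers;
  value_checkers.push_back(GreaterThanChecker<float>(0.0f));

  float scale = 1.0f;  // the "scale" attribute from the tests above
  for (auto& check : value_checkers) check(scale);  // 1.0 > 0.0, passes
  return 0;
}
```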
From c0523f24b0a9b9b70a728eb76fa72c4d0f56f194 Mon Sep 17 00:00:00 2001
From: Yan Chunwei
Date: Wed, 6 Sep 2017 19:34:40 -0400
Subject: [PATCH 67/71] rename LOD to LoD for short of "Level of Details"
 (#3936)

---
 paddle/framework/lod_tensor.cc      | 16 ++++++++--------
 paddle/framework/lod_tensor.h       | 26 +++++++++++++-------------
 paddle/framework/lod_tensor_test.cc | 20 ++++++++++----------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index 71eac4a10b..908a1f2fd0 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -19,8 +19,8 @@
 namespace paddle {
 namespace framework {

-LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end) {
-  LOD new_lod;
+LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
+  LoD new_lod;
   new_lod.reserve(level_end - level_begin);
   for (size_t i = level_begin; i < level_end; i++) {
     new_lod.emplace_back(in.at(i));
@@ -28,10 +28,10 @@ LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end) {
   return new_lod;
 }

-LOD SliceInLevel(const LOD& in, size_t level, size_t elem_begin,
+LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
                  size_t elem_end) {
   // slice the lod.
-  LOD new_lod;
+  LoD new_lod;
   new_lod.reserve(in.size() - level);
   auto start = in.at(level)[elem_begin];
   auto end = in.at(level)[elem_end];
@@ -46,13 +46,13 @@ LOD SliceInLevel(const LOD& in, size_t level, size_t elem_begin,
     std::transform(new_lod.back().begin(), new_lod.back().end(),
                    new_lod.back().begin(),
                    [start](int v) { return v - start; });
-    PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LOD");
+    PADDLE_ENFORCE_EQ(new_lod.back().front(), 0, "error in slice LoD");
   }
   PADDLE_ENFORCE_LE(new_lod.size(), in.size());
   return new_lod;
 }

-bool operator==(const LOD& a, const LOD& b) {
+bool operator==(const LoD& a, const LoD& b) {
   if (a.size() != b.size()) {
     return false;
   }
@@ -72,12 +72,12 @@ bool operator==(const LOD& a, const LOD& b) {
   return true;
 }

-void LODTensor::SliceLevels(size_t level_begin, size_t level_end) {
+void LoDTensor::SliceLevels(size_t level_begin, size_t level_end) {
   auto new_lod = framework::SliceLevels(lod_, level_begin, level_end);
   lod_ = new_lod;
 }

-void LODTensor::SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) {
+void LoDTensor::SliceInLevel(size_t level, size_t elem_begin, size_t elem_end) {
   PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                  NumLevels());
   PADDLE_ENFORCE(elem_begin < NumElements(level),
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 9e6b6b4aca..154068fef6 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -35,34 +35,34 @@ template <typename T>
 using Vector = thrust::host_vector<T>;
 #endif

-using LOD = std::vector<Vector<size_t>>;
+using LoD = std::vector<Vector<size_t>>;

-LOD SliceLevels(const LOD& in, size_t level_begin, size_t level_end);
+LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end);

-LOD SliceInLevel(const LOD& in, size_t level, size_t elem_begin,
+LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
                  size_t elem_end);

-bool operator==(const LOD& a, const LOD& b);
+bool operator==(const LoD& a, const LoD& b);

 /*
- * LODTensor (Level of details Tensor)
+ * LoDTensor (Level of details Tensor)
  * see https://en.wikipedia.org/wiki/Level_of_details for reference.
  */
-class LODTensor {
+class LoDTensor {
  public:
-  LODTensor() {}
-  LODTensor(const LOD& lod, Tensor* t) : lod_(lod), tensor_(t) {}
+  LoDTensor() {}
+  LoDTensor(const LoD& lod, Tensor* t) : lod_(lod), tensor_(t) {}

-  void set_lod(const LOD& lod) { lod_ = lod; }
+  void set_lod(const LoD& lod) { lod_ = lod; }

   void set_tensor(Tensor* tensor) { tensor_ = tensor; }

   Tensor& tensor() { return *tensor_; }

-  LOD lod() { return lod_; }
+  LoD lod() { return lod_; }

   /*
-   * Get an element from LOD.
+   * Get an element from LoD.
    */
   size_t lod_element(size_t level, size_t elem) const {
     PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
@@ -74,7 +74,7 @@ class LODTensor {
   }

   /*
-   * Number of LODTensor's levels, each level has units of data, for example,
+   * Number of LoDTensor's levels, each level has units of data, for example,
    * in the sentence's view, article, paragraph, sentence are 3 levels.
    */
   size_t NumLevels() const { return lod_.size(); }
@@ -100,7 +100,7 @@ class LODTensor {
   void SliceInLevel(size_t level, size_t elem_begin, size_t elem_end);

  private:
-  LOD lod_;
+  LoD lod_;
   Tensor* tensor_;  // not owned
 };
 }  // namespace framework
diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc
index 9a351605ed..1da8553134 100644
--- a/paddle/framework/lod_tensor_test.cc
+++ b/paddle/framework/lod_tensor_test.cc
@@ -21,7 +21,7 @@
 namespace paddle {
 namespace framework {

-class LODTensorTester : public ::testing::Test {
+class LoDTensorTester : public ::testing::Test {
  public:
   virtual void SetUp() override {
     // tensor's batch_size: 30
@@ -29,7 +29,7 @@ class LODTensorTester : public ::testing::Test {
     // 0 10 20
     // 0 5 10 15 20
     // 0 2 5 7 10 12 15 20
-    LOD lod;
+    LoD lod;
     lod.push_back(std::vector<size_t>{0, 10, 20});
     lod.push_back(std::vector<size_t>{0, 5, 10, 15, 20});
     lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
@@ -47,21 +47,21 @@ class LODTensorTester : public ::testing::Test {
  protected:
   platform::CPUPlace place;
   Tensor tensor;
-  LODTensor lod_tensor;
+  LoDTensor lod_tensor;
 };

-TEST_F(LODTensorTester, NumLevels) { ASSERT_EQ(lod_tensor.NumLevels(), 3UL); }
+TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor.NumLevels(), 3UL); }

-TEST_F(LODTensorTester, NumElements) {
+TEST_F(LoDTensorTester, NumElements) {
   ASSERT_EQ(lod_tensor.NumElements(0), 2UL);
   ASSERT_EQ(lod_tensor.NumElements(1), 4UL);
   ASSERT_EQ(lod_tensor.NumElements(2), 8UL);
 }

-TEST_F(LODTensorTester, SliceLevels) {
+TEST_F(LoDTensorTester, SliceLevels) {
   // slice 1 level
   for (size_t level = 0; level < 3UL; ++level) {
-    LODTensor new_lod_tensor = lod_tensor;
+    LoDTensor new_lod_tensor = lod_tensor;
     new_lod_tensor.SliceLevels(level, level + 1);
     ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL);
     ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level));
@@ -70,7 +70,7 @@ TEST_F(LODTensorTester, SliceLevels) {
   }
   // slice 2 level
   for (size_t level = 0; level < 2UL; ++level) {
-    LODTensor new_lod_tensor = lod_tensor;
+    LoDTensor new_lod_tensor = lod_tensor;
     new_lod_tensor.SliceLevels(level, level + 2);
     ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL);
     ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level));
     ASSERT_EQ(new_lod_tensor.NumElements(1), lod_tensor.NumElements(level + 1));
   }
 }

@@ -80,9 +80,9 @@ TEST_F(LODTensorTester, SliceLevels) {
 }

-TEST_F(LODTensorTester, SliceInLevel) {
+TEST_F(LoDTensorTester, SliceInLevel) {
   size_t level = 0;
-  LODTensor new_lod_tensor = lod_tensor;
+  LoDTensor new_lod_tensor = lod_tensor;
   new_lod_tensor.SliceInLevel(level, 0, 2);
   EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL);
   EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL);
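[Editor's note] A compressed usage sketch of the renamed API, mirroring the test fixture above (editor's illustration for a CPU build, where `Vector<size_t>` is plain `std::vector<size_t>`; allocation of the underlying tensor is elided):

```c++
#include "paddle/framework/lod_tensor.h"

void Demo(paddle::framework::Tensor* tensor) {
  using namespace paddle::framework;

  LoD lod;  // two levels: 2 sequences, split into 4 sub-sequences
  lod.push_back(std::vector<size_t>{0, 10, 20});
  lod.push_back(std::vector<size_t>{0, 5, 10, 15, 20});

  LoDTensor lod_tensor(lod, tensor);
  lod_tensor.SliceLevels(0, 1);  // keep only the coarsest level
  // lod_tensor.SliceInLevel(0, 0, 1) would instead keep the first
  // top-level sequence and every finer level beneath it.
}
```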
From e033569dd649c08a986a4d97608692f354003c78 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Thu, 7 Sep 2017 11:22:21 +0800
Subject: [PATCH 68/71] Fix format

---
 paddle/function/SwitchOpGpu.cu | 56 +++++++++++++++++++---------------
 proto/ModelConfig.proto        | 11 +++----
 2 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/paddle/function/SwitchOpGpu.cu b/paddle/function/SwitchOpGpu.cu
index 0b9401dea1..45390a56c3 100644
--- a/paddle/function/SwitchOpGpu.cu
+++ b/paddle/function/SwitchOpGpu.cu
@@ -12,14 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "hl_base.h"
 #include "SwitchOp.h"
+#include "hl_base.h"

 namespace paddle {

-__global__ void KeNCHW2NHWC(real* outputs, const real* inputs,
-                            int inC, int inH, int inW,
-                            int nthreads, int argType) {
+__global__ void KeNCHW2NHWC(real* outputs,
+                            const real* inputs,
+                            int inC,
+                            int inH,
+                            int inW,
+                            int nthreads,
+                            int argType) {
   const int idx = threadIdx.x + blockIdx.x * blockDim.x;
   if (idx < nthreads) {
     const int w = idx % inW;
@@ -27,7 +31,7 @@ __global__ void KeNCHW2NHWC(real* outputs, const real* inputs,
     const int c = (idx / inW / inH) % inC;
     const int n = idx / inW / inH / inC;

-    const int off = ((n * inH + h) * inW + w) * inC +c;
+    const int off = ((n * inH + h) * inW + w) * inC + c;
     if (argType == ADD_TO) {
       outputs[off] += inputs[idx];
     } else {
@@ -38,23 +42,27 @@ __global__ void KeNCHW2NHWC(real* outputs, const real* inputs,

 template <>
 void NCHW2NHWC<DEVICE_TYPE_GPU>(real* outputs,
-                          const real* inputs,
-                          const int num,
-                          const int inC,
-                          const int inH,
-                          const int inW,
-                          const int argType) {
+                                const real* inputs,
+                                const int num,
+                                const int inC,
+                                const int inH,
+                                const int inW,
+                                const int argType) {
   size_t nth = num * inC * inH * inW;
   int blockSize = 1024;
   int gridSize = (nth + 1024 - 1) / 1024;
-  KeNCHW2NHWC<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
-      (outputs, inputs, inC, inH, inW, nth, argType);
+  KeNCHW2NHWC<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
+      outputs, inputs, inC, inH, inW, nth, argType);
   CHECK_SYNC("NCHW2NHWC");
 }

-__global__ void KeNHWC2NCHW(real* outputs, const real* inputs,
-                            int inH, int inW, int inC,
-                            int nthreads, int argType) {
+__global__ void KeNHWC2NCHW(real* outputs,
+                            const real* inputs,
+                            int inH,
+                            int inW,
+                            int inC,
+                            int nthreads,
+                            int argType) {
   const int idx = threadIdx.x + blockIdx.x * blockDim.x;
   if (idx < nthreads) {
     const int c = idx % inC;
@@ -73,17 +81,17 @@ __global__ void KeNHWC2NCHW(real* outputs, const real* inputs,

 template <>
 void NHWC2NCHW<DEVICE_TYPE_GPU>(real* outputs,
-                          const real* inputs,
-                          const int num,
-                          const int inH,
-                          const int inW,
-                          const int inC,
-                          const int argType) {
+                                const real* inputs,
+                                const int num,
+                                const int inH,
+                                const int inW,
+                                const int inC,
+                                const int argType) {
   int nth = num * inC * inH * inW;
   int blockSize = 1024;
   int gridSize = (nth + 1024 - 1) / 1024;
-  KeNHWC2NCHW<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
-      (outputs, inputs, inH, inW, inC, nth, argType);
+  KeNHWC2NCHW<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>(
+      outputs, inputs, inH, inW, inC, nth, argType);
   CHECK_SYNC("NHWC2NCHW");
 }

diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index f5b15c3adb..0f44d8cb8d 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -288,8 +288,8 @@ message PadConfig {
 }

 message ReshapeConfig {
-  repeated uint32 heightAxis = 1; 
-  repeated uint32 widthAxis = 2; 
+  repeated uint32 heightAxis = 1;
+  repeated uint32 widthAxis = 2;
 }

 message MultiBoxLossConfig {
@@ -344,7 +344,6 @@ message LayerInputConfig {
 }

 message LayerConfig {
-
   required string name = 1;
   required string type = 2;
   optional uint64 size = 3;
@@ -516,13 +515,13 @@ message LayerConfig {
   optional int32 axis = 54 [ default = 2 ];
   repeated uint32 offset = 55;
   repeated uint32 shape = 56;
-  
+
   // for HuberRegressionLoss
   optional double delta = 57 [ default = 1.0 ];

   optional uint64 depth = 58 [ default = 1 ];
-  
-  // for switch order layer 
+
+  // for switch order layer
   optional ReshapeConfig reshape_conf = 59;
 }
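[Editor's note] The index arithmetic in the kernels above is the whole trick: each GPU thread takes one linear NCHW offset `idx`, decodes it into `(n, c, h, w)`, and stores to the NHWC position `((n * inH + h) * inW + w) * inC + c`. The same algebra can be sanity-checked on the host (editor's sketch, independent of Paddle):

```c++
#include <cassert>

int main() {
  const int N = 2, C = 3, H = 4, W = 5;
  for (int idx = 0; idx < N * C * H * W; ++idx) {
    // Decode the linear NCHW index exactly as KeNCHW2NHWC does.
    const int w = idx % W;
    const int h = (idx / W) % H;
    const int c = (idx / W / H) % C;
    const int n = idx / W / H / C;
    // Re-encoding (n, c, h, w) in NCHW order must give back idx,
    // so the NHWC write ((n*H + h)*W + w)*C + c hits each slot once.
    assert(idx == ((n * C + c) * H + h) * W + w);
  }
  return 0;
}
```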
From 3360e9cdb8151baa33c3e82840fae2d105085a46 Mon Sep 17 00:00:00 2001
From: Liu Yiqun
Date: Thu, 7 Sep 2017 11:06:32 +0800
Subject: [PATCH 69/71] Change the definition of vmlaq_laneq_f32 from template
 function to macro.

---
 .travis.yml                                |   2 +-
 Dockerfile.android                         |   4 +-
 paddle/function/GruFunctor.h               |   1 -
 paddle/function/neon/NeonDepthwiseConv.cpp | 100 ++++++++++-----------
 paddle/function/neon/neon_util.h           |   8 +-
 paddle/scripts/docker/build_android.sh     |   6 +-
 6 files changed, 59 insertions(+), 62 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 14a39c58de..b4b83fcdbc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,7 @@ cache:
     - $HOME/.ccache
     - $HOME/.cache/pip
     - $TRAVIS_BUILD_DIR/build/third_party
-    - $TRAVIS_BUILD_DIR/build/third_party_android
+    - $TRAVIS_BUILD_DIR/build_android/third_party
 sudo: required
 dist: trusty
 os:
diff --git a/Dockerfile.android b/Dockerfile.android
index 6013215d9d..452aa15745 100644
--- a/Dockerfile.android
+++ b/Dockerfile.android
@@ -11,8 +11,8 @@ ENV ANDROID_ABI=${ANDROID_ABI:-"armeabi-v7a"}

 ENV HOME=/root \
     ANDROID_NDK_HOME=/opt/android-ndk-linux \
-    ANDROID_ARM_STANDALONE_TOOLCHAIN=/opt/arm-toolchain-gcc \
-    ANDROID_ARM64_STANDALONE_TOOLCHAIN=/opt/arm64-toolchain-gcc
+    ANDROID_ARM_STANDALONE_TOOLCHAIN=/opt/arm-toolchain \
+    ANDROID_ARM64_STANDALONE_TOOLCHAIN=/opt/arm64-toolchain

 RUN apt-get update && \
     apt-get install -y \
diff --git a/paddle/function/GruFunctor.h b/paddle/function/GruFunctor.h
index 11f6174dbd..9f6392198e 100644
--- a/paddle/function/GruFunctor.h
+++ b/paddle/function/GruFunctor.h
@@ -15,7 +15,6 @@ limitations under the License. */
 #pragma once

 #include "GemmFunctor.h"
-#include "GruFunctor.h"
 #include "hl_cpu_gru.cuh"

 namespace paddle {
diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/function/neon/NeonDepthwiseConv.cpp
index 14e5198e1b..f09e98587d 100644
--- a/paddle/function/neon/NeonDepthwiseConv.cpp
+++ b/paddle/function/neon/NeonDepthwiseConv.cpp
@@ -116,15 +116,15 @@ struct DepthwiseConvKernel<3, 1> {
     float32x4_t tmp1 = vdupq_n_f32(0.f);
     float32x4_t tmp2 = vdupq_n_f32(0.f);

-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]);
-    tmp2 = vmlaq_laneq_f32<0>(tmp2, input[1][0], k[1]);
-    tmp1 = vmlaq_laneq_f32<1>(tmp1, input[1][1], k[1]);
-    tmp2 = vmlaq_laneq_f32<2>(tmp2, input[1][2], k[1]);
-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2);

     tmp1 = vaddq_f32(tmp1, tmp2);
     vst1q_f32(outputData, tmp1);
@@ -223,15 +223,15 @@ struct DepthwiseConvKernel<3, 2> {
     float32x4_t tmp1 = vdupq_n_f32(0.f);
     float32x4_t tmp2 = vdupq_n_f32(0.f);

-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]);
-    tmp2 = vmlaq_laneq_f32<0>(tmp2, input[1][0], k[1]);
-    tmp1 = vmlaq_laneq_f32<1>(tmp1, input[1][1], k[1]);
-    tmp2 = vmlaq_laneq_f32<2>(tmp2, input[1][2], k[1]);
-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[1][0], k[1], 0);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[1][1], k[1], 1);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[1][2], k[1], 2);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2);

     tmp1 = vaddq_f32(tmp1, tmp2);
     vst1q_f32(outputData, tmp1);
@@ -316,22 +316,22 @@ struct DepthwiseConvKernel<4, 1> {
     float32x4_t tmp1 = vdupq_n_f32(0.f);
     float32x4_t tmp2 = vdupq_n_f32(0.f);

-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]);
-    tmp2 = vmlaq_laneq_f32<3>(tmp2, input[0][3], k[0]);
-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[1][0], k[1]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[1][1], k[1]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[1][2], k[1]);
-    tmp2 = vmlaq_laneq_f32<3>(tmp2, input[1][3], k[1]);
-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]);
-    tmp2 = vmlaq_laneq_f32<3>(tmp2, input[2][3], k[2]);
-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[3][0], k[3]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[3][1], k[3]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[3][2], k[3]);
-    tmp2 = vmlaq_laneq_f32<3>(tmp2, input[3][3], k[3]);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3);

     tmp1 = vaddq_f32(tmp1, tmp2);
     vst1q_f32(outputData, tmp1);
@@ -431,22 +431,22 @@ struct DepthwiseConvKernel<4, 2> {
     float32x4_t tmp1 = vdupq_n_f32(0.f);
     float32x4_t tmp2 = vdupq_n_f32(0.f);

-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[0][0], k[0]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[0][1], k[0]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[0][2], k[0]);
-    tmp2 = vmlaq_laneq_f32<3>(tmp2, input[0][3], k[0]);
-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[1][0], k[1]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[1][1], k[1]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[1][2], k[1]);
-    tmp2 = vmlaq_laneq_f32<3>(tmp2, input[1][3], k[1]);
-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[2][0], k[2]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[2][1], k[2]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[2][2], k[2]);
-    tmp2 = vmlaq_laneq_f32<3>(tmp2, input[2][3], k[2]);
-    tmp1 = vmlaq_laneq_f32<0>(tmp1, input[3][0], k[3]);
-    tmp2 = vmlaq_laneq_f32<1>(tmp2, input[3][1], k[3]);
-    tmp1 = vmlaq_laneq_f32<2>(tmp1, input[3][2], k[3]);
-    tmp2 = vmlaq_laneq_f32<3>(tmp2, input[3][3], k[3]);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[0][0], k[0], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[0][1], k[0], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[0][2], k[0], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[0][3], k[0], 3);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[1][0], k[1], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[1][1], k[1], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[1][2], k[1], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[1][3], k[1], 3);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[2][0], k[2], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[2][1], k[2], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[2][2], k[2], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[2][3], k[2], 3);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[3][0], k[3], 0);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[3][1], k[3], 1);
+    tmp1 = vmlaq_laneq_f32(tmp1, input[3][2], k[3], 2);
+    tmp2 = vmlaq_laneq_f32(tmp2, input[3][3], k[3], 3);

     tmp1 = vaddq_f32(tmp1, tmp2);
     vst1q_f32(outputData, tmp1);
diff --git a/paddle/function/neon/neon_util.h b/paddle/function/neon/neon_util.h
index dbe017170b..e2db045067 100644
--- a/paddle/function/neon/neon_util.h
+++ b/paddle/function/neon/neon_util.h
@@ -33,12 +33,8 @@ inline float32_t vaddvq_f32(float32x4_t a) {
   return vget_lane_f32(vpadd_f32(v, v), 0);
 }

-template <int lane>
-inline float32x4_t vmlaq_laneq_f32(float32x4_t a,
-                                   float32x4_t b,
-                                   float32x4_t v) {
-  return vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane));
-}
+#define vmlaq_laneq_f32(a, b, v, lane) \
+  vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane))

 #endif

 }  // namespace neon
diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh
index 512a37166c..aabd2da5e4 100644
--- a/paddle/scripts/docker/build_android.sh
+++ b/paddle/scripts/docker/build_android.sh
@@ -36,6 +36,7 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then
           -DUSE_EIGEN_FOR_BLAS=OFF \
           -DWITH_C_API=ON \
           -DWITH_SWIG_PY=OFF \
+          -DWITH_STYLE_CHECK=OFF \
           ..
 elif [ $ANDROID_ABI == "armeabi" ]; then
   cmake -DCMAKE_SYSTEM_NAME=Android \
@@ -48,10 +49,11 @@ elif [ $ANDROID_ABI == "armeabi" ]; then
           -DCMAKE_BUILD_TYPE=Release \
           -DWITH_C_API=ON \
           -DWITH_SWIG_PY=OFF \
+          -DWITH_STYLE_CHECK=OFF \
           ..
 else
   echo "Invalid ANDROID_ABI: $ANDROID_ABI"
 fi

-make VERBOSE=1 -j2
-make install -j2
+make -j `nproc`
+make install -j `nproc`
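[Editor's note] Why a macro: `vgetq_lane_f32` requires its lane argument as a compile-time immediate, and the macro keeps call sites in the same four-argument shape as the real AArch64 `vmlaq_laneq_f32` intrinsic (the template form forced the `vmlaq_laneq_f32<0>(...)` spelling seen in the minus lines). The depthwise-conv code can now compile unchanged on both armv7 and arm64. A usage sketch (editor's illustration for an armv7 build, where the intrinsic is otherwise unavailable; `scale_row` is a hypothetical caller):

```c++
#include <arm_neon.h>

// Same expansion as the patch: multiply-accumulate by one lane of v.
// `lane` must be a literal so it reaches vgetq_lane_f32 as an immediate.
#define vmlaq_laneq_f32(a, b, v, lane) \
  vmlaq_n_f32(a, b, vgetq_lane_f32(v, lane))

float32x4_t scale_row(float32x4_t acc, float32x4_t x, float32x4_t k) {
  return vmlaq_laneq_f32(acc, x, k, 0);  // acc += x * k[0]
}
```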
From ebe8966e06392996a398b8b90e61769166ec8e4a Mon Sep 17 00:00:00 2001
From: hedaoyuan
Date: Thu, 7 Sep 2017 11:33:03 +0800
Subject: [PATCH 70/71] Add im2col_test.

---
 paddle/operators/math/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index 7a41e294a8..f8333f34f7 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -7,3 +7,4 @@ else()
 endif()

 nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
+cc_test(im2col_test SRCS im2col_test.cc DEPS math_function tensor)

From 441e60a40b14d49bd15d596efa7d65ae6dc663f0 Mon Sep 17 00:00:00 2001
From: Liu Yiqun
Date: Thu, 7 Sep 2017 14:10:43 +0800
Subject: [PATCH 71/71] Remove build_android task from travis temporarily to
 avoid failure on travis because of the cache of third_party.

---
 .travis.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index b4b83fcdbc..3fadf8deb7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,6 @@ cache:
     - $HOME/.ccache
     - $HOME/.cache/pip
     - $TRAVIS_BUILD_DIR/build/third_party
-    - $TRAVIS_BUILD_DIR/build_android/third_party
 sudo: required
 dist: trusty
 os:
@@ -12,7 +11,6 @@ os:
 env:
   - JOB=build_doc
   - JOB=check_style
-  - JOB=build_android
 addons:
   apt:
     packages: