From 2e47c9d828ea48b775572384260cc806674663aa Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 11 Jan 2017 17:44:18 +0800 Subject: [PATCH 01/37] Fix bug in DenseScanner of DataProviderConverter. --- paddle/py_paddle/dataprovider_converter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index 981d10afda..21d1cb75f4 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -34,6 +34,10 @@ class IScanner(object): class DenseScanner(IScanner): + """ + :type __mat__: numpy.ndarray + """ + def __init__(self, input_type, pos): IScanner.__init__(self, input_type, pos) self.__mat__ = None @@ -47,6 +51,8 @@ class DenseScanner(IScanner): def finish_scan(self, argument): assert isinstance(argument, swig_paddle.Arguments) assert isinstance(self.input_type, dp2.InputType) + if self.__mat__.dtype != numpy.float32: + self.__mat__ = self.__mat__.astype(numpy.float32) m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False) argument.setSlotValue(self.pos, m) From 2629d43ff7c798f1b3cd3be3883449a4b2877c35 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 12 Jan 2017 14:49:19 +0800 Subject: [PATCH 02/37] New FunctionTest --- paddle/function/FunctionTest.h | 138 ++++++++++++++++++++++++++++++--- 1 file changed, 126 insertions(+), 12 deletions(-) diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h index 32131037f6..2847188fd6 100644 --- a/paddle/function/FunctionTest.h +++ b/paddle/function/FunctionTest.h @@ -15,9 +15,33 @@ limitations under the License. */ #include "Function.h" #include "paddle/math/Vector.h" #include "paddle/math/tests/TensorCheck.h" +#include "paddle/testing/TestUtil.h" namespace paddle { +/** + * \brief A class for comparing CPU and GPU implementations of Function. 
+ * + * + * Use case: + * // Initializes a test object, the corresponding cpu and gpu Function + * // are constructed according to FunctionName and FuncConfig. + * FunctionCompare test(FunctionName, FuncConfig); + * // Prepare inputs and outputs arguments. + * // Here the input and output can not contain real data, + * // only contains the argument type and shape. + * test.addInputs(input1); + * test.addInputs(input2); + * test.addOutputs(output1); + * test.addOutputs(output2); + * // Run. + * // Will according to the type and shape of arguments(inputs_/outputs_), + * // automatic initialization cpu and gpu function required arguments + * // (cpuInputs_/cpuOutputs_/gpuInputs_/gpuOutputs_). + * // Call the CPU and GPU Function calculation results. + * // Compares CPU and GPU calculation results for consistency. + * test.run(); + */ class FunctionCompare { public: FunctionCompare(const std::string& name, const FuncConfig& config) @@ -27,6 +51,32 @@ public: gpu->init(config); } + void addInputs(const BufferArg& input) { inputs.push_back(input); } + + void addOutputs(const BufferArg& output) { outputs.push_back(output); } + + void run() { + // prepare cpu/gpu arguments + prepareArgs(); + + // function calculate + cpu->calc(cpuInputs, cpuOutputs); + gpu->calc(gpuInputs, gpuOutputs); + + // check outputs and inouts + auto checkArgs = [=](const BufferArgs& cpuArgs, const BufferArgs& gpuArgs) { + for (size_t i = 0; i < cpuArgs.size(); i++) { + auto cpu = cpuArgs[i]; + auto gpu = gpuArgs[i]; + CpuVector cpuVector(cpu.shape().getElements(), (real*)cpu.getData()); + GpuVector gpuVector(cpu.shape().getElements(), (real*)gpu.getData()); + + autotest::TensorCheckErr(cpuVector, gpuVector); + } + }; + checkArgs(cpuOutputs, gpuOutputs); + } +#if 0 void cmpWithArg(const Arguments& inputs, const Arguments& outputs, const Arguments& inouts) { @@ -64,11 +114,10 @@ public: }; initArgs(cpuInputs, gpuInputs, inputs); initArgs(cpuOutputs, gpuOutputs, outputs); - initArgs(cpuInouts, 
gpuInouts, inouts); // function calculate - cpu->calc(cpuInputs, cpuOutputs, cpuInouts); - gpu->calc(gpuInputs, gpuOutputs, gpuInouts); + cpu->calc(cpuInputs, cpuOutputs); + gpu->calc(gpuInputs, gpuOutputs); // check outputs and inouts auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) { @@ -86,24 +135,89 @@ public: } }; checkArgs(cpuOutputs, gpuOutputs); - checkArgs(cpuInouts, gpuInouts); } +#endif std::shared_ptr getCpuFunction() const { return cpu; } std::shared_ptr getGpuFunction() const { return gpu; } +protected: + void prepareArgs() { + // TODO, if inputs has data + } + + void createArg(BufferArgs& cpuArgs, BufferArgs& gpuArgs, BufferArg& arg) { + size_t size = arg.shape().getElements() * sizeOfValuType(arg.valueType()); + cpuMemory_.emplace_back(std::make_shared(size)); + gpuMemory_.emplace_back(std::make_shared(size)); + + cpuArgs.emplace_back( + BufferArg(cpuMemory_.back()->getBuf()), arg.valueType(), arg.shape()); + gpuArgs.emplace_back( + BufferArg(gpuMemory_.back()->getBuf()), arg.valueType(), arg.shape()); + } + + void createArg(BufferArgs& cpuArgs, BufferArgs& gpuArgs, SequenceArg& arg) { + size_t batchSize = arg.shape()[0]; + size_t numSeqs = batchSize / 10 + 1; + + size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); + cpuMemory_.emplace_back(std::make_shared(size)); + gpuMemory_.emplace_back(std::make_shared(size)); + + TensorShape seqsId({numSeqs + 1}); + void* cpuBuffer = cpuMemory_.back()->getBuf(); + void* gpuBuffer = gpuMemory_.back()->getBuf(); + + size_t size = arg.shape().getElements() * sizeOfValuType(arg.valueType()); + cpuMemory_.emplace_back(std::make_shared(size)); + gpuMemory_.emplace_back(std::make_shared(size)); + + cpuArgs.emplace_back(SequenceArg(cpuMemory_.back()->getBuf(), + arg.valueType(), + arg.shape(), + SequenceIdArg(cpuBuffer, seqsId))); + gpuArgs.emplace_back(SequenceArg(gpuMemory_.back()->getBuf(), + arg.valueType(), + arg.shape(), + SequenceIdArg(gpuBuffer, seqsId))); + } + + // only 
init cpu argument, gpu argument copy from cpu argument. + void initArg(BufferArg& arg) { + CpuVector vector(arg.shape().getElements(), (real*)arg.data()); + vector.uniform(0.001, 1); + } + + void initArg(SequenceIdArg& arg, size_t batchSize) { + size_t numSeqs = arg.numSeqs(); + int* buf = arg.data(); + int pos = 0; + size_t maxLen = 2 * batchSize / numSeqs; + for (int i = 0; i < numSeqs; ++i) { + int len = uniformRandom( + std::min(maxLen, batchSize - pos - numSeqs + i)) + + 1; + buf[i] = pos; + pos += len; + VLOG(1) << " len=" << len; + } + buf[numSeqs] = batchSize; + } + protected: std::shared_ptr cpu; std::shared_ptr gpu; - std::vector cpuMemory; - std::vector gpuMemory; - Arguments cpuInputs; - Arguments cpuOutputs; - Arguments cpuInouts; - Arguments gpuInputs; - Arguments gpuOutputs; - Arguments gpuInouts; + std::vector cpuMemory_; + std::vector gpuMemory_; + // inputs and outputs + BufferArgs inputs; + BufferArgs outputs; + BufferArgs cpuInputs_; + BufferArgs cpuOutputs_; + BufferArgs gpuInputs_; + BufferArgs gpuOutputs_; }; } // namespace paddle From fdf194aeaf6df02fde1165737def451a5fec8e73 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 12 Jan 2017 18:03:18 +0800 Subject: [PATCH 03/37] move a test case from BufferArgTest.cpp to FunctionTest.cpp --- paddle/function/BufferArgTest.cpp | 53 ------------------------------- paddle/function/FunctionTest.cpp | 52 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 53 deletions(-) diff --git a/paddle/function/BufferArgTest.cpp b/paddle/function/BufferArgTest.cpp index b345597435..1744f37780 100644 --- a/paddle/function/BufferArgTest.cpp +++ b/paddle/function/BufferArgTest.cpp @@ -14,9 +14,7 @@ limitations under the License. 
*/ #include "BufferArg.h" #include -#include "Function.h" #include "paddle/math/MemoryHandle.h" -#include "paddle/math/SparseMatrix.h" namespace paddle { @@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) { EXPECT_EQ(buffer.numSeqs(), 9); } -TEST(BufferTest, asArgument) { - MatrixPtr matrix = Matrix::create(100, 200); - VectorPtr vector = Vector::create(100, false); - CpuSparseMatrix sparse(200, 300, 50); - - // prepare arguments - BufferArgs argments; - argments.addArg(*matrix); - argments.addArg(*vector); - argments.addArg(sparse); - - // function - auto function = [=](const BufferArgs& inputs) { - EXPECT_EQ(inputs.size(), 3); - - // check inputs[0] - EXPECT_EQ(inputs[0].shape().ndims(), 2); - EXPECT_EQ(inputs[0].shape()[0], 100); - EXPECT_EQ(inputs[0].shape()[1], 200); - EXPECT_EQ(inputs[0].data(), matrix->getData()); - - EXPECT_EQ(inputs[0].matrix().getHeight(), - matrix->getHeight()); - EXPECT_EQ(inputs[0].matrix().getWidth(), - matrix->getWidth()); - EXPECT_EQ(inputs[0].matrix().getData(), matrix->getData()); - - // check inputs[1] - EXPECT_EQ(inputs[1].shape().ndims(), 1); - EXPECT_EQ(inputs[1].shape()[0], 100); - EXPECT_EQ(inputs[1].data(), vector->getData()); - CpuVector inVector = inputs[1].vector(); - EXPECT_EQ(inVector.getSize(), vector->getSize()); - EXPECT_EQ(inVector.getData(), vector->getData()); - - // check inputs[2] - EXPECT_EQ(inputs[2].shape().ndims(), 2); - EXPECT_EQ(inputs[2].shape()[0], 200); - EXPECT_EQ(inputs[2].shape()[1], 300); - EXPECT_EQ(inputs[2].data(), sparse.getData()); - // CHECK_EQ(inputs[2].sparse().nnz(), 50); - // CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT); - // CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE); - EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows()); - EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols()); - }; - - // call function - function(argments); -} - } // namespace paddle diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp index 
7ce908320a..6e44c2f5db 100644 --- a/paddle/function/FunctionTest.cpp +++ b/paddle/function/FunctionTest.cpp @@ -14,6 +14,7 @@ limitations under the License. */ #include "Function.h" #include +#include "paddle/math/SparseMatrix.h" namespace paddle { @@ -56,4 +57,55 @@ TEST(Function, BufferArgs) { Function(gpuArgments); } +TEST(BufferArgs, asArgument) { + MatrixPtr matrix = Matrix::create(100, 200); + VectorPtr vector = Vector::create(100, false); + CpuSparseMatrix sparse(200, 300, 50); + + // prepare arguments + BufferArgs argments; + argments.addArg(*matrix); + argments.addArg(*vector); + argments.addArg(sparse); + + // function + auto function = [=](const BufferArgs& inputs) { + EXPECT_EQ(inputs.size(), 3); + + // check inputs[0] + EXPECT_EQ(inputs[0].shape().ndims(), 2); + EXPECT_EQ(inputs[0].shape()[0], 100); + EXPECT_EQ(inputs[0].shape()[1], 200); + EXPECT_EQ(inputs[0].data(), matrix->getData()); + + EXPECT_EQ(inputs[0].matrix().getHeight(), + matrix->getHeight()); + EXPECT_EQ(inputs[0].matrix().getWidth(), + matrix->getWidth()); + EXPECT_EQ(inputs[0].matrix().getData(), matrix->getData()); + + // check inputs[1] + EXPECT_EQ(inputs[1].shape().ndims(), 1); + EXPECT_EQ(inputs[1].shape()[0], 100); + EXPECT_EQ(inputs[1].data(), vector->getData()); + CpuVector inVector = inputs[1].vector(); + EXPECT_EQ(inVector.getSize(), vector->getSize()); + EXPECT_EQ(inVector.getData(), vector->getData()); + + // check inputs[2] + EXPECT_EQ(inputs[2].shape().ndims(), 2); + EXPECT_EQ(inputs[2].shape()[0], 200); + EXPECT_EQ(inputs[2].shape()[1], 300); + EXPECT_EQ(inputs[2].data(), sparse.getData()); + // CHECK_EQ(inputs[2].sparse().nnz(), 50); + // CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT); + // CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE); + EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows()); + EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols()); + }; + + // call function + function(argments); +} + } // namespace paddle From 
bff19f57d193f00240ff52419b2c43a7df662453 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 12 Jan 2017 20:15:59 +0800 Subject: [PATCH 04/37] Add a CheckBufferArg. It is used to check the consistency between the BufferArg type argument received by Function and the original type argument. --- paddle/function/FunctionTest.cpp | 114 ++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 39 deletions(-) diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp index 6e44c2f5db..eb05ca9a21 100644 --- a/paddle/function/FunctionTest.cpp +++ b/paddle/function/FunctionTest.cpp @@ -57,55 +57,91 @@ TEST(Function, BufferArgs) { Function(gpuArgments); } -TEST(BufferArgs, asArgument) { +/** + * Some tests case are used to check the consistency between the BufferArg type + * argument received by Function and the original type argument. + * + * Use Case: + * TEST() { + * Matrix matrix(...); + * CheckBufferArg lambda = [=](const BufferArg& arg) { + * // check matrix and arg are equivalent + * EXPECT_EQ(matrix, arg); + * } + * + * BufferArgs argments{matrix...}; + * std::vector checkFunc{lambda...}; + * testBufferArgs(argments, checkFunc); + * } + */ +typedef std::function CheckBufferArg; + +void testBufferArgs(const BufferArgs& inputs, + const std::vector& check) { + EXPECT_EQ(inputs.size(), check.size()); + for (size_t i = 0; i < inputs.size(); i++) { + check[i](inputs[i]); + } +} + +TEST(Arguments, Matrix) { MatrixPtr matrix = Matrix::create(100, 200); - VectorPtr vector = Vector::create(100, false); - CpuSparseMatrix sparse(200, 300, 50); + CheckBufferArg check = [=](const BufferArg& arg) { + EXPECT_EQ(arg.shape().ndims(), 2); + EXPECT_EQ(arg.shape()[0], 100); + EXPECT_EQ(arg.shape()[1], 200); + EXPECT_EQ(arg.data(), matrix->getData()); + + EXPECT_EQ(arg.matrix().getHeight(), matrix->getHeight()); + EXPECT_EQ(arg.matrix().getWidth(), matrix->getWidth()); + EXPECT_EQ(arg.matrix().getData(), matrix->getData()); + }; - // prepare arguments 
BufferArgs argments; argments.addArg(*matrix); - argments.addArg(*vector); - argments.addArg(sparse); + std::vector checkFunc; + checkFunc.push_back(check); + testBufferArgs(argments, checkFunc); +} + +TEST(Arguments, Vector) { + VectorPtr vector = Vector::create(100, false); + CheckBufferArg check = [=](const BufferArg& arg) { + EXPECT_EQ(arg.shape().ndims(), 1); + EXPECT_EQ(arg.shape()[0], 100); + EXPECT_EQ(arg.data(), vector->getData()); - // function - auto function = [=](const BufferArgs& inputs) { - EXPECT_EQ(inputs.size(), 3); - - // check inputs[0] - EXPECT_EQ(inputs[0].shape().ndims(), 2); - EXPECT_EQ(inputs[0].shape()[0], 100); - EXPECT_EQ(inputs[0].shape()[1], 200); - EXPECT_EQ(inputs[0].data(), matrix->getData()); - - EXPECT_EQ(inputs[0].matrix().getHeight(), - matrix->getHeight()); - EXPECT_EQ(inputs[0].matrix().getWidth(), - matrix->getWidth()); - EXPECT_EQ(inputs[0].matrix().getData(), matrix->getData()); - - // check inputs[1] - EXPECT_EQ(inputs[1].shape().ndims(), 1); - EXPECT_EQ(inputs[1].shape()[0], 100); - EXPECT_EQ(inputs[1].data(), vector->getData()); - CpuVector inVector = inputs[1].vector(); + CpuVector inVector = arg.vector(); EXPECT_EQ(inVector.getSize(), vector->getSize()); EXPECT_EQ(inVector.getData(), vector->getData()); + }; - // check inputs[2] - EXPECT_EQ(inputs[2].shape().ndims(), 2); - EXPECT_EQ(inputs[2].shape()[0], 200); - EXPECT_EQ(inputs[2].shape()[1], 300); - EXPECT_EQ(inputs[2].data(), sparse.getData()); - // CHECK_EQ(inputs[2].sparse().nnz(), 50); - // CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT); - // CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE); - EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows()); - EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols()); + BufferArgs argments; + argments.addArg(*vector); + std::vector checkFunc; + checkFunc.push_back(check); + testBufferArgs(argments, checkFunc); +} + +TEST(Arguments, CpuSparseMatrix) { + CpuSparseMatrix sparse(200, 300, 50); + 
CheckBufferArg check = [=](const BufferArg& arg) { + EXPECT_EQ(arg.shape().ndims(), 2); + EXPECT_EQ(arg.shape()[0], 200); + EXPECT_EQ(arg.shape()[1], 300); + EXPECT_EQ(arg.data(), sparse.getData()); + // CHECK_EQ(arg.sparse().nnz(), 50); + // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT); + // CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE); + EXPECT_EQ(arg.sparse().getRowBuf(), sparse.getRows()); + EXPECT_EQ(arg.sparse().getColBuf(), sparse.getCols()); }; - // call function - function(argments); + BufferArgs argments; + argments.addArg(sparse); + std::vector checkFunc; + checkFunc.push_back(check); + testBufferArgs(argments, checkFunc); } } // namespace paddle From edad8a6913a10ab83fd9463c6dce92f93cb5e315 Mon Sep 17 00:00:00 2001 From: zhanghaichao Date: Thu, 12 Jan 2017 11:02:17 -0800 Subject: [PATCH 05/37] bug fix in hierarchical layer doc --- doc/howto/deep_model/rnn/hierarchical_layer_cn.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst index 943b1d4bb8..4b328fc9d3 100644 --- a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst +++ b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst @@ -32,7 +32,7 @@ pooling_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers - `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。 -- `agg_level=AggregateLevel.TIMESTEP` 时(默认值): +- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值): - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列 - 输入:一个双层序列,或一个单层序列 @@ -54,7 +54,7 @@ last_seq 的使用示例如下( :ref:`api_trainer_config_helpers_layers_first_ last = last_seq(input=layer, agg_level=AggregateLevel.EACH_SEQUENCE) -- `agg_level=AggregateLevel.TIMESTEP` 时(默认值): +- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值): - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列 - 输入:一个双层序列或一个单层序列 From 86fa8c05280e18c6fc4a569931d9f50fd9467546 Mon Sep 17 00:00:00 2001 From: xutianbing Date: Thu, 5 Jan 2017 11:05:18 -0800 Subject: [PATCH 
06/37] Wei Xu's comments, set up right inouts. --- paddle/function/ContextProjectionOp.cpp | 124 +++++++++++++------- paddle/function/ContextProjectionOp.h | 20 ++-- paddle/function/ContextProjectionOpGpu.cu | 25 ++-- paddle/function/ContextProjectionOpTest.cpp | 34 +++--- 4 files changed, 124 insertions(+), 79 deletions(-) diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index cb448562eb..8803ea7896 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -70,10 +70,11 @@ void ContextProjectionForward(CpuMatrix& out_mat, } /** + * \param outputs[0] output value. + * * \param inputs[0] input value. * \param inputs[1] input weight. * \param inputs[2] input sequence. - * \param outputs[0] output value. */ template class ContextProjectionForwardFunc : public FunctionBase { @@ -123,7 +124,8 @@ private: }; template <> -void ContextProjectionBackward(CpuMatrix& out_grad_mat, +<<<<<<< HEAD +void ContextProjectionBackward(const CpuMatrix& out_grad_mat, CpuMatrix& in_grad_mat, CpuMatrix& w_grad_mat, const CpuIVector& seq_vec, @@ -176,10 +178,10 @@ void ContextProjectionBackward(CpuMatrix& out_grad_mat, } /** - * \param inputs[0] input grad. - * \param inputs[1] weight grad. - * \param inputs[2] input sequence. - * \param outputs[0] output value. + * \param inputs[0] input sequence. + * \param inputs[1] output grad. + * \param inouts[0] input grad. + * \param inouts[1] weight grad. 
*/ template class ContextProjectionBackwardFunc : public FunctionBase { @@ -192,6 +194,7 @@ public: total_pad_ = config.get("total_pad"); } +<<<<<<< HEAD void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ((size_t)3, inputs.size()); CHECK_EQ((size_t)1, outputs.size()); @@ -210,6 +213,42 @@ public: CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); CHECK_EQ(outputs[0].getArgType(), ADD_TO); +======= + void calc(const Arguments& inputs, + const Arguments& outputs, + const Arguments& inouts) override { + CHECK_EQ(2, inputs.size()); + CHECK_EQ(0, outputs.size()); + CHECK_EQ(2, inouts.size()); + + CHECK(inputs[0].getData() && inputs[1].getData()); + CHECK_EQ(inputs[0].dims_.size(), 1); + CHECK_EQ(inputs[1].dims_.size(), 2); + CHECK_EQ(inouts[0].dims_.size(), 2); + CHECK_EQ(inouts[1].dims_.size(), 2); + + /// dim of input grad == dim of weight grad + CHECK_EQ(inouts[0].dims_[1], inouts[1].dims_[1]); + /// input grad and output grad have the same batch_size + CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]); + /// dim of output = dim of input * context_length + CHECK_EQ(inputs[1].dims_[1], inputs[0].dims_[1] * context_length_); + + typename SequenceT::type seq_vec( + inputs[0].dims_[0], reinterpret_cast(inputs[0].getData())); + const auto out_grad_mat = std::make_shared::type>( + inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); + auto in_grad_mat = + !inouts[0].getData() + ? nullptr + : std::make_shared::type>( + inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]); + auto w_grad_mat = + !inouts[1].getData() + ? nullptr + : std::make_shared::type>( + inouts[1].getData(), inouts[1].dims_[0], inouts[1].dims_[1]); +>>>>>>> Wei Xu's comments, set up right inouts. auto out_grad_mat = outputs[0].matrix(); auto in_grad_mat = @@ -240,9 +279,9 @@ private: #if 0 /** - * \param inputs[0] input grad. - * \param inputs[1] input sequence. - * \param outputs[0] output grad. + * \param inouts[0] input grad. 
+ * \param inputs[0] input sequence. + * \param inputs[1] output grad. */ template class ContextProjectionBackwardDataFunc : public FunctionBase { @@ -255,23 +294,24 @@ public: void calc(const Arguments& inputs, const Arguments& outputs, const Arguments& inouts) override { - CHECK_EQ(2, static_cast(inputs.size())); - CHECK_EQ(1, static_cast(outputs.size())); - CHECK_EQ(0, static_cast(inouts.size())); - CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData()); - CHECK_EQ(static_cast(outputs[0].dims_.size()), 2); - CHECK_EQ(static_cast(inputs[0].dims_.size()), 2); - CHECK_EQ(static_cast(inputs[1].dims_.size()), 1); - CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); - /// input and output has the same batch_size - CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); + CHECK_EQ(2, inputs.size()); + CHECK_EQ(0, outputs.size()); + CHECK_EQ(1, inouts.size()); + + CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData()); + CHECK_EQ(inputs[0].dims_.size(), 1); + CHECK_EQ(inputs[1].dims_.size(), 2); + CHECK_EQ(inouts[0].dims_.size(), 2); + CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_); + /// input and output grad have the same batch_size + CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]); - auto out_grad_mat = std::make_shared::type>( - outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); - const auto in_grad_mat = std::make_shared::type>( - inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]); typename SequenceT::type seq_vec( - inputs[1].dims_[0], reinterpret_cast(inputs[1].getData())); + inputs[0].dims_[0], reinterpret_cast(inputs[0].getData())); + const auto out_grad_mat = std::make_shared::type>( + inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); + auto in_grad_mat = std::make_shared::type>( + inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]); ContextProjectionBackwardData(out_grad_mat.get(), in_grad_mat.get(), @@ -286,9 +326,9 @@ private: }; /** - * 
\param inputs[0] weight grad. - * \param inputs[1] input sequence. - * \param outputs[0] output grad. + * \param inouts[0] weight grad. + * \param inputs[0] input sequence. + * \param inputs[1] output grad. */ template class ContextProjectionBackwardWeightFunc : public FunctionBase { @@ -303,22 +343,22 @@ public: void calc(const Arguments& inputs, const Arguments& outputs, const Arguments& inouts) override { - CHECK_EQ(2, static_cast(inputs.size())); - CHECK_EQ(1, static_cast(outputs.size())); - CHECK_EQ(0, static_cast(inouts.size())); - - CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData()); - CHECK_EQ(static_cast(outputs[0].dims_.size()), 2); - CHECK_EQ(static_cast(inputs[0].dims_.size()), 2); - CHECK_EQ(static_cast(inputs[1].dims_.size()), 1); - CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); - - auto out_grad_mat = std::make_shared::type>( - outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); - auto w_grad_mat = std::make_shared::type>( - inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]); + CHECK_EQ(2, inputs.size()); + CHECK_EQ(0, outputs.size()); + CHECK_EQ(1, inouts.size()); + + CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData()); + CHECK_EQ(inputs[0].dims_.size(), 1); + CHECK_EQ(inputs[1].dims_.size(), 2); + CHECK_EQ(inouts[0].dims_.size(), 2); + CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_); + typename SequenceT::type seq_vec( - inputs[1].dims_[0], reinterpret_cast(inputs[1].getData())); + inputs[0].dims_[0], reinterpret_cast(inputs[0].getData())); + const auto out_grad_mat = std::make_shared::type>( + inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); + auto w_grad_mat = std::make_shared::type>( + inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]); ContextProjectionBackwardWeight(out_grad_mat.get(), w_grad_mat.get(), diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h index 
a558df5e07..8e956c6c6f 100644 --- a/paddle/function/ContextProjectionOp.h +++ b/paddle/function/ContextProjectionOp.h @@ -21,14 +21,14 @@ namespace paddle { /** * \brief Context Projection Forward. * - * \param[out] outputs output data. - * \param[in] input input data. - * \param[in] weight input weight. - * \param[in] sequence input data. - * \param[in] context_length consecutive rows for concatenation. - * \param[in] context_start context start position. - * \param[in] begin_pad begining pad position. - * \param[in] is_padding whether padding 0 or not. + * \param[in/out] outputs output data. + * \param[in] input input data. + * \param[in] weight input weight. + * \param[in] sequence input data. + * \param[in] context_length consecutive rows for concatenation. + * \param[in] context_start context start position. + * \param[in] begin_pad begining pad position. + * \param[in] is_padding whether padding 0 or not. * */ template @@ -68,7 +68,7 @@ void ContextProjectionBackward( template void ContextProjectionBackwardData( - typename Tensor::Matrix& out_grad, + const typename Tensor::Matrix& out_grad, typename Tensor::Matrix& in_grad, const typename Tensor::Vector& sequence, size_t context_length, @@ -76,7 +76,7 @@ void ContextProjectionBackwardData( template void ContextProjectionBackwardWeight( - typename Tensor::Matrix& out_grad, + const typename Tensor::Matrix& out_grad, typename Tensor::Matrix& w_grad, const typename Tensor::Vector& seq_vec, size_t context_length, diff --git a/paddle/function/ContextProjectionOpGpu.cu b/paddle/function/ContextProjectionOpGpu.cu index 6a4a01a651..6194ad8e74 100644 --- a/paddle/function/ContextProjectionOpGpu.cu +++ b/paddle/function/ContextProjectionOpGpu.cu @@ -138,10 +138,10 @@ void ContextProjectionForward(GpuMatrix& output, begin_pad); } -__global__ void KeContextProjectionBackwardData(real* out_grad, +__global__ void KeContextProjectionBackwardData(const real* out_grad, const int* sequence, real* in_grad, - int input_dim, + 
size_t input_dim, int context_length, int context_start) { int idx = threadIdx.x; @@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, real value = 0; int instances = seq_end - seq_start + context_length - 1; - out_grad += seq_start * input_dim * context_length; + auto out = const_cast(out_grad); + out += seq_start * input_dim * context_length; in_grad += seq_start * input_dim; for (int k = 0; k <= input_dim / block_size; k++) { if (idx < input_dim) { @@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, int outx = (i - context_length) < 0 ? i : (context_length - 1); int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1)); real* output_r = - out_grad + outy * input_dim * context_length + outx * input_dim; + out + outy * input_dim * context_length + outx * input_dim; for (int j = outy; j < seq_end - seq_start; j++) { value += output_r[idx]; if (j - outy == outx) break; @@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, * @param[in] context_start context start. 
* */ -void hl_context_projection_backward_data(real* out_grad, +void hl_context_projection_backward_data(const real* out_grad, const int* sequence, real* input_grad, size_t num_sequences, @@ -216,7 +217,8 @@ void hl_context_projection_backward_data(real* out_grad, } template <> -void ContextProjectionBackwardData(GpuMatrix& out_grad, +<<<<<<< HEAD +void ContextProjectionBackwardData(const GpuMatrix& out_grad, GpuMatrix& in_grad, const GpuIVector& sequence, size_t context_length, @@ -231,7 +233,7 @@ void ContextProjectionBackwardData(GpuMatrix& out_grad, } template -__global__ void KeContextProjectionBackwardWeight(real* out_grad, +__global__ void KeContextProjectionBackwardWeight(const real* out_grad, const int* sequence, real* w_grad, int num_sequences, @@ -254,7 +256,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad, for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) { int seq_start = sequence[seqId]; int seq_end = sequence[seqId+1]; - output_r = out_grad + seq_start * w_dim * context_length; + output_r = const_cast(out_grad) + + seq_start * w_dim * context_length; if (context_start < 0) { if (padId + context_start < 0) { @@ -318,7 +321,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad, * beginning. 
* */ -void hl_context_projection_backward_weight(real* out_grad, +void hl_context_projection_backward_weight(const real* out_grad, const int* sequence, real* w_grad, size_t num_sequences, @@ -346,7 +349,7 @@ void hl_context_projection_backward_weight(real* out_grad, template <> void ContextProjectionBackwardWeight( - GpuMatrix& out_grad, + const GpuMatrix& out_grad, GpuMatrix& w_grad, const GpuIVector& seq_vec, size_t context_length, @@ -365,7 +368,7 @@ void ContextProjectionBackwardWeight( } template <> -void ContextProjectionBackward(GpuMatrix& out_grad, +void ContextProjectionBackward(const GpuMatrix& out_grad, GpuMatrix& in_grad, GpuMatrix& w_grad, const GpuIVector& sequence, diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp index 6223d2fd23..169c1dd505 100644 --- a/paddle/function/ContextProjectionOpTest.cpp +++ b/paddle/function/ContextProjectionOpTest.cpp @@ -62,16 +62,18 @@ void testMatrixProjectionForward(int context_start, Dims{pad, input_dim}), Tensor(reinterpret_cast(cpu_seq->getData()), Dims{cpu_seq->getSize()})}, - {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})}, - {}); + {}, + {Tensor(cpu_out.getData(), + Dims{batch_size, input_dim * context_length})}); compare.getGpuFunction()->calc( {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}), Tensor(gpu_weight ? 
gpu_weight->getData() : nullptr, Dims{pad, input_dim}), Tensor(reinterpret_cast(gpu_seq->getData()), Dims{gpu_seq->getSize()})}, - {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})}, - {}); + {}, + {Tensor(gpu_out.getData(), + Dims{batch_size, input_dim * context_length})}); autotest::TensorCheckEqual(cpu_out, gpu_out); } @@ -118,24 +120,24 @@ void testMatrixProjectionBackward(int context_start, } compare.getCpuFunction()->calc( + {Tensor(reinterpret_cast(cpu_seq->getData()), + Dims{cpu_seq->getSize()}), + Tensor(cpu_out_grad.getData(), + Dims{batch_size, input_dim * context_length})}, + {}, {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}), Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, - Dims{pad, input_dim}), - Tensor(reinterpret_cast(cpu_seq->getData()), - Dims{cpu_seq->getSize()})}, - {Tensor(cpu_out_grad.getData(), - Dims{batch_size, input_dim * context_length})}, - {}); + Dims{pad, input_dim})}); compare.getGpuFunction()->calc( + {Tensor(reinterpret_cast(gpu_seq->getData()), + Dims{gpu_seq->getSize()}), + Tensor(gpu_out_grad.getData(), + Dims{batch_size, input_dim * context_length})}, + {}, {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}), Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr, - Dims{pad, input_dim}), - Tensor(reinterpret_cast(gpu_seq->getData()), - Dims{gpu_seq->getSize()})}, - {Tensor(gpu_out_grad.getData(), - Dims{batch_size, input_dim * context_length})}, - {}); + Dims{pad, input_dim})}); autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad); if (is_padding) { From df66957ec32f38b45b3e7274ef8f99699391854f Mon Sep 17 00:00:00 2001 From: xutianbing Date: Thu, 5 Jan 2017 11:14:51 -0800 Subject: [PATCH 07/37] clean a little bit code. 
--- paddle/function/ContextProjectionOp.cpp | 2 +- paddle/function/ContextProjectionOpGpu.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index 8803ea7896..f1e42cad72 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -232,7 +232,7 @@ public: /// input grad and output grad have the same batch_size CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]); /// dim of output = dim of input * context_length - CHECK_EQ(inputs[1].dims_[1], inputs[0].dims_[1] * context_length_); + CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_); typename SequenceT::type seq_vec( inputs[0].dims_[0], reinterpret_cast(inputs[0].getData())); diff --git a/paddle/function/ContextProjectionOpGpu.cu b/paddle/function/ContextProjectionOpGpu.cu index 6194ad8e74..c5a636dce8 100644 --- a/paddle/function/ContextProjectionOpGpu.cu +++ b/paddle/function/ContextProjectionOpGpu.cu @@ -256,7 +256,7 @@ __global__ void KeContextProjectionBackwardWeight(const real* out_grad, for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) { int seq_start = sequence[seqId]; int seq_end = sequence[seqId+1]; - output_r = const_cast(out_grad) + output_r = const_cast(out_grad) + seq_start * w_dim * context_length; if (context_start < 0) { From 1482ec430a918cc5f9b44c3acf9d60d895c05b26 Mon Sep 17 00:00:00 2001 From: xutianbing Date: Sat, 7 Jan 2017 13:57:31 -0800 Subject: [PATCH 08/37] some comments. --- paddle/function/ContextProjectionOp.cpp | 88 ++++++++++++------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index f1e42cad72..75c09108b1 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -18,6 +18,10 @@ limitations under the License. 
*/ namespace paddle { +/** + * Context Projection Forward with CPU Matrix Device. + * + */ template <> void ContextProjectionForward(CpuMatrix& out_mat, const CpuMatrix& input_mat, @@ -70,11 +74,29 @@ void ContextProjectionForward(CpuMatrix& out_mat, } /** - * \param outputs[0] output value. + * Paddle Function for Context Projection Forward. + * Calculate the value for the output layer with context projection. + * + * What is Context Projection? + * For example, assumed input (x) has 4 words and the dimension of each word + * representation is 2. If we use zero to pad instead of learned weight to pad, + * and the context_lenth is 3, the output (y) is: * - * \param inputs[0] input value. - * \param inputs[1] input weight. - * \param inputs[2] input sequence. + * @code + * x = [a1, a2; + * b1, b2; + * c1, c2; + * d1, d2] + * y = [0, 0, a1, a2, b1, b2; + * a1, a2, b1, b2, c1, c2; + * b1, b2, c1, c2, d1, d2; + * c1, c2, d1, d2, 0, 0] + * @endcode + * + * \param outputs[0] output value. + * \param inputs[0] input value. + * \param inputs[1] input weight. + * \param inputs[2] input sequence. */ template class ContextProjectionForwardFunc : public FunctionBase { @@ -123,6 +145,10 @@ private: size_t begin_pad_; }; +/** + * Context Projection Backward with CPU Matrix Device. + * + */ template <> <<<<<<< HEAD void ContextProjectionBackward(const CpuMatrix& out_grad_mat, @@ -178,10 +204,13 @@ void ContextProjectionBackward(const CpuMatrix& out_grad_mat, } /** - * \param inputs[0] input sequence. - * \param inputs[1] output grad. - * \param inouts[0] input grad. - * \param inouts[1] weight grad. + * Context Projection Backward Function. + * Update the weight gradient and input layer gradient with backprop + * + * \param inputs[0] input sequence. + * \param inputs[1] output grad. + * \param inouts[0] input grad. + * \param inouts[1] weight grad. 
*/ template class ContextProjectionBackwardFunc : public FunctionBase { @@ -194,7 +223,6 @@ public: total_pad_ = config.get("total_pad"); } -<<<<<<< HEAD void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ((size_t)3, inputs.size()); CHECK_EQ((size_t)1, outputs.size()); @@ -213,42 +241,6 @@ public: CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); CHECK_EQ(outputs[0].getArgType(), ADD_TO); -======= - void calc(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) override { - CHECK_EQ(2, inputs.size()); - CHECK_EQ(0, outputs.size()); - CHECK_EQ(2, inouts.size()); - - CHECK(inputs[0].getData() && inputs[1].getData()); - CHECK_EQ(inputs[0].dims_.size(), 1); - CHECK_EQ(inputs[1].dims_.size(), 2); - CHECK_EQ(inouts[0].dims_.size(), 2); - CHECK_EQ(inouts[1].dims_.size(), 2); - - /// dim of input grad == dim of weight grad - CHECK_EQ(inouts[0].dims_[1], inouts[1].dims_[1]); - /// input grad and output grad have the same batch_size - CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]); - /// dim of output = dim of input * context_length - CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_); - - typename SequenceT::type seq_vec( - inputs[0].dims_[0], reinterpret_cast(inputs[0].getData())); - const auto out_grad_mat = std::make_shared::type>( - inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); - auto in_grad_mat = - !inouts[0].getData() - ? nullptr - : std::make_shared::type>( - inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]); - auto w_grad_mat = - !inouts[1].getData() - ? nullptr - : std::make_shared::type>( - inouts[1].getData(), inouts[1].dims_[0], inouts[1].dims_[1]); ->>>>>>> Wei Xu's comments, set up right inouts. auto out_grad_mat = outputs[0].matrix(); auto in_grad_mat = @@ -279,6 +271,9 @@ private: #if 0 /** + * Context Projection Backward Data Function. + * Update gradient of the input layer with backprop. + * * \param inouts[0] input grad. 
* \param inputs[0] input sequence. * \param inputs[1] output grad. @@ -326,6 +321,9 @@ private: }; /** + * Context Projection Backward Weight Function. + * Update weight gradient with backprop. + * * \param inouts[0] weight grad. * \param inputs[0] input sequence. * \param inputs[1] output grad. From 23ac0b78cb472e2f5007531427e142d553831e91 Mon Sep 17 00:00:00 2001 From: xutianbing Date: Tue, 10 Jan 2017 16:13:41 -0800 Subject: [PATCH 09/37] merge Daoyuan's FuncArgs, pass the ContextProjection test. --- paddle/function/CMakeLists.txt | 2 +- paddle/function/ContextProjectionOp.cpp | 181 ++++---------------- paddle/function/ContextProjectionOp.h | 2 +- paddle/function/ContextProjectionOpGpu.cu | 1 - paddle/function/ContextProjectionOpTest.cpp | 75 ++++---- paddle/function/FunctionTest.h | 72 ++------ paddle/gserver/layers/ContextProjection.cpp | 15 +- 7 files changed, 101 insertions(+), 247 deletions(-) diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 75a2acc55e..39733479cc 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -24,7 +24,7 @@ if(WITH_TESTING) add_simple_unittest(TensorTypeTest) add_simple_unittest(BufferArgTest) add_simple_unittest(FunctionTest) - # add_simple_unittest(ContextProjectionOpTest) + add_simple_unittest(ContextProjectionOpTest) endif() endif() diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index 75c09108b1..42b78eacfd 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -125,11 +125,11 @@ public: CHECK_EQ(outputs[0].getArgType(), ADD_TO); auto out_mat = outputs[0].matrix(); - auto in_mat = inputs[0].matrix(); - auto w_mat = !inputs[1].data() - ? typename Tensor::Matrix(nullptr, 0, 0) - : inputs[1].matrix(); - auto seq_vec = inputs[2].vector(); + const auto in_mat = inputs[0].matrix(); + const auto w_mat = + !inputs[1].data() ? 
typename Tensor::Matrix(nullptr, 0, 0) + : inputs[1].matrix(); + const auto seq_vec = inputs[2].vector(); ContextProjectionForward(out_mat, in_mat, w_mat, @@ -150,7 +150,6 @@ private: * */ template <> -<<<<<<< HEAD void ContextProjectionBackward(const CpuMatrix& out_grad_mat, CpuMatrix& in_grad_mat, CpuMatrix& w_grad_mat, @@ -174,7 +173,8 @@ void ContextProjectionBackward(const CpuMatrix& out_grad_mat, int64_t pad_size = std::min(starts[i] - begin, starts[i + 1] - starts[i]); if (is_padding && w_grad_mat) { - MatrixPtr mat = out_grad_mat.subMatrix(starts[i], pad_size); + MatrixPtr mat = const_cast(out_grad_mat) + .subMatrix(starts[i], pad_size); MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size); sub->addAtOffset(*mat, j * input_dim); } @@ -185,8 +185,8 @@ void ContextProjectionBackward(const CpuMatrix& out_grad_mat, int64_t pad_size = std::min(end - starts[i + 1], starts[i + 1] - starts[i]); if (is_padding && w_grad_mat) { - MatrixPtr mat = - out_grad_mat.subMatrix(starts[i + 1] - pad_size, pad_size); + MatrixPtr mat = const_cast(out_grad_mat) + .subMatrix(starts[i + 1] - pad_size, pad_size); MatrixPtr sub = w_grad_mat.subMatrix( begin_pad + context_start + j - pad_size, pad_size); sub->addAtOffset(*mat, j * input_dim); @@ -197,7 +197,8 @@ void ContextProjectionBackward(const CpuMatrix& out_grad_mat, if (end <= begin) continue; if (!in_grad_mat) continue; MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin); - MatrixPtr dst = out_grad_mat.subMatrix(dst_begin, dst_end - dst_begin); + MatrixPtr dst = const_cast(out_grad_mat) + .subMatrix(dst_begin, dst_end - dst_begin); src->addAtOffset(*dst, j * input_dim); } } @@ -207,10 +208,10 @@ void ContextProjectionBackward(const CpuMatrix& out_grad_mat, * Context Projection Backward Function. * Update the weight gradient and input layer gradient with backprop * - * \param inputs[0] input sequence. - * \param inputs[1] output grad. - * \param inouts[0] input grad. - * \param inouts[1] weight grad. 
+ * \param inputs[0] input sequence. + * \param inputs[1] output layer grad. + * \param outputs[0] input layer grad. + * \param outputs[1] weight grad. */ template class ContextProjectionBackwardFunc : public FunctionBase { @@ -224,32 +225,34 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ((size_t)3, inputs.size()); - CHECK_EQ((size_t)1, outputs.size()); + CHECK_EQ((size_t)2, inputs.size()); + CHECK_EQ((size_t)2, outputs.size()); - CHECK(outputs[0].data() && inputs[2].data()); - CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); - CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); + CHECK(inputs[0].data() && inputs[1].data()); + CHECK_EQ(inputs[0].shape().ndims(), (size_t)1); CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); - CHECK_EQ(inputs[2].shape().ndims(), (size_t)1); + CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); + CHECK_EQ(outputs[1].shape().ndims(), (size_t)2); - /// dim of input == dim of weight - CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); - /// input and output has the same batch_size - CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); - /// dim of output = dim of input * context_length - CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); + /// dim of input grad == dim of weight + CHECK_EQ(outputs[0].shape()[1], outputs[1].shape()[1]); + /// input and output grad has the same batch_size + CHECK_EQ(outputs[0].shape()[0], inputs[1].shape()[0]); + /// dim of output val = dim of input grad * context_length + CHECK_EQ(inputs[1].shape()[1], outputs[0].shape()[1] * context_length_); CHECK_EQ(outputs[0].getArgType(), ADD_TO); + CHECK_EQ(outputs[1].getArgType(), ADD_TO); - auto out_grad_mat = outputs[0].matrix(); + const auto seq_vec = inputs[0].vector(); + const auto out_grad_mat = inputs[1].matrix(); auto in_grad_mat = - !inputs[0].data() ? typename Tensor::Matrix(nullptr, 0, 0) - : inputs[0].matrix(); - auto w_grad_mat = !inputs[1].data() + !outputs[0].data() + ? 
typename Tensor::Matrix(nullptr, 0, 0) + : outputs[0].matrix(); + auto w_grad_mat = !outputs[1].data() ? typename Tensor::Matrix(nullptr, 0, 0) - : inputs[1].matrix(); - auto seq_vec = inputs[2].vector(); + : outputs[1].matrix(); ContextProjectionBackward(out_grad_mat, in_grad_mat, w_grad_mat, @@ -269,112 +272,6 @@ private: size_t total_pad_; }; -#if 0 -/** - * Context Projection Backward Data Function. - * Update gradient of the input layer with backprop. - * - * \param inouts[0] input grad. - * \param inputs[0] input sequence. - * \param inputs[1] output grad. - */ -template -class ContextProjectionBackwardDataFunc : public FunctionBase { -public: - void init(const FuncConfig& config) override { - context_length_ = config.get("context_length"); - context_start_ = config.get("context_start"); - } - - void calc(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) override { - CHECK_EQ(2, inputs.size()); - CHECK_EQ(0, outputs.size()); - CHECK_EQ(1, inouts.size()); - - CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData()); - CHECK_EQ(inputs[0].dims_.size(), 1); - CHECK_EQ(inputs[1].dims_.size(), 2); - CHECK_EQ(inouts[0].dims_.size(), 2); - CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_); - /// input and output grad have the same batch_size - CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]); - - typename SequenceT::type seq_vec( - inputs[0].dims_[0], reinterpret_cast(inputs[0].getData())); - const auto out_grad_mat = std::make_shared::type>( - inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); - auto in_grad_mat = std::make_shared::type>( - inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]); - - ContextProjectionBackwardData(out_grad_mat.get(), - in_grad_mat.get(), - seq_vec, - context_length_, - context_start_); - } - -private: - size_t context_length_; - int context_start_; -}; - -/** - * Context Projection Backward Weight Function. - * Update weight gradient with backprop. 
- * - * \param inouts[0] weight grad. - * \param inputs[0] input sequence. - * \param inputs[1] output grad. - */ -template -class ContextProjectionBackwardWeightFunc : public FunctionBase { -public: - void init(const FuncConfig& config) override { - context_length_ = config.get("context_length"); - context_start_ = config.get("context_start"); - begin_pad_ = config.get("begin_pad"); - total_pad_ = config.get("total_pad"); - } - - void calc(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) override { - CHECK_EQ(2, inputs.size()); - CHECK_EQ(0, outputs.size()); - CHECK_EQ(1, inouts.size()); - - CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData()); - CHECK_EQ(inputs[0].dims_.size(), 1); - CHECK_EQ(inputs[1].dims_.size(), 2); - CHECK_EQ(inouts[0].dims_.size(), 2); - CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_); - - typename SequenceT::type seq_vec( - inputs[0].dims_[0], reinterpret_cast(inputs[0].getData())); - const auto out_grad_mat = std::make_shared::type>( - inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); - auto w_grad_mat = std::make_shared::type>( - inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]); - - ContextProjectionBackwardWeight(out_grad_mat.get(), - w_grad_mat.get(), - seq_vec, - context_length_, - context_start_, - total_pad_, - begin_pad_); - } - -private: - size_t context_length_; - int context_start_; - size_t begin_pad_; - size_t total_pad_; -}; -#endif - REGISTER_TYPED_FUNC(ContextProjectionForward, CPU, ContextProjectionForwardFunc); @@ -388,13 +285,5 @@ REGISTER_TYPED_FUNC(ContextProjectionForward, REGISTER_TYPED_FUNC(ContextProjectionBackward, GPU, ContextProjectionBackwardFunc); -#if 0 -REGISTER_TYPED_FUNC(ContextProjectionBackwardData, - GPU, - ContextProjectionBackwardDataFunc); -REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight, - GPU, - ContextProjectionBackwardWeightFunc); -#endif #endif } // namespace paddle diff --git 
a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h index 8e956c6c6f..2bdd47e4e9 100644 --- a/paddle/function/ContextProjectionOp.h +++ b/paddle/function/ContextProjectionOp.h @@ -56,7 +56,7 @@ void ContextProjectionForward( */ template void ContextProjectionBackward( - typename Tensor::Matrix& out_grad, + const typename Tensor::Matrix& out_grad, typename Tensor::Matrix& in_grad, typename Tensor::Matrix& w_grad, const typename Tensor::Vector& seq_vec, diff --git a/paddle/function/ContextProjectionOpGpu.cu b/paddle/function/ContextProjectionOpGpu.cu index c5a636dce8..1a5b404240 100644 --- a/paddle/function/ContextProjectionOpGpu.cu +++ b/paddle/function/ContextProjectionOpGpu.cu @@ -217,7 +217,6 @@ void hl_context_projection_backward_data(const real* out_grad, } template <> -<<<<<<< HEAD void ContextProjectionBackwardData(const GpuMatrix& out_grad, GpuMatrix& in_grad, const GpuIVector& sequence, diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp index 169c1dd505..c8d5b4f278 100644 --- a/paddle/function/ContextProjectionOpTest.cpp +++ b/paddle/function/ContextProjectionOpTest.cpp @@ -56,24 +56,25 @@ void testMatrixProjectionForward(int context_start, cpu_out.randomizeUniform(); gpu_out.copyFrom(cpu_out); - compare.getCpuFunction()->calc( - {Tensor(cpu_in.getData(), Dims{batch_size, input_dim}), - Tensor(cpu_weight ? cpu_weight->getData() : nullptr, - Dims{pad, input_dim}), - Tensor(reinterpret_cast(cpu_seq->getData()), - Dims{cpu_seq->getSize()})}, - {}, - {Tensor(cpu_out.getData(), - Dims{batch_size, input_dim * context_length})}); - compare.getGpuFunction()->calc( - {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}), - Tensor(gpu_weight ? 
gpu_weight->getData() : nullptr, - Dims{pad, input_dim}), - Tensor(reinterpret_cast(gpu_seq->getData()), - Dims{gpu_seq->getSize()})}, - {}, - {Tensor(gpu_out.getData(), - Dims{batch_size, input_dim * context_length})}); + BufferArgs cpu_inputs; + BufferArgs cpu_outputs; + cpu_inputs.addArg(cpu_in); + cpu_inputs.addArg(cpu_weight ? *cpu_weight + : CpuMatrix(nullptr, 0, input_dim)); + cpu_inputs.addArg(*cpu_seq); + cpu_outputs.addArg(cpu_out, ADD_TO); + + compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs); + + BufferArgs gpu_inputs; + BufferArgs gpu_outputs; + gpu_inputs.addArg(gpu_in); + gpu_inputs.addArg(gpu_weight ? *gpu_weight + : GpuMatrix(nullptr, 0, input_dim)); + gpu_inputs.addArg(*gpu_seq); + gpu_outputs.addArg(gpu_out, ADD_TO); + + compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs); autotest::TensorCheckEqual(cpu_out, gpu_out); } @@ -119,25 +120,25 @@ void testMatrixProjectionBackward(int context_start, gpu_w_grad->copyFrom(*cpu_w_grad); } - compare.getCpuFunction()->calc( - {Tensor(reinterpret_cast(cpu_seq->getData()), - Dims{cpu_seq->getSize()}), - Tensor(cpu_out_grad.getData(), - Dims{batch_size, input_dim * context_length})}, - {}, - {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}), - Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, - Dims{pad, input_dim})}); - - compare.getGpuFunction()->calc( - {Tensor(reinterpret_cast(gpu_seq->getData()), - Dims{gpu_seq->getSize()}), - Tensor(gpu_out_grad.getData(), - Dims{batch_size, input_dim * context_length})}, - {}, - {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}), - Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr, - Dims{pad, input_dim})}); + BufferArgs cpu_inputs; + BufferArgs cpu_outputs; + cpu_inputs.addArg(*cpu_seq); + cpu_inputs.addArg(cpu_out_grad); + cpu_outputs.addArg(cpu_in_grad, ADD_TO); + cpu_outputs.addArg( + cpu_w_grad ? 
*cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO); + + compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs); + + BufferArgs gpu_inputs; + BufferArgs gpu_outputs; + gpu_inputs.addArg(*gpu_seq); + gpu_inputs.addArg(gpu_out_grad); + gpu_outputs.addArg(gpu_in_grad, ADD_TO); + gpu_outputs.addArg( + gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO); + + compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs); autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad); if (is_padding) { diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h index 32131037f6..da4c0f4f07 100644 --- a/paddle/function/FunctionTest.h +++ b/paddle/function/FunctionTest.h @@ -27,66 +27,28 @@ public: gpu->init(config); } - void cmpWithArg(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) { + void cmpWithArg(const BufferArgs& inputs, + const BufferArgs& outputs, + const BufferArgs& inouts) { // init cpu and gpu arguments auto initArgs = [=]( - Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) { - for (const auto arg : inArgs) { - size_t size = sizeof(real); - for (const auto dim : arg.dims_) { - size *= dim; - } - if (arg.getData()) { - // todo(tianbing), waste unnecessary mem here - cpuMemory.emplace_back(std::make_shared(size)); - gpuMemory.emplace_back(std::make_shared(size)); - cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_)); - gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_)); - // already init outside - } else { - cpuMemory.emplace_back(std::make_shared(size)); - gpuMemory.emplace_back(std::make_shared(size)); - cpuArgs.emplace_back( - Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_)); - gpuArgs.emplace_back( - Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_)); - // will use an api to refactor this code. 
- CpuVector cpuVector(size / sizeof(real), - (real*)cpuArgs.back().getData()); - GpuVector gpuVector(size / sizeof(real), - (real*)gpuArgs.back().getData()); - cpuVector.uniform(0.001, 1); - gpuVector.copyFrom(cpuVector); - } - } + BufferArgs& cpuArgs, BufferArgs& gpuArgs, const BufferArgs& inArgs) { + /// leave it empty to pass the compile of ContextProjectionTest + /// Daoyuan is working on FunctionTest + /// and I will further merge with it }; initArgs(cpuInputs, gpuInputs, inputs); initArgs(cpuOutputs, gpuOutputs, outputs); - initArgs(cpuInouts, gpuInouts, inouts); // function calculate - cpu->calc(cpuInputs, cpuOutputs, cpuInouts); - gpu->calc(gpuInputs, gpuOutputs, gpuInouts); + cpu->calc(cpuInputs, cpuOutputs); + gpu->calc(gpuInputs, gpuOutputs); // check outputs and inouts - auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) { - for (size_t i = 0; i < cpuArgs.size(); i++) { - auto cpu = cpuArgs[i]; - auto gpu = gpuArgs[i]; - size_t size = 1; - for (auto dim : cpu.dims_) { - size *= dim; - } - CpuVector cpuVector(size, (real*)cpu.getData()); - GpuVector gpuVector(size, (real*)gpu.getData()); - - autotest::TensorCheckErr(cpuVector, gpuVector); - } + auto checkArgs = [=](const BufferArgs& cpuArgs, const BufferArgs& gpuArgs) { + /// leave it open }; checkArgs(cpuOutputs, gpuOutputs); - checkArgs(cpuInouts, gpuInouts); } std::shared_ptr getCpuFunction() const { return cpu; } @@ -98,12 +60,12 @@ protected: std::shared_ptr gpu; std::vector cpuMemory; std::vector gpuMemory; - Arguments cpuInputs; - Arguments cpuOutputs; - Arguments cpuInouts; - Arguments gpuInputs; - Arguments gpuOutputs; - Arguments gpuInouts; + BufferArgs cpuInputs; + BufferArgs cpuOutputs; + BufferArgs cpuInouts; + BufferArgs gpuInputs; + BufferArgs gpuOutputs; + BufferArgs gpuInouts; }; } // namespace paddle diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp index ebcc87cbf4..def7c15ca8 100644 --- 
a/paddle/gserver/layers/ContextProjection.cpp +++ b/paddle/gserver/layers/ContextProjection.cpp @@ -166,13 +166,16 @@ void ContextProjection::backward(const UpdateCallback& callback) { BufferArgs inputs; BufferArgs outputs; - inputs.addArg(CpuMatrix( - in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim)); - inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, - w_ptr ? w_ptr->getHeight() : 0, - input_dim)); inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); - outputs.addArg(*out_->grad, ADD_TO); + inputs.addArg(*out_->grad); + outputs.addArg( + CpuMatrix( + in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim), + ADD_TO); + outputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, + w_ptr ? w_ptr->getHeight() : 0, + input_dim), + ADD_TO); backward_[0]->calc(inputs, outputs); if (config_.trainable_padding()) { From 9edfd2003153dd122b01300c3973249be1abd8c1 Mon Sep 17 00:00:00 2001 From: xutianbing Date: Tue, 10 Jan 2017 20:10:16 -0800 Subject: [PATCH 10/37] use Daoyuan's SequenceArg. 
--- paddle/function/BufferArg.h | 3 ++- paddle/function/ContextProjectionOp.cpp | 26 ++++++++++----------- paddle/function/ContextProjectionOpTest.cpp | 6 ++--- paddle/function/Function.cpp | 6 +++++ paddle/function/Function.h | 4 ++++ paddle/gserver/layers/ContextProjection.cpp | 3 +-- 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 12352ba29e..f3a4350e12 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -190,7 +190,7 @@ private: size_t numSeqs_; }; -// sequence data +// sequence data {seqId(vec), buf(matrix)} class SequenceArg : public BufferArg { public: SequenceArg(void* buf, @@ -210,6 +210,7 @@ public: void* getIdBuf() const { return startPositions_.data(); } size_t numSeqs() const { return startPositions_.numSeqs(); } + const SequenceIdArg& getSequenceIds() const { return startPositions_; } private: SequenceIdArg startPositions_; diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index 42b78eacfd..177708d00f 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -17,7 +17,6 @@ limitations under the License. */ #include "paddle/math/Vector.h" namespace paddle { - /** * Context Projection Forward with CPU Matrix Device. * @@ -208,10 +207,10 @@ void ContextProjectionBackward(const CpuMatrix& out_grad_mat, * Context Projection Backward Function. * Update the weight gradient and input layer gradient with backprop * - * \param inputs[0] input sequence. - * \param inputs[1] output layer grad. - * \param outputs[0] input layer grad. - * \param outputs[1] weight grad. + * \param inputs[0].seq input sequence. + * \param inputs[0].matrix output layer grad. + * \param outputs[0] input layer grad. + * \param outputs[1] weight grad. 
*/ template class ContextProjectionBackwardFunc : public FunctionBase { @@ -225,27 +224,28 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ((size_t)2, inputs.size()); + CHECK_EQ((size_t)1, inputs.size()); CHECK_EQ((size_t)2, outputs.size()); - CHECK(inputs[0].data() && inputs[1].data()); - CHECK_EQ(inputs[0].shape().ndims(), (size_t)1); - CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); + const auto seqArg = dynamic_cast(inputs[0]); + CHECK(seqArg.data() && inputs[0].data()); + CHECK_EQ(seqArg.shape().ndims(), (size_t)2); + CHECK_EQ(seqArg.getSequenceIds().shape().ndims(), (size_t)1); CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); CHECK_EQ(outputs[1].shape().ndims(), (size_t)2); /// dim of input grad == dim of weight CHECK_EQ(outputs[0].shape()[1], outputs[1].shape()[1]); /// input and output grad has the same batch_size - CHECK_EQ(outputs[0].shape()[0], inputs[1].shape()[0]); + CHECK_EQ(outputs[0].shape()[0], seqArg.shape()[0]); /// dim of output val = dim of input grad * context_length - CHECK_EQ(inputs[1].shape()[1], outputs[0].shape()[1] * context_length_); + CHECK_EQ(seqArg.shape()[1], outputs[0].shape()[1] * context_length_); CHECK_EQ(outputs[0].getArgType(), ADD_TO); CHECK_EQ(outputs[1].getArgType(), ADD_TO); - const auto seq_vec = inputs[0].vector(); - const auto out_grad_mat = inputs[1].matrix(); + const auto seq_vec = seqArg.getSequenceIds().vector(); + const auto out_grad_mat = seqArg.matrix(); auto in_grad_mat = !outputs[0].data() ? 
typename Tensor::Matrix(nullptr, 0, 0) diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp index c8d5b4f278..50ca204005 100644 --- a/paddle/function/ContextProjectionOpTest.cpp +++ b/paddle/function/ContextProjectionOpTest.cpp @@ -122,8 +122,7 @@ void testMatrixProjectionBackward(int context_start, BufferArgs cpu_inputs; BufferArgs cpu_outputs; - cpu_inputs.addArg(*cpu_seq); - cpu_inputs.addArg(cpu_out_grad); + cpu_inputs.addArg(cpu_out_grad, *cpu_seq); cpu_outputs.addArg(cpu_in_grad, ADD_TO); cpu_outputs.addArg( cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO); @@ -132,8 +131,7 @@ void testMatrixProjectionBackward(int context_start, BufferArgs gpu_inputs; BufferArgs gpu_outputs; - gpu_inputs.addArg(*gpu_seq); - gpu_inputs.addArg(gpu_out_grad); + gpu_inputs.addArg(gpu_out_grad, *gpu_seq); gpu_outputs.addArg(gpu_in_grad, ADD_TO); gpu_outputs.addArg( gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO); diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp index dbe3a4e9f6..3b65908465 100644 --- a/paddle/function/Function.cpp +++ b/paddle/function/Function.cpp @@ -90,6 +90,12 @@ void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) { args_.push_back(std::make_shared(arg, argType)); } +void BufferArgs::addArg(const Matrix& matrix, + const IVector& vector, + ArgType argType) { + args_.push_back(std::make_shared(matrix, vector, argType)); +} + ClassRegistrar FunctionBase::funcRegistrar_; } // namespace paddle diff --git a/paddle/function/Function.h b/paddle/function/Function.h index 249f8f9cfa..c15045143b 100644 --- a/paddle/function/Function.h +++ b/paddle/function/Function.h @@ -77,6 +77,10 @@ public: void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); + void addArg(const Matrix& matrix, + const IVector& vector, + ArgType argType = UNSPECIFIED); + // get 
argument const BufferArg& operator[](size_t num) const { CHECK_LT(num, args_.size()); diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp index def7c15ca8..17fd36ef56 100644 --- a/paddle/gserver/layers/ContextProjection.cpp +++ b/paddle/gserver/layers/ContextProjection.cpp @@ -166,8 +166,7 @@ void ContextProjection::backward(const UpdateCallback& callback) { BufferArgs inputs; BufferArgs outputs; - inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); - inputs.addArg(*out_->grad); + inputs.addArg(*out_->grad, *in_->sequenceStartPositions->getVector(useGpu_)); outputs.addArg( CpuMatrix( in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim), From 8560ce69ff8ed6d201e0c31d561aaa6aab7cb5b2 Mon Sep 17 00:00:00 2001 From: xutianbing Date: Wed, 11 Jan 2017 16:55:32 -0800 Subject: [PATCH 11/37] Daoyuan's comments about SequenceArg. --- paddle/function/ContextProjectionOp.cpp | 163 ++++++++++++++++---- paddle/function/ContextProjectionOpTest.cpp | 20 +-- paddle/gserver/layers/ContextProjection.cpp | 15 +- 3 files changed, 151 insertions(+), 47 deletions(-) diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index 177708d00f..ec697a381f 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -74,7 +74,7 @@ void ContextProjectionForward(CpuMatrix& out_mat, /** * Paddle Function for Context Projection Forward. - * Calculate the value for the output layer with context projection. + * Calculate the output sequence after context projection. * * What is Context Projection? * For example, assumed input (x) has 4 words and the dimension of each word @@ -92,10 +92,12 @@ void ContextProjectionForward(CpuMatrix& out_mat, * c1, c2, d1, d2, 0, 0] * @endcode * - * \param outputs[0] output value. - * \param inputs[0] input value. - * \param inputs[1] input weight. - * \param inputs[2] input sequence. 
+ * \param outputs[0].matrix output value, n * (d * l) + * \param outputs[0].vector input sequence, n * 1 + * \param inputs[0].matrix input value, n * d + * \param inputs[0].vector input sequence, n * 1 + * \param inputs[1].matrix input weight, pad * d + * \param inputs[1].vector input sequence, n * 1 */ template class ContextProjectionForwardFunc : public FunctionBase { @@ -107,28 +109,40 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ((size_t)3, inputs.size()); + CHECK(1 == inputs.size() || 2 == inputs.size()); CHECK_EQ((size_t)1, outputs.size()); - CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data()); - CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); - CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); - CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); - CHECK_EQ(inputs[2].shape().ndims(), (size_t)1); + const auto val_seqs = dynamic_cast(inputs[0]); + const auto w_seqs = inputs.size() <= 1 + ? nullptr + : dynamic_cast(&inputs[1]); + auto out_seqs = dynamic_cast(outputs[0]); + + CHECK(out_seqs.data() && val_seqs.data() && + val_seqs.getSequenceIds().data()); + CHECK_EQ(out_seqs.shape().ndims(), (size_t)2); + CHECK_EQ(val_seqs.shape().ndims(), (size_t)2); + CHECK_EQ(val_seqs.getSequenceIds().shape().ndims(), (size_t)1); + if (w_seqs) { + CHECK_EQ(w_seqs->shape().ndims(), (size_t)2); + CHECK_EQ(w_seqs->getSequenceIds().shape().ndims(), (size_t)1); + } /// dim of output = dim of input * context_length - CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); - /// dim of input == dim of weight - CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); + CHECK_EQ(out_seqs.shape()[1], val_seqs.shape()[1] * context_length_); /// input and output has the same batch_size - CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); + CHECK_EQ(val_seqs.shape()[0], out_seqs.shape()[0]); + /// dim of input == dim of weight + if (w_seqs) { + CHECK_EQ(val_seqs.shape()[1], w_seqs->shape()[1]); + } - 
CHECK_EQ(outputs[0].getArgType(), ADD_TO); - auto out_mat = outputs[0].matrix(); - const auto in_mat = inputs[0].matrix(); + CHECK_EQ(out_seqs.getArgType(), ADD_TO); + auto out_mat = out_seqs.matrix(); + const auto in_mat = val_seqs.matrix(); const auto w_mat = - !inputs[1].data() ? typename Tensor::Matrix(nullptr, 0, 0) - : inputs[1].matrix(); - const auto seq_vec = inputs[2].vector(); + w_seqs ? w_seqs->matrix() + : typename Tensor::Matrix(nullptr, 0, 0); + const auto seq_vec = val_seqs.getSequenceIds().vector(); ContextProjectionForward(out_mat, in_mat, w_mat, @@ -227,25 +241,25 @@ public: CHECK_EQ((size_t)1, inputs.size()); CHECK_EQ((size_t)2, outputs.size()); - const auto seqArg = dynamic_cast(inputs[0]); - CHECK(seqArg.data() && inputs[0].data()); - CHECK_EQ(seqArg.shape().ndims(), (size_t)2); - CHECK_EQ(seqArg.getSequenceIds().shape().ndims(), (size_t)1); + const auto seq_arg = dynamic_cast(inputs[0]); + CHECK(seq_arg.data() && inputs[0].data()); + CHECK_EQ(seq_arg.shape().ndims(), (size_t)2); + CHECK_EQ(seq_arg.getSequenceIds().shape().ndims(), (size_t)1); CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); CHECK_EQ(outputs[1].shape().ndims(), (size_t)2); /// dim of input grad == dim of weight CHECK_EQ(outputs[0].shape()[1], outputs[1].shape()[1]); /// input and output grad has the same batch_size - CHECK_EQ(outputs[0].shape()[0], seqArg.shape()[0]); + CHECK_EQ(outputs[0].shape()[0], seq_arg.shape()[0]); /// dim of output val = dim of input grad * context_length - CHECK_EQ(seqArg.shape()[1], outputs[0].shape()[1] * context_length_); + CHECK_EQ(seq_arg.shape()[1], outputs[0].shape()[1] * context_length_); CHECK_EQ(outputs[0].getArgType(), ADD_TO); CHECK_EQ(outputs[1].getArgType(), ADD_TO); - const auto seq_vec = seqArg.getSequenceIds().vector(); - const auto out_grad_mat = seqArg.matrix(); + const auto seq_vec = seq_arg.getSequenceIds().vector(); + const auto out_grad_mat = seq_arg.matrix(); auto in_grad_mat = !outputs[0].data() ? 
typename Tensor::Matrix(nullptr, 0, 0) @@ -272,6 +286,91 @@ private: size_t total_pad_; }; +/** + * \param inputs[0].matrix input grad, n*d + * \param inputs[0].vector input sequence, n*1 + * \param outputs[0] output grad, n*(d*l) + */ +template +class ContextProjectionBackwardDataFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + context_length_ = config.get("context_length"); + context_start_ = config.get("context_start"); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(1, static_cast(inputs.size())); + CHECK_EQ(1, static_cast(outputs.size())); + const auto in_seqs = dynamic_cast(inputs[0]); + CHECK(in_seqs.data() && outputs[0].data() && + in_seqs.getSequenceIds().data()); + CHECK_EQ(static_cast(outputs[0].shape().ndims()), 2); + CHECK_EQ(static_cast(in_seqs.shape().ndims()), 2); + CHECK_EQ(static_cast(in_seqs.getSequenceIds().shape().ndims()), 1); + CHECK_EQ(outputs[0].shape().ndims(), + in_seqs.shape().ndims() * context_length_); + /// input and output has the same batch_size + CHECK_EQ(in_seqs.shape()[0], outputs[0].shape()[0]); + const auto out_grad_mat = outputs[0].matrix(); + auto in_grad_mat = in_seqs.matrix(); + const auto seq_vec = in_seqs.getSequenceIds().vector(); + + ContextProjectionBackwardData( + out_grad_mat, in_grad_mat, seq_vec, context_length_, context_start_); + } + +private: + size_t context_length_; + int context_start_; +}; + +/** + * \param inputs[0].matrix weight grad, pad * d + * \param inputs[0].vecotr input sequence, n * 1 + * \param outputs[0] output grad, n * (d * l) + */ +template +class ContextProjectionBackwardWeightFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + context_length_ = config.get("context_length"); + context_start_ = config.get("context_start"); + begin_pad_ = config.get("begin_pad"); + total_pad_ = config.get("total_pad"); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) 
override { + CHECK_EQ(1, static_cast(inputs.size())); + CHECK_EQ(1, static_cast(outputs.size())); + + const auto in_seqs = dynamic_cast(inputs[0]); + CHECK(in_seqs.data() && in_seqs.getSequenceIds().data() && + outputs[0].data()); + CHECK_EQ(static_cast(outputs[0].shape().ndims()), 2); + CHECK_EQ(static_cast(in_seqs.shape().ndims()), 2); + CHECK_EQ(static_cast(in_seqs.getSequenceIds().shape().ndims()), 1); + CHECK_EQ(in_seqs.shape()[0], outputs[0].shape()[0]); + CHECK_EQ(outputs[0].shape()[1], in_seqs.shape()[1] * context_length_); + const auto out_grad_mat = outputs[0].matrix(); + auto w_grad_mat = inputs[0].matrix(); + const auto seq_vec = in_seqs.getSequenceIds().vector(); + ContextProjectionBackwardWeight(out_grad_mat, + w_grad_mat, + seq_vec, + context_length_, + context_start_, + total_pad_, + begin_pad_); + } + +private: + size_t context_length_; + int context_start_; + size_t begin_pad_; + size_t total_pad_; +}; + REGISTER_TYPED_FUNC(ContextProjectionForward, CPU, ContextProjectionForwardFunc); @@ -285,5 +384,11 @@ REGISTER_TYPED_FUNC(ContextProjectionForward, REGISTER_TYPED_FUNC(ContextProjectionBackward, GPU, ContextProjectionBackwardFunc); +REGISTER_TYPED_FUNC(ContextProjectionBackwardData, + GPU, + ContextProjectionBackwardDataFunc); +REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight, + GPU, + ContextProjectionBackwardWeightFunc); #endif } // namespace paddle diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp index 50ca204005..bd0c06c5f6 100644 --- a/paddle/function/ContextProjectionOpTest.cpp +++ b/paddle/function/ContextProjectionOpTest.cpp @@ -58,21 +58,21 @@ void testMatrixProjectionForward(int context_start, BufferArgs cpu_inputs; BufferArgs cpu_outputs; - cpu_inputs.addArg(cpu_in); - cpu_inputs.addArg(cpu_weight ? 
*cpu_weight - : CpuMatrix(nullptr, 0, input_dim)); - cpu_inputs.addArg(*cpu_seq); - cpu_outputs.addArg(cpu_out, ADD_TO); + cpu_inputs.addArg(cpu_in, *cpu_seq); + if (cpu_weight) { + cpu_inputs.addArg(*cpu_weight, *cpu_seq); + } + cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO); compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs); BufferArgs gpu_inputs; BufferArgs gpu_outputs; - gpu_inputs.addArg(gpu_in); - gpu_inputs.addArg(gpu_weight ? *gpu_weight - : GpuMatrix(nullptr, 0, input_dim)); - gpu_inputs.addArg(*gpu_seq); - gpu_outputs.addArg(gpu_out, ADD_TO); + gpu_inputs.addArg(gpu_in, *gpu_seq); + if (gpu_weight) { + gpu_inputs.addArg(*gpu_weight, *gpu_seq); + } + gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO); compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs); diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp index 17fd36ef56..edcef17ad4 100644 --- a/paddle/gserver/layers/ContextProjection.cpp +++ b/paddle/gserver/layers/ContextProjection.cpp @@ -118,16 +118,15 @@ void ContextProjection::forward() { /// first use state_, otherwise use weight_(padding false === w nullptr) auto w_ptr = state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr; - auto start_pos = in_->sequenceStartPositions; - + const auto start_pos = in_->sequenceStartPositions->getVector(useGpu_); BufferArgs inputs; BufferArgs outputs; - inputs.addArg(*in_->value); - inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, - w_ptr ? 
w_ptr->getHeight() : 0, - input_dim)); - inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); - outputs.addArg(*out_->value, ADD_TO); + inputs.addArg(*in_->value, *start_pos); + if (w_ptr) { + inputs.addArg(CpuMatrix(w_ptr->getData(), w_ptr->getHeight(), input_dim), + *start_pos); + } + outputs.addArg(*out_->value, *start_pos, ADD_TO); forward_[0]->calc(inputs, outputs); if (state_ && config_.context_start() < 0) { From e9794214cbca438b1b467d614c6398ec09ab1d0b Mon Sep 17 00:00:00 2001 From: xutianbing Date: Thu, 12 Jan 2017 13:26:10 -0800 Subject: [PATCH 12/37] Address further comments. --- paddle/function/BufferArg.cpp | 12 +- paddle/function/BufferArg.h | 30 +++- paddle/function/ContextProjectionOp.cpp | 169 +++++++++++--------- paddle/function/ContextProjectionOpTest.cpp | 4 +- paddle/gserver/layers/ContextProjection.cpp | 1 + 5 files changed, 126 insertions(+), 90 deletions(-) diff --git a/paddle/function/BufferArg.cpp b/paddle/function/BufferArg.cpp index fde48a73b6..5d595deb12 100644 --- a/paddle/function/BufferArg.cpp +++ b/paddle/function/BufferArg.cpp @@ -20,23 +20,27 @@ limitations under the License. 
*/ namespace paddle { const SequenceArg& BufferArg::sequence() const { - // CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA); + CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA); return dynamic_cast(*this); } const SparseMatrixArg& BufferArg::sparse() const { - // CHECK_EQ(bufferType_, TENSOR_SPARSE); + CHECK_EQ(bufferType_, TENSOR_SPARSE); return dynamic_cast(*this); } SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType) : BufferArg(sparse, argType), row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), - col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) {} + col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) { + bufferType_ = TENSOR_SPARSE; +} SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType) : BufferArg(sparse, argType), row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), - col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) {} + col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) { + bufferType_ = TENSOR_SPARSE; +} } // namespace paddle diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index f3a4350e12..440a924a7a 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -23,10 +23,11 @@ limitations under the License. 
*/ namespace paddle { enum BufferType { - TENSOR_NORMAL = 0, - TENSOR_SEQUENCE_ID = 1, - TENSOR_SEQUENCE_DATA = 2, - TENSOR_SPARSE = 3 + TENSOR_UNKNOWN = 0, + TENSOR_NORMAL = 1, + TENSOR_SEQUENCE_ID = 2, + TENSOR_SEQUENCE_DATA = 3, + TENSOR_SPARSE = 4 }; enum SparseDataType { @@ -86,6 +87,7 @@ public: valueType_(DataType::value), shape_(2), argType_(argType) { + bufferType_ = TENSOR_NORMAL; shape_.setDim(0, matrix.getHeight()); shape_.setDim(1, matrix.getWidth()); } @@ -98,6 +100,7 @@ public: valueType_(DataType::value), shape_(shape), argType_(argType) { + bufferType_ = TENSOR_NORMAL; CHECK_EQ(matrix.getElementCnt(), shape.getElements()); } @@ -107,6 +110,7 @@ public: valueType_(DataType::value), shape_(1), argType_(argType) { + bufferType_ = TENSOR_NORMAL; shape_.setDim(0, vector.getSize()); } @@ -116,6 +120,7 @@ public: valueType_(VALUE_TYPE_INT32), shape_(1), argType_(argType) { + bufferType_ = TENSOR_NORMAL; shape_.setDim(0, vector.getSize()); } @@ -150,6 +155,8 @@ public: ValueType valueType() const { return valueType_; } BufferType bufferType() const { return bufferType_; } const TensorShape& shape() const { return shape_; } + bool isSparse() const { return (TENSOR_SPARSE == bufferType_); } + bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; } const SequenceArg& sequence() const; const SparseMatrixArg& sparse() const; @@ -158,8 +165,8 @@ protected: void* buf_; ValueType valueType_; TensorShape shape_; - BufferType bufferType_; - ArgType argType_ = UNSPECIFIED; + BufferType bufferType_{TENSOR_UNKNOWN}; + ArgType argType_{UNSPECIFIED}; // leading dimensions. 
The size is dims_.size() // Dims lds_; }; @@ -174,11 +181,13 @@ public: const TensorShape& shape, ArgType argType = UNSPECIFIED) : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) { + bufferType_ = TENSOR_SEQUENCE_ID; CHECK_EQ(shape_.ndims(), (size_t)1); numSeqs_ = shape_[0] - 1; } SequenceIdArg(const IVector& vector) : BufferArg(vector) { + bufferType_ = TENSOR_SEQUENCE_ID; numSeqs_ = shape_[0] - 1; } @@ -199,12 +208,16 @@ public: const SequenceIdArg& startPositions, ArgType argType = UNSPECIFIED) : BufferArg(buf, valueType, shape, argType), - startPositions_(startPositions) {} + startPositions_(startPositions) { + bufferType_ = TENSOR_SEQUENCE_DATA; + } SequenceArg(const Matrix& matrix, const IVector& vector, ArgType argType = UNSPECIFIED) - : BufferArg(matrix, argType), startPositions_(vector) {} + : BufferArg(matrix, argType), startPositions_(vector) { + bufferType_ = TENSOR_SEQUENCE_DATA; + } ~SequenceArg() {} @@ -236,6 +249,7 @@ public: nnz_(nnz), format_(format), type_(type) { + bufferType_ = TENSOR_SPARSE; CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); CHECK_EQ(shape_.ndims(), (size_t)2); CHECK_EQ(row_.shape().ndims(), (size_t)1); diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index ec697a381f..2ef53cd6d9 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -74,9 +74,9 @@ void ContextProjectionForward(CpuMatrix& out_mat, /** * Paddle Function for Context Projection Forward. - * Calculate the output sequence after context projection. + * Calculate the output layer value sequence after context projection. * - * What is Context Projection? + * What is Context Projection for a sequence? * For example, assumed input (x) has 4 words and the dimension of each word * representation is 2. 
If we use zero to pad instead of learned weight to pad, * and the context_lenth is 3, the output (y) is: @@ -92,12 +92,11 @@ void ContextProjectionForward(CpuMatrix& out_mat, * c1, c2, d1, d2, 0, 0] * @endcode * - * \param outputs[0].matrix output value, n * (d * l) - * \param outputs[0].vector input sequence, n * 1 - * \param inputs[0].matrix input value, n * d - * \param inputs[0].vector input sequence, n * 1 - * \param inputs[1].matrix input weight, pad * d - * \param inputs[1].vector input sequence, n * 1 + * \param outputs[0].matrix output layer value, n * (d * l) + * \param outputs[0].vector start position sequence, n * 1 + * \param inputs[0].matrix input layer value, n * d + * \param inputs[0].vector start position sequence, n * 1 + * \param inputs[1].matrix input layer weight, pad * d */ template class ContextProjectionForwardFunc : public FunctionBase { @@ -111,37 +110,35 @@ public: void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK(1 == inputs.size() || 2 == inputs.size()); CHECK_EQ((size_t)1, outputs.size()); - + CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) + << "SequenceArg required here"; const auto val_seqs = dynamic_cast(inputs[0]); - const auto w_seqs = inputs.size() <= 1 - ? 
nullptr - : dynamic_cast(&inputs[1]); - auto out_seqs = dynamic_cast(outputs[0]); + auto out_seq = dynamic_cast(outputs[0]); - CHECK(out_seqs.data() && val_seqs.data() && + CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceIds().data()); - CHECK_EQ(out_seqs.shape().ndims(), (size_t)2); + CHECK_EQ(out_seq.shape().ndims(), (size_t)2); CHECK_EQ(val_seqs.shape().ndims(), (size_t)2); CHECK_EQ(val_seqs.getSequenceIds().shape().ndims(), (size_t)1); - if (w_seqs) { - CHECK_EQ(w_seqs->shape().ndims(), (size_t)2); - CHECK_EQ(w_seqs->getSequenceIds().shape().ndims(), (size_t)1); + if (2 == inputs.size()) { + CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); } /// dim of output = dim of input * context_length - CHECK_EQ(out_seqs.shape()[1], val_seqs.shape()[1] * context_length_); + CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_); /// input and output has the same batch_size - CHECK_EQ(val_seqs.shape()[0], out_seqs.shape()[0]); + CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]); /// dim of input == dim of weight - if (w_seqs) { - CHECK_EQ(val_seqs.shape()[1], w_seqs->shape()[1]); + if (2 == inputs.size()) { + CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]); } - CHECK_EQ(out_seqs.getArgType(), ADD_TO); - auto out_mat = out_seqs.matrix(); + CHECK_EQ(out_seq.getArgType(), ADD_TO); + auto out_mat = out_seq.matrix(); const auto in_mat = val_seqs.matrix(); const auto w_mat = - w_seqs ? w_seqs->matrix() - : typename Tensor::Matrix(nullptr, 0, 0); + (2 == inputs.size()) + ? inputs[1].matrix() + : typename Tensor::Matrix(nullptr, 0, 0); const auto seq_vec = val_seqs.getSequenceIds().vector(); ContextProjectionForward(out_mat, in_mat, @@ -221,10 +218,11 @@ void ContextProjectionBackward(const CpuMatrix& out_grad_mat, * Context Projection Backward Function. * Update the weight gradient and input layer gradient with backprop * - * \param inputs[0].seq input sequence. - * \param inputs[0].matrix output layer grad. - * \param outputs[0] input layer grad. 
- * \param outputs[1] weight grad. + * \param inputs[0].matrix output layer grad, n * (d * l) + * \param inputs[0].vector start position sequence, n * 1 + * \param outputs[0].matrix input layer grad, n * d + * \param outputs[0].vector start position sequence, n * 1 + * \param outputs[1] weight grad, pad * d */ template class ContextProjectionBackwardFunc : public FunctionBase { @@ -240,30 +238,31 @@ public: void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ((size_t)1, inputs.size()); CHECK_EQ((size_t)2, outputs.size()); - - const auto seq_arg = dynamic_cast(inputs[0]); - CHECK(seq_arg.data() && inputs[0].data()); - CHECK_EQ(seq_arg.shape().ndims(), (size_t)2); - CHECK_EQ(seq_arg.getSequenceIds().shape().ndims(), (size_t)1); - CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); + CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) + << "SequenceArg required here"; + const auto in_seq = dynamic_cast(inputs[0]); + auto out_seq = dynamic_cast(outputs[0]); + CHECK(in_seq.data() && in_seq.getSequenceIds().data()); + CHECK_EQ(in_seq.shape().ndims(), (size_t)2); + CHECK_EQ(in_seq.getSequenceIds().shape().ndims(), (size_t)1); + CHECK_EQ(out_seq.shape().ndims(), (size_t)2); + CHECK_EQ(out_seq.getSequenceIds().shape().ndims(), (size_t)1); CHECK_EQ(outputs[1].shape().ndims(), (size_t)2); /// dim of input grad == dim of weight - CHECK_EQ(outputs[0].shape()[1], outputs[1].shape()[1]); + CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]); /// input and output grad has the same batch_size - CHECK_EQ(outputs[0].shape()[0], seq_arg.shape()[0]); - /// dim of output val = dim of input grad * context_length - CHECK_EQ(seq_arg.shape()[1], outputs[0].shape()[1] * context_length_); - - CHECK_EQ(outputs[0].getArgType(), ADD_TO); + CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]); + /// dim of output grad = dim of input grad * context_length + CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_); + CHECK_EQ(out_seq.getArgType(), ADD_TO); 
CHECK_EQ(outputs[1].getArgType(), ADD_TO); - const auto seq_vec = seq_arg.getSequenceIds().vector(); - const auto out_grad_mat = seq_arg.matrix(); + const auto seq_vec = in_seq.getSequenceIds().vector(); + const auto out_grad_mat = in_seq.matrix(); auto in_grad_mat = - !outputs[0].data() - ? typename Tensor::Matrix(nullptr, 0, 0) - : outputs[0].matrix(); + !out_seq.data() ? typename Tensor::Matrix(nullptr, 0, 0) + : out_seq.matrix(); auto w_grad_mat = !outputs[1].data() ? typename Tensor::Matrix(nullptr, 0, 0) : outputs[1].matrix(); @@ -287,9 +286,15 @@ private: }; /** - * \param inputs[0].matrix input grad, n*d - * \param inputs[0].vector input sequence, n*1 - * \param outputs[0] output grad, n*(d*l) + * Context Projection Backward Data Function + * Update input layer grad + * input: sequence of output layer grad + * output: sequence of input layer grad + * + * \param outputs[0].matrix input layer grad, n * d + * \param outputs[0].vector start position sequence, n * 1 + * \param inputs[0].matrix output layer grad, n * (d * l) + * \param inputs[0].vector start positon sequence, n * 1 */ template class ContextProjectionBackwardDataFunc : public FunctionBase { @@ -302,19 +307,24 @@ public: void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1, static_cast(inputs.size())); CHECK_EQ(1, static_cast(outputs.size())); - const auto in_seqs = dynamic_cast(inputs[0]); - CHECK(in_seqs.data() && outputs[0].data() && - in_seqs.getSequenceIds().data()); - CHECK_EQ(static_cast(outputs[0].shape().ndims()), 2); - CHECK_EQ(static_cast(in_seqs.shape().ndims()), 2); - CHECK_EQ(static_cast(in_seqs.getSequenceIds().shape().ndims()), 1); - CHECK_EQ(outputs[0].shape().ndims(), - in_seqs.shape().ndims() * context_length_); + CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) + << "SequenceArg required here"; + const auto in_seq = dynamic_cast(inputs[0]); + const auto out_seq = dynamic_cast(outputs[0]); + + CHECK(in_seq.data() && out_seq.data() && 
in_seq.getSequenceIds().data()); + CHECK_EQ(static_cast(out_seq.shape().ndims()), 2); + CHECK_EQ(static_cast(in_seq.shape().ndims()), 2); + CHECK_EQ(static_cast(in_seq.getSequenceIds().shape().ndims()), 1); + /// output layer grad dim == input layer grad dim * context_length_ + CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_); /// input and output has the same batch_size - CHECK_EQ(in_seqs.shape()[0], outputs[0].shape()[0]); - const auto out_grad_mat = outputs[0].matrix(); - auto in_grad_mat = in_seqs.matrix(); - const auto seq_vec = in_seqs.getSequenceIds().vector(); + CHECK_EQ(in_seq.shape()[0], out_seq.shape()[0]); + CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + + const auto out_grad_mat = in_seq.matrix(); + const auto seq_vec = in_seq.getSequenceIds().vector(); + auto in_grad_mat = out_seq.matrix(); ContextProjectionBackwardData( out_grad_mat, in_grad_mat, seq_vec, context_length_, context_start_); @@ -326,9 +336,14 @@ private: }; /** - * \param inputs[0].matrix weight grad, pad * d - * \param inputs[0].vecotr input sequence, n * 1 - * \param outputs[0] output grad, n * (d * l) + * Context Projection Backward Weight Function + * Update weight grad by backprop + * input: sequence of output layer grad + * output: weight grad + * + * \param outputs[0] weight grad, pad * d + * \param inputs[0].matrix output layer grad, n * (d * l) + * \param inputs[0].vecotr start positon sequence, n * 1 */ template class ContextProjectionBackwardWeightFunc : public FunctionBase { @@ -343,18 +358,20 @@ public: void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1, static_cast(inputs.size())); CHECK_EQ(1, static_cast(outputs.size())); - - const auto in_seqs = dynamic_cast(inputs[0]); - CHECK(in_seqs.data() && in_seqs.getSequenceIds().data() && - outputs[0].data()); + CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here"; + const auto in_seq = dynamic_cast(inputs[0]); + CHECK(in_seq.data() && 
in_seq.getSequenceIds().data() && outputs[0].data()); CHECK_EQ(static_cast(outputs[0].shape().ndims()), 2); - CHECK_EQ(static_cast(in_seqs.shape().ndims()), 2); - CHECK_EQ(static_cast(in_seqs.getSequenceIds().shape().ndims()), 1); - CHECK_EQ(in_seqs.shape()[0], outputs[0].shape()[0]); - CHECK_EQ(outputs[0].shape()[1], in_seqs.shape()[1] * context_length_); - const auto out_grad_mat = outputs[0].matrix(); - auto w_grad_mat = inputs[0].matrix(); - const auto seq_vec = in_seqs.getSequenceIds().vector(); + CHECK_EQ(static_cast(in_seq.shape().ndims()), 2); + CHECK_EQ(static_cast(in_seq.getSequenceIds().shape().ndims()), 1); + CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]); + /// output layer grad dim == weight dim * context_length_ + CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + + const auto seq_vec = in_seq.getSequenceIds().vector(); + const auto out_grad_mat = in_seq.matrix(); + auto w_grad_mat = outputs[0].matrix(); ContextProjectionBackwardWeight(out_grad_mat, w_grad_mat, seq_vec, diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp index bd0c06c5f6..c9db2ff800 100644 --- a/paddle/function/ContextProjectionOpTest.cpp +++ b/paddle/function/ContextProjectionOpTest.cpp @@ -123,7 +123,7 @@ void testMatrixProjectionBackward(int context_start, BufferArgs cpu_inputs; BufferArgs cpu_outputs; cpu_inputs.addArg(cpu_out_grad, *cpu_seq); - cpu_outputs.addArg(cpu_in_grad, ADD_TO); + cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO); cpu_outputs.addArg( cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO); @@ -132,7 +132,7 @@ void testMatrixProjectionBackward(int context_start, BufferArgs gpu_inputs; BufferArgs gpu_outputs; gpu_inputs.addArg(gpu_out_grad, *gpu_seq); - gpu_outputs.addArg(gpu_in_grad, ADD_TO); + gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO); gpu_outputs.addArg( gpu_w_grad ? 
*gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO); diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp index edcef17ad4..d7042af1c2 100644 --- a/paddle/gserver/layers/ContextProjection.cpp +++ b/paddle/gserver/layers/ContextProjection.cpp @@ -169,6 +169,7 @@ void ContextProjection::backward(const UpdateCallback& callback) { outputs.addArg( CpuMatrix( in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim), + *in_->sequenceStartPositions->getVector(useGpu_), ADD_TO); outputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, w_ptr ? w_ptr->getHeight() : 0, From 143ff015c66fb70db0c89de77bbd8cb60bb082ce Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 5 Jan 2017 14:14:51 -0800 Subject: [PATCH 13/37] add permission AWSKeyManagementServicePowerUser for aws kms key creation Without the permission command `aws kms --region=us-west-1 create-key --description="kube-aws assets"` will fail with: An error occurred (AccessDeniedException) when calling the CreateKey operation: --- doc/howto/usage/k8s/k8s_aws_en.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md index b04bfba590..a82243995a 100644 --- a/doc/howto/usage/k8s/k8s_aws_en.md +++ b/doc/howto/usage/k8s/k8s_aws_en.md @@ -31,6 +31,7 @@ the user group: - IAMUserSSHKeys - IAMFullAccess - NetworkAdministrator +- AWSKeyManagementServicePowerUser By the time we write this tutorial, we noticed that Chinese AWS users From 2778a65b0ff9aa825d1ccaf6907c7364427058f1 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 12 Jan 2017 17:19:05 -0800 Subject: [PATCH 14/37] first pass change for k8s aws en tutorial --- doc/howto/usage/k8s/k8s_aws_en.md | 108 +++++++++++++++--------------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md index a82243995a..c776ba9eb9 100644 --- a/doc/howto/usage/k8s/k8s_aws_en.md 
+++ b/doc/howto/usage/k8s/k8s_aws_en.md @@ -2,15 +2,9 @@ ## Create AWS Account and IAM Account -To use AWS, we need to sign up an AWS account on Amazon's Web site. -An AWS account allows us to login to the AWS Console Web interface to -create IAM users and user groups. Usually, we create a user group with -privileges required to run PaddlePaddle, and we create users for -those who are going to run PaddlePaddle and add these users into the -group. IAM users can identify themselves using password and tokens, -where passwords allows users to log in to the AWS Console, and tokens -make it easy for users to submit and inspect jobs from the command -line. +AWS account allow us to manage AWS from Web Console. Amazon AMI enable us to manage AWS from command line interface. + +We need to create an AMI user with sufficient privilege to create kubernetes cluster on AWS. To sign up an AWS account, please follow @@ -19,8 +13,7 @@ To create users and user groups under an AWS account, please follow [this guide](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html). -Please be aware that this tutorial needs the following privileges in -the user group: +Please be aware that this tutorial needs the following privileges for the user in AMI: - AmazonEC2FullAccess - AmazonS3FullAccess @@ -47,9 +40,11 @@ it. Here we will show you step by step on how to run PaddlePaddle training on AWS cluster. -###Download kube-aws and kubectl +### Download kube-aws and kubectl + +#### kube-aws -####kube-aws +[kube-aws](https://github.com/coreos/kube-aws) is a CLI tool to automate cluster deployment to AWS. Import the CoreOS Application Signing Public Key: @@ -89,24 +84,22 @@ mv ${PLATFORM}/kube-aws /usr/local/bin ``` -####kubectl +#### kubectl + +[kubectl](https://kubernetes.io/docs/user-guide/kubectl-overview/) is a command line interface for running commands against Kubernetes clusters. 
Go to the [releases](https://github.com/kubernetes/kubernetes/releases) and download the latest release tarball. Extract the tarball and then concate the kubernetes binaries directory into PATH: ``` -export PATH=/platforms/linux/amd64:$PATH - +export PATH=/platforms/linux/amd64:$PATH # The exact path depend on your platform ``` -User credentials and security tokens will be generated later in user directory, not in `~/.kube/config`, they will be necessary to use the CLI or the HTTP Basic Auth. - - -###Configure AWS Credentials -First check out [this](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) for installing the AWS command line interface, if you use ec2 instance with default amazon AMI, the cli tool has already been installed on your machine. +### Configure AWS Credentials +First check out [this](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) for installing the AWS command line interface. And then configure your AWS account information: @@ -127,33 +120,35 @@ Default output format: json ``` -Test that your credentials work by describing any instances you may already have running on your account: +Verify that your credentials work by describing any instances you may already have running on your account: ``` aws ec2 describe-instances ``` -###Define Cluster Parameters +### Define Cluster Parameters -####EC2 key pair +#### EC2 key pair The keypair that will authenticate SSH access to your EC2 instances. The public half of this key pair will be configured on each CoreOS node. -After creating a key pair, you will use the name you gave the keys to configure the cluster. Key pairs are only available to EC2 instances in the same region. More info in the [EC2 Keypair docs](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html). 
+Follow [EC2 Keypair docs](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) to create a EC2 key pair -####KMS key +After creating a key pair, you will use the name you gave the keys to configure the cluster. Key pairs are only available to EC2 instances in the same region. + +#### KMS key Amazon KMS keys are used to encrypt and decrypt cluster TLS assets. If you already have a KMS Key that you would like to use, you can skip creating a new key and provide the Arn string for your existing key. You can create a KMS key in the AWS console, or with the aws command line tool: ``` -$ aws kms --region=us-west-2 create-key --description="kube-aws assets" +$ aws kms --region=us-west-1 create-key --description="kube-aws assets" { "KeyMetadata": { "CreationDate": 1458235139.724, "KeyState": "Enabled", - "Arn": "arn:aws:kms:us-west-2:xxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx", + "Arn": "arn:aws:kms:us-west-1:xxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx", "AWSAccountId": "xxxxxxxxxxxxx", "Enabled": true, "KeyUsage": "ENCRYPT_DECRYPT", @@ -167,7 +162,9 @@ You will use the `KeyMetadata.Arn` string to identify your KMS key in the init s And then you need to add several inline policies in your user permission. 
-kms inline policy: +Go to AMI user page, click on `Add inline policy` button, and then select `Custom Policy` + +paste into following inline policies: ``` { @@ -183,16 +180,8 @@ kms inline policy: "Resource": [ "arn:aws:kms:*:xxxxxxxxx:key/*" ] - } - ] -} -``` -cloudformation inline policy: - -``` -"Version": "2012-10-17", - "Statement": [ - { + }, + { "Sid": "Stmt1482205746000", "Effect": "Allow", "Action": [ @@ -201,10 +190,11 @@ cloudformation inline policy: "cloudformation:DeleteStack", "cloudformation:DescribeStacks", "cloudformation:DescribeStackResource", - "cloudformation:GetTemplate" + "cloudformation:GetTemplate", + "cloudformation:DescribeStackEvents" ], "Resource": [ - "arn:aws:cloudformation:us-west-2:xxxxxxxxx:stack/YOUR_CLUSTER_NAME/*" + "arn:aws:cloudformation:us-west-1:xxxxxxxxx:stack/YOUR_CLUSTER_NAME/*" ] } ] @@ -212,15 +202,23 @@ cloudformation inline policy: ``` -####External DNS name +#### External DNS name When the cluster is created, the controller will expose the TLS-secured API on a public IP address. You will need to create an A record for the external DNS hostname you want to point to this IP address. You can find the API external IP address after the cluster is created by invoking kube-aws status. -####S3 bucket +#### S3 bucket You need to create an S3 bucket before startup the Kubernetes cluster. -####Initialize an asset directory +command (need to have a global unique name): + +``` +paddle aws s3api --region=us-west-1 create-bucket --bucket bucket-name +``` + +If you get an error message, try a different bucket name. The bucket name needs to be globally unique. 
+ +#### Initialize an asset directory Create a directory on your local machine to hold the generated assets: @@ -238,12 +236,16 @@ $ kube-aws init \ --region=us-west-1 \ --availability-zone=us-west-1c \ --key-name=key-pair-name \ ---kms-key-arn="arn:aws:kms:us-west-2:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx" +--kms-key-arn="arn:aws:kms:us-west-1:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx" ``` +Here `us-west-1c` is used for parameter `--availability-zone`, but supported availability zone varies among AWS accounts. + +Please check if `us-west-1c` is supported by `aws ec2 --region us-west-1 describe-availability-zones`, if not switch to other supported availability zone. (e.g., `us-west-1a`, or `us-west-1b`) + There will now be a cluster.yaml file in the asset directory. This is the main configuration file for your cluster. -####Render contents of the asset directory +#### Render contents of the asset directory In the simplest case, you can have kube-aws generate both your TLS identities and certificate authority for you. @@ -286,21 +288,21 @@ $ tree These assets (templates and credentials) are used to create, update and interact with your Kubernetes cluster. -###Kubernetes Cluster Start Up +### Kubernetes Cluster Start Up -####Create the instances defined in the CloudFormation template +#### Create the instances defined in the CloudFormation template -Now for the exciting part, creating your cluster: +Now for the exciting part, creating your cluster (choose any ``): ``` $ kube-aws up --s3-uri s3:/// ``` -####Configure DNS +#### Configure DNS -You can invoke `kube-aws status` to get the cluster API endpoint after cluster creation, if necessary. This command can take a while. And then dig the load balancer hostname to get the ip address, use this ip to setup an A record for your external dns name. +You can invoke `kube-aws status` to get the cluster API endpoint after cluster creation, if necessary. This command can take a while. 
And use command `dig` to check the load balancer hostname to get the ip address, use this ip to setup an A record for your external dns name. -####Access the cluster +#### Access the cluster Once the API server is running, you should see: @@ -313,7 +315,7 @@ ip-10-0-0-xx.us-west-1.compute.internal Ready,SchedulingDisabled 5m ``` -###Setup PaddlePaddle Environment on AWS +### Setup PaddlePaddle Environment on AWS Now, we've created a cluster with following network capability: From 2a20fdc14bce87ce3d092ab9bff8349be5194b05 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 13 Jan 2017 14:35:56 +0800 Subject: [PATCH 15/37] Change BufferArgPtr to BufferArg* --- paddle/function/Function.cpp | 9 ++++++--- paddle/function/Function.h | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp index dbe3a4e9f6..3fdc37b968 100644 --- a/paddle/function/Function.cpp +++ b/paddle/function/Function.cpp @@ -79,15 +79,18 @@ FuncConfig& FuncConfig::set(const std::string& key, bool v) { void BufferArgs::addArg(const Matrix& arg, const TensorShape& shape, ArgType argType) { - args_.push_back(std::make_shared(arg, shape, argType)); + _args_.push_back(new BufferArg(arg, shape, argType)); + addArg(*_args_.back()); } void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) { - args_.push_back(std::make_shared(arg, argType)); + _args_.push_back(new SparseMatrixArg(arg, argType)); + addArg(*_args_.back()); } void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) { - args_.push_back(std::make_shared(arg, argType)); + _args_.push_back(new SparseMatrixArg(arg, argType)); + addArg(*_args_.back()); } ClassRegistrar FunctionBase::funcRegistrar_; diff --git a/paddle/function/Function.h b/paddle/function/Function.h index 249f8f9cfa..afbd4911b0 100644 --- a/paddle/function/Function.h +++ b/paddle/function/Function.h @@ -50,10 +50,25 @@ protected: * Argument type for 
Function::calc(). * A BufferArgs contains a set of BufferArg, * because Function can have multiple inputs and outputs. + * + * addArg() with Matix object used to adapt Layer Argument. + * Will create a BufferArg object in addArg(), + * and free in destructor of BufferArgs. + * + * addArg() with BufferArg object, just save BufferArg object address, + * and the caller needs to guarantee the validity of the BufferArg object + * in the BufferArgs life time. */ class BufferArgs { public: BufferArgs() {} + + ~BufferArgs() { + for (auto arg : _args_) { + delete arg; + } + } + size_t size() const { return args_.size(); } // add argument into BufferArgs @@ -62,7 +77,8 @@ public: // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO. template void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) { - args_.push_back(std::make_shared(arg, argType)); + _args_.push_back(new BufferArg(arg, argType)); + addArg(*_args_.back()); } // Add arg into BufferArgs and reshape the arg. @@ -83,14 +99,27 @@ public: return *args_[num]; } + void addArg(BufferArg& arg) { args_.push_back(&arg); } + + void addArg(SequenceIdArg& arg) { args_.push_back(&arg); } + + void addArg(SequenceArg& arg) { args_.push_back(&arg); } + + void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); } + private: - std::vector args_; + std::vector args_; + // The BufferArg object is constructed and freed by BufferArgs. + std::vector _args_; }; /** * \brief Base class for Function. * The basic Function implementation requires override init and calc interfaces. * + * The caller needs to ensure the validity of the arguments + * during Function execution. + * * Function inputs are readonly, Function outputs have two modes: ASSIGN_TO * and ADD_TO. 
* If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation From 039c0bf22384607446fcd38a4ba6c349ce073213 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 13 Jan 2017 15:05:22 +0800 Subject: [PATCH 16/37] Add some constructors for generating object that only contains shape (do not contains data). --- paddle/function/BufferArg.h | 33 ++++++++++++++++++++++++++++++-- paddle/function/FunctionTest.cpp | 18 +++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 12352ba29e..28542a8657 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -39,7 +39,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 }; class BufferArg; class SequenceArg; class SparseMatrixArg; -typedef std::shared_ptr BufferArgPtr; /** * \brief BufferArg used as the argument type of Function. @@ -50,6 +49,11 @@ typedef std::shared_ptr BufferArgPtr; * 3. SequenceArg for a Buffer of sequence data. * 4. SparseMatrixArg for a Buffer of sparse matrix. * + * Buffer shape + * For most buffers, the first dimension `shape()[0]` represents + * the size of the mini-batch. + * + * Buffer argType * There is an ArgType property for the BufferArg used as Function Output. 
* Whether the result of the Function calculation is assigned to the * output Buffer or added to the output Buffer is determined by the @@ -71,6 +75,14 @@ public: ArgType getArgType() const { return argType_; } public: + BufferArg(ValueType valueType, + const TensorShape& shape, + ArgType argType = UNSPECIFIED) + : buf_(nullptr), + valueType_(valueType), + shape_(shape), + argType_(argType) {} + BufferArg(void* buf, ValueType valueType, const TensorShape& shape, @@ -170,6 +182,12 @@ protected: // if a < b then value_.buf_[a] < value_.buf_[b] class SequenceIdArg : public BufferArg { public: + SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED) + : BufferArg(VALUE_TYPE_INT32, shape, argType) { + CHECK_EQ(shape_.ndims(), (size_t)1); + numSeqs_ = shape_[0] - 1; + } + SequenceIdArg(void* buf, const TensorShape& shape, ArgType argType = UNSPECIFIED) @@ -190,9 +208,18 @@ private: size_t numSeqs_; }; -// sequence data +// sequences data +// For mini-batch calculate, +// one batch can contain more than one sequence of data. +// SequenceArg can be used to represent sequences that contain multiple +// unequal lengths. 
class SequenceArg : public BufferArg { public: + SequenceArg(ValueType valueType, + const TensorShape& shape, + ArgType argType = UNSPECIFIED) + : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {} + SequenceArg(void* buf, ValueType valueType, const TensorShape& shape, @@ -210,6 +237,8 @@ public: void* getIdBuf() const { return startPositions_.data(); } size_t numSeqs() const { return startPositions_.numSeqs(); } + SequenceIdArg& getSequenceId() { return startPositions_; } + const SequenceIdArg& getSequenceId() const { return startPositions_; } private: SequenceIdArg startPositions_; diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp index eb05ca9a21..03c609b524 100644 --- a/paddle/function/FunctionTest.cpp +++ b/paddle/function/FunctionTest.cpp @@ -84,6 +84,10 @@ void testBufferArgs(const BufferArgs& inputs, } } +void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) { + check(inputs[0]); +} + TEST(Arguments, Matrix) { MatrixPtr matrix = Matrix::create(100, 200); CheckBufferArg check = [=](const BufferArg& arg) { @@ -144,4 +148,18 @@ TEST(Arguments, CpuSparseMatrix) { testBufferArgs(argments, checkFunc); } +TEST(Arguments, BufferArg) { + BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3}); + CheckBufferArg check = [=](const BufferArg& arg) { + EXPECT_EQ(arg.shape().ndims(), 3); + EXPECT_EQ(arg.shape()[0], 1); + EXPECT_EQ(arg.shape()[1], 2); + EXPECT_EQ(arg.shape()[2], 3); + }; + + BufferArgs argments; + argments.addArg(arg); + testBufferArgs(argments, check); +} + } // namespace paddle From 678174339fcc1aeb953684f795e1bf8cf1d631a5 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 13 Jan 2017 17:46:05 +0800 Subject: [PATCH 17/37] Implement the FunctionTest --- paddle/function/Function.h | 13 +- paddle/function/FunctionTest.h | 225 +++++++++++++++------------------ 2 files changed, 112 insertions(+), 126 deletions(-) diff --git a/paddle/function/Function.h b/paddle/function/Function.h index 
afbd4911b0..b0c6ba0fac 100644 --- a/paddle/function/Function.h +++ b/paddle/function/Function.h @@ -75,8 +75,17 @@ public: // Tensor can be Matrix, Vector, IVector. // For inputs, do not need argType. // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO. - template - void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) { + void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) { + _args_.push_back(new BufferArg(arg, argType)); + addArg(*_args_.back()); + } + + void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) { + _args_.push_back(new BufferArg(arg, argType)); + addArg(*_args_.back()); + } + + void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) { _args_.push_back(new BufferArg(arg, argType)); addArg(*_args_.back()); } diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h index 2847188fd6..412e3a7d1b 100644 --- a/paddle/function/FunctionTest.h +++ b/paddle/function/FunctionTest.h @@ -19,6 +19,8 @@ limitations under the License. */ namespace paddle { +typedef std::shared_ptr BufferArgPtr; + /** * \brief A class for comparing CPU and GPU implementations of Function. * @@ -45,143 +47,121 @@ namespace paddle { class FunctionCompare { public: FunctionCompare(const std::string& name, const FuncConfig& config) - : cpu(FunctionBase::funcRegistrar_.createByType(name + "-CPU")), - gpu(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) { - cpu->init(config); - gpu->init(config); + : cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")), + gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) { + cpuFunc_->init(config); + gpuFunc_->init(config); + } + + ~FunctionCompare() {} + + // input need only contains shape, do not contains data. 
+ void addInputs(const BufferArg& input) { + size_t size = + input.shape().getElements() * sizeOfValuType(input.valueType()); + cpuMemory_.emplace_back(std::make_shared(size)); + gpuMemory_.emplace_back(std::make_shared(size)); + + cpuInputs_.emplace_back(std::make_shared( + cpuMemory_.back()->getBuf(), input.valueType(), input.shape())); + gpuInputs_.emplace_back(std::make_shared( + gpuMemory_.back()->getBuf(), input.valueType(), input.shape())); + } + + // output need only contains shape, do not contains data. + void addOutputs(const BufferArg& output) { + size_t size = + output.shape().getElements() * sizeOfValuType(output.valueType()); + cpuMemory_.emplace_back(std::make_shared(size)); + gpuMemory_.emplace_back(std::make_shared(size)); + + cpuOutputs_.emplace_back( + std::make_shared(cpuMemory_.back()->getBuf(), + output.valueType(), + output.shape(), + ASSIGN_TO)); + gpuOutputs_.emplace_back( + std::make_shared(gpuMemory_.back()->getBuf(), + output.valueType(), + output.shape(), + ASSIGN_TO)); } - void addInputs(const BufferArg& input) { inputs.push_back(input); } + void addInputs(const SequenceArg& input) { + size_t batchSize = input.shape()[0]; + size_t numSeqs = batchSize / 10 + 1; + + size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); + cpuMemory_.emplace_back(std::make_shared(sizeId)); + gpuMemory_.emplace_back(std::make_shared(sizeId)); - void addOutputs(const BufferArg& output) { outputs.push_back(output); } + TensorShape seqsId({numSeqs + 1}); + // void* cpuBuffer = cpuMemory_.back()->getBuf(); + // void* gpuBuffer = gpuMemory_.back()->getBuf(); + + size_t size = + input.shape().getElements() * sizeOfValuType(input.valueType()); + cpuMemory_.emplace_back(std::make_shared(size)); + gpuMemory_.emplace_back(std::make_shared(size)); + + // TODO: need be implemented. 
+ } void run() { // prepare cpu/gpu arguments - prepareArgs(); + initInputs(); // function calculate - cpu->calc(cpuInputs, cpuOutputs); - gpu->calc(gpuInputs, gpuOutputs); - - // check outputs and inouts - auto checkArgs = [=](const BufferArgs& cpuArgs, const BufferArgs& gpuArgs) { - for (size_t i = 0; i < cpuArgs.size(); i++) { - auto cpu = cpuArgs[i]; - auto gpu = gpuArgs[i]; - CpuVector cpuVector(cpu.shape().getElements(), (real*)cpu.getData()); - GpuVector gpuVector(cpu.shape().getElements(), (real*)gpu.getData()); - - autotest::TensorCheckErr(cpuVector, gpuVector); + auto callFunction = [](FunctionBase* function, + std::vector& inputs, + std::vector& outputs) { + BufferArgs inArgs; + BufferArgs outArgs; + for (auto arg : inputs) { + inArgs.addArg(*arg); } - }; - checkArgs(cpuOutputs, gpuOutputs); - } -#if 0 - void cmpWithArg(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) { - // init cpu and gpu arguments - auto initArgs = [=]( - Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) { - for (const auto arg : inArgs) { - size_t size = sizeof(real); - for (const auto dim : arg.dims_) { - size *= dim; - } - if (arg.getData()) { - // todo(tianbing), waste unnecessary mem here - cpuMemory.emplace_back(std::make_shared(size)); - gpuMemory.emplace_back(std::make_shared(size)); - cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_)); - gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_)); - // already init outside - } else { - cpuMemory.emplace_back(std::make_shared(size)); - gpuMemory.emplace_back(std::make_shared(size)); - cpuArgs.emplace_back( - Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_)); - gpuArgs.emplace_back( - Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_)); - // will use an api to refactor this code. 
- CpuVector cpuVector(size / sizeof(real), - (real*)cpuArgs.back().getData()); - GpuVector gpuVector(size / sizeof(real), - (real*)gpuArgs.back().getData()); - cpuVector.uniform(0.001, 1); - gpuVector.copyFrom(cpuVector); - } + for (auto arg : outputs) { + outArgs.addArg(*arg); } + function->calc(inArgs, outArgs); }; - initArgs(cpuInputs, gpuInputs, inputs); - initArgs(cpuOutputs, gpuOutputs, outputs); - // function calculate - cpu->calc(cpuInputs, cpuOutputs); - gpu->calc(gpuInputs, gpuOutputs); + callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_); + callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_); // check outputs and inouts - auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) { - for (size_t i = 0; i < cpuArgs.size(); i++) { - auto cpu = cpuArgs[i]; - auto gpu = gpuArgs[i]; - size_t size = 1; - for (auto dim : cpu.dims_) { - size *= dim; - } - CpuVector cpuVector(size, (real*)cpu.getData()); - GpuVector gpuVector(size, (real*)gpu.getData()); - - autotest::TensorCheckErr(cpuVector, gpuVector); - } - }; - checkArgs(cpuOutputs, gpuOutputs); + compareOutputs(); } -#endif - std::shared_ptr getCpuFunction() const { return cpu; } + std::shared_ptr getCpuFunction() const { return cpuFunc_; } - std::shared_ptr getGpuFunction() const { return gpu; } + std::shared_ptr getGpuFunction() const { return gpuFunc_; } protected: - void prepareArgs() { - // TODO, if inputs has data - } + void initInputs() { + for (size_t i = 0; i < cpuInputs_.size(); i++) { + initArg(*cpuInputs_[i]); - void createArg(BufferArgs& cpuArgs, BufferArgs& gpuArgs, BufferArg& arg) { - size_t size = arg.shape().getElements() * sizeOfValuType(arg.valueType()); - cpuMemory_.emplace_back(std::make_shared(size)); - gpuMemory_.emplace_back(std::make_shared(size)); + // TODO: Need a BufferCopy used to copy from one BufferArg to another. 
+ CpuVector cpuVector(cpuInputs_[i]->shape().getElements(), + (real*)cpuInputs_[i]->data()); + GpuVector gpuVector(gpuInputs_[i]->shape().getElements(), + (real*)gpuInputs_[i]->data()); - cpuArgs.emplace_back( - BufferArg(cpuMemory_.back()->getBuf()), arg.valueType(), arg.shape()); - gpuArgs.emplace_back( - BufferArg(gpuMemory_.back()->getBuf()), arg.valueType(), arg.shape()); + gpuVector.copyFrom(cpuVector); + } } - void createArg(BufferArgs& cpuArgs, BufferArgs& gpuArgs, SequenceArg& arg) { - size_t batchSize = arg.shape()[0]; - size_t numSeqs = batchSize / 10 + 1; + void compareOutputs() { + for (size_t i = 0; i < cpuOutputs_.size(); i++) { + // TODO, Need a BufferCheck used to compare the two buffers. + auto cpu = cpuOutputs_[i]; + auto gpu = gpuOutputs_[i]; + CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data()); + GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data()); - size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); - cpuMemory_.emplace_back(std::make_shared(size)); - gpuMemory_.emplace_back(std::make_shared(size)); - - TensorShape seqsId({numSeqs + 1}); - void* cpuBuffer = cpuMemory_.back()->getBuf(); - void* gpuBuffer = gpuMemory_.back()->getBuf(); - - size_t size = arg.shape().getElements() * sizeOfValuType(arg.valueType()); - cpuMemory_.emplace_back(std::make_shared(size)); - gpuMemory_.emplace_back(std::make_shared(size)); - - cpuArgs.emplace_back(SequenceArg(cpuMemory_.back()->getBuf(), - arg.valueType(), - arg.shape(), - SequenceIdArg(cpuBuffer, seqsId))); - gpuArgs.emplace_back(SequenceArg(gpuMemory_.back()->getBuf(), - arg.valueType(), - arg.shape(), - SequenceIdArg(gpuBuffer, seqsId))); + autotest::TensorCheckErr(cpuVector, gpuVector); + } } // only init cpu argument, gpu argument copy from cpu argument. 
@@ -192,10 +172,10 @@ protected: void initArg(SequenceIdArg& arg, size_t batchSize) { size_t numSeqs = arg.numSeqs(); - int* buf = arg.data(); + int* buf = (int*)arg.data(); int pos = 0; size_t maxLen = 2 * batchSize / numSeqs; - for (int i = 0; i < numSeqs; ++i) { + for (int i = 0; i < (int)numSeqs; ++i) { int len = uniformRandom( std::min(maxLen, batchSize - pos - numSeqs + i)) + 1; @@ -207,17 +187,14 @@ protected: } protected: - std::shared_ptr cpu; - std::shared_ptr gpu; + std::shared_ptr cpuFunc_; + std::shared_ptr gpuFunc_; std::vector cpuMemory_; std::vector gpuMemory_; - // inputs and outputs - BufferArgs inputs; - BufferArgs outputs; - BufferArgs cpuInputs_; - BufferArgs cpuOutputs_; - BufferArgs gpuInputs_; - BufferArgs gpuOutputs_; + std::vector cpuInputs_; + std::vector cpuOutputs_; + std::vector gpuInputs_; + std::vector gpuOutputs_; }; } // namespace paddle From b791dcddcc0d83c7397cbd075ccce08b60dfcb27 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 13 Jan 2017 17:56:44 +0800 Subject: [PATCH 18/37] Fix CrossMapNormal Test --- paddle/function/CMakeLists.txt | 2 +- paddle/function/CrossMapNormalOpTest.cpp | 51 ++++++++++++++---------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 75a2acc55e..566fe53b14 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -19,7 +19,7 @@ if(WITH_TESTING) # TODO: # file(GLOB test_files . 
*OpTest.cpp) # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files}) - # add_simple_unittest(CrossMapNormalOpTest) + add_simple_unittest(CrossMapNormalOpTest) add_simple_unittest(TensorShapeTest) add_simple_unittest(TensorTypeTest) add_simple_unittest(BufferArgTest) diff --git a/paddle/function/CrossMapNormalOpTest.cpp b/paddle/function/CrossMapNormalOpTest.cpp index d65d9310af..da196a699c 100644 --- a/paddle/function/CrossMapNormalOpTest.cpp +++ b/paddle/function/CrossMapNormalOpTest.cpp @@ -27,15 +27,19 @@ TEST(CrossMapNormal, real) { << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW << " size=" << size; - FunctionCompare compare("CrossMapNormal", - FuncConfig() - .set("size", size) - .set("scale", (real)1.5) - .set("pow", (real)0.5)); - Dims dims{numSamples, channels, imgSizeH, imgSizeW}; - compare.cmpWithArg({Tensor(nullptr, dims)}, - {Tensor(nullptr, dims), Tensor(nullptr, dims)}, - {}); + // init Test object + FunctionCompare test("CrossMapNormal", + FuncConfig() + .set("size", size) + .set("scale", (real)1.5) + .set("pow", (real)0.5)); + // prepare input arguments + TensorShape shape{numSamples, channels, imgSizeH, imgSizeW}; + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape)); + // run Function + test.run(); } } } @@ -43,6 +47,9 @@ TEST(CrossMapNormal, real) { } } +#if 0 +// TODO(hedaoyuan): Now CrossMapNormalGrad not support ASSIGN_TO mode. +// Maybe all Function need support ASSIGN_TO mode. 
TEST(CrossMapNormalGrad, real) { for (size_t numSamples : {5, 32}) { for (size_t channels : {1, 5, 32}) { @@ -53,23 +60,25 @@ TEST(CrossMapNormalGrad, real) { << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW << " size=" << size; - FunctionCompare compare("CrossMapNormalGrad", - FuncConfig() - .set("size", size) - .set("scale", (real)1.5) - .set("pow", (real)0.5)); - Dims dims{numSamples, channels, imgSizeH, imgSizeW}; - compare.cmpWithArg({Tensor(nullptr, dims), - Tensor(nullptr, dims), - Tensor(nullptr, dims), - Tensor(nullptr, dims)}, - {Tensor(nullptr, dims)}, - {}); + FunctionCompare test("CrossMapNormalGrad", + FuncConfig() + .set("size", size) + .set("scale", (real)1.5) + .set("pow", (real)0.5)); + TensorShape shape{numSamples, channels, imgSizeH, imgSizeW}; + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape)); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape)); + // run Function + test.run(); } } } } } } +#endif } // namespace paddle From d2e2042df33a0a5f55f0282cf7f10a71c98b8dd8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 14 Jan 2017 16:04:58 +0800 Subject: [PATCH 19/37] Make sign compare as a compile warning not error. --- cmake/flags.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 0d1ef5cd84..b76852fc6c 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -96,6 +96,7 @@ set(COMMON_FLAGS -Wno-unused-parameter -Wno-unused-function -Wno-error=literal-suffix + -Wno-error=sign-compare -Wno-error=unused-local-typedefs) set(GPU_COMMON_FLAGS @@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS -Wdelete-non-virtual-dtor -Wno-unused-parameter -Wno-unused-function + -Wno-error=sign-compare -Wno-error=literal-suffix -Wno-error=unused-local-typedefs -Wno-error=unused-function # Warnings in Numpy Header. 
From cdf6af64a87c46da070da139925c32cc4064e6d3 Mon Sep 17 00:00:00 2001 From: liaogang Date: Sat, 14 Jan 2017 21:42:04 +0800 Subject: [PATCH 20/37] Add external openblas --- .travis.yml | 2 +- cmake/external/openblas.cmake | 32 +++++++++++++++---- cmake/system.cmake | 4 +-- .../build_and_install/build_from_source_en.md | 5 +-- paddle/scripts/travis/before_install.osx.sh | 4 +-- paddle/scripts/travis/build_and_test.sh | 2 +- paddle/scripts/travis/docs.sh | 2 +- 7 files changed, 34 insertions(+), 17 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0705baa1ac..162bebba09 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,9 +25,9 @@ addons: packages: - gcc-4.8 - g++-4.8 + - gfortran-4.8 - git - build-essential - - libatlas-base-dev - python - python-pip - python2.7-dev diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 0e8c29c831..43ebb39cd6 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -15,7 +15,6 @@ INCLUDE(cblas) IF(NOT ${CBLAS_FOUND}) - MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.") INCLUDE(ExternalProject) SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas) @@ -28,20 +27,39 @@ IF(NOT ${CBLAS_FOUND}) SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE) ENDIF(WIN32) + IF(CMAKE_COMPILER_IS_GNUCC) + ENABLE_LANGUAGE(Fortran) + LIST(APPEND CBLAS_LIBRARIES gfortran pthread) + ENDIF(CMAKE_COMPILER_IS_GNUCC) + + IF(NOT CMAKE_Fortran_COMPILER) + MESSAGE(FATAL_ERROR "To build lapack in libopenblas, " + "you need to set gfortran compiler: cmake .. 
-DCMAKE_Fortran_COMPILER=...") + ENDIF(NOT CMAKE_Fortran_COMPILER) + ExternalProject_Add( openblas - ${EXTERNAL_PROJECT_LOG_ARGS} - URL "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz" + GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git + GIT_TAG v0.2.19 PREFIX ${CBLAS_SOURCES_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR} BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER} - INSTALL_COMMAND make install PREFIX= + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib + INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX= UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + ) + + ExternalProject_Add_Step( + openblas lapacke_install + COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h" + COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h" + COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h" + COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h" + DEPENDEES install ) LIST(APPEND external_project_dependencies openblas) -ENDIF() +ENDIF(NOT ${CBLAS_FOUND}) INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) diff --git a/cmake/system.cmake b/cmake/system.cmake index 788db404eb..1e9f794964 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -47,7 +47,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS LOG_DOWNLOAD 0 # Wrap download in script to log output LOG_UPDATE 1 # Wrap update in script to log output LOG_CONFIGURE 1 # Wrap configure in script to log output - LOG_BUILD 1 # Wrap build in script to log 
output + LOG_BUILD 0 # Wrap build in script to log output LOG_TEST 1 # Wrap test in script to log output - LOG_INSTALL 1 # Wrap install in script to log output + LOG_INSTALL 0 # Wrap install in script to log output ) diff --git a/doc/getstarted/build_and_install/build_from_source_en.md b/doc/getstarted/build_and_install/build_from_source_en.md index 6954be3b2b..1abd7b698b 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.md +++ b/doc/getstarted/build_and_install/build_from_source_en.md @@ -64,7 +64,8 @@ As a simple example, consider the following: 1. **BLAS Dependencies(optional)** - Paddle will find BLAS from system's default path. But you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`. + CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically. + To utilize preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`. 
```bash # specify MKL @@ -99,7 +100,7 @@ As a simple example, consider the following: ```bash # necessary sudo apt-get update - sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git + sudo apt-get install -y g++ make cmake build-essential python python-pip libpython-dev git sudo pip install wheel numpy sudo pip install 'protobuf>=3.0.0' ``` diff --git a/paddle/scripts/travis/before_install.osx.sh b/paddle/scripts/travis/before_install.osx.sh index 7036f971fd..80f031a74e 100755 --- a/paddle/scripts/travis/before_install.osx.sh +++ b/paddle/scripts/travis/before_install.osx.sh @@ -1,6 +1,4 @@ #!/bin/bash brew update brew tap homebrew/science -brew install python -sudo pip install --upgrade protobuf -brew install swig openblas md5sha1sum protobuf +brew install openblas swig md5sha1sum diff --git a/paddle/scripts/travis/build_and_test.sh b/paddle/scripts/travis/build_and_test.sh index fd3aeb02b2..5e6350b574 100755 --- a/paddle/scripts/travis/build_and_test.sh +++ b/paddle/scripts/travis/build_and_test.sh @@ -6,7 +6,7 @@ if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages export PYTHONHOME=/opt/python/2.7.12 export PATH=/opt/python/2.7.12/bin:${PATH} - cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} + cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} NRPOC=`nproc` make -j $NPROC make coveralls diff --git a/paddle/scripts/travis/docs.sh b/paddle/scripts/travis/docs.sh index bdafb145bc..6b43cad20b 100755 --- a/paddle/scripts/travis/docs.sh +++ b/paddle/scripts/travis/docs.sh @@ -4,7 +4,7 @@ source ./common.sh # Compile Documentation only. -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} +cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} make paddle_docs paddle_docs_cn # check websites for broken links From 589bb84241baca1f53cee290b55ced88c331fc02 Mon Sep 17 00:00:00 2001 From: liaogang Date: Sat, 14 Jan 2017 21:53:52 +0800 Subject: [PATCH 21/37] Add openblas log configuration --- cmake/external/openblas.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 43ebb39cd6..29d17691db 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -39,6 +39,7 @@ IF(NOT ${CBLAS_FOUND}) ExternalProject_Add( openblas + ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git GIT_TAG v0.2.19 PREFIX ${CBLAS_SOURCES_DIR} From 35d7b17f79342431c7e392e4644140326d37feca Mon Sep 17 00:00:00 2001 From: liaogang Date: Sun, 15 Jan 2017 00:02:57 +0800 Subject: [PATCH 22/37] Disable externel python --- cmake/external/python.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 29247d5c3d..209e679f2c 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) "please use pip to upgrade protobuf.") ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) + MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.") ##################################### PYTHON ######################################## SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python) SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python) From f7c8287df9604c176373c819462243644e10c05d Mon Sep 17 00:00:00 2001 From: liujunyi Date: Sun, 15 Jan 2017 10:32:26 +0800 Subject: [PATCH 23/37] add lack dependency to zlib --- cmake/external/protobuf.cmake | 1 + 1 file changed, 1 insertion(+) diff --git 
a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index c0cf2719f9..613614c0e3 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -54,6 +54,7 @@ ExternalProject_Add( CONFIGURE_COMMAND ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake -Dprotobuf_BUILD_TESTS=OFF + -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} From b697154ac46f825d69c6ced3e585d41733b6247c Mon Sep 17 00:00:00 2001 From: liujunyi Date: Sun, 15 Jan 2017 11:10:47 +0800 Subject: [PATCH 24/37] correct mkl env var --- cmake/cblas.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 4e1ae7dc81..26306f9849 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -16,7 +16,7 @@ set(CBLAS_FOUND OFF) ## Find MKL First. -set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL") +set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL") find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT}/include) From 66851af107c9eb3e6cea8485db467daf9b6cc078 Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 16 Jan 2017 10:44:51 +0800 Subject: [PATCH 25/37] Detect CentOS 7 in CMake --- cmake/system.cmake | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index 788db404eb..d40499c39d 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -21,6 +21,7 @@ ELSE(WIN32) SET(MACOS_VERSION ${VERSION}) SET(HOST_SYSTEM "macosx") ELSE(APPLE) + IF(EXISTS "/etc/issue") FILE(READ "/etc/issue" LINUX_ISSUE) IF(LINUX_ISSUE MATCHES "CentOS") @@ -31,6 +32,14 @@ ELSE(WIN32) SET(HOST_SYSTEM "ubuntu") ENDIF() ENDIF(EXISTS "/etc/issue") + + IF(EXISTS "/etc/redhat-release") + FILE(READ "/etc/redhat-release" LINUX_ISSUE) + IF(LINUX_ISSUE MATCHES "CentOS") + SET(HOST_SYSTEM "centos") + ENDIF() + ENDIF(EXISTS "/etc/redhat-release") + ENDIF(APPLE) ENDIF(WIN32) From 
1e233171ff59decc3d179e30ab29e65735583747 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 16 Jan 2017 17:18:24 +0800 Subject: [PATCH 26/37] Fix merge error --- paddle/function/Function.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp index 2632c17e3a..f47d55a4ad 100644 --- a/paddle/function/Function.cpp +++ b/paddle/function/Function.cpp @@ -96,7 +96,8 @@ void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) { void BufferArgs::addArg(const Matrix& matrix, const IVector& vector, ArgType argType) { - args_.push_back(std::make_shared(matrix, vector, argType)); + _args_.push_back(new SequenceArg(matrix, vector, argType)); + addArg(*_args_.back()); } ClassRegistrar FunctionBase::funcRegistrar_; From 19cb2a7fb3e750eb2a81337bed063782977d7195 Mon Sep 17 00:00:00 2001 From: zhangruiqing01 Date: Mon, 16 Jan 2017 11:26:29 +0800 Subject: [PATCH 27/37] fix default value of cos_sim scale --- python/paddle/trainer/config_parser.py | 2 +- python/paddle/trainer_config_helpers/layers.py | 2 +- .../tests/configs/protostr/test_ntm_layers.protostr | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 674b5ac58b..8e9c40877b 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2628,7 +2628,7 @@ class AverageLayer(LayerBase): @config_layer('cos') class CosSimLayer(LayerBase): - def __init__(self, name, inputs, cos_scale=5, device=None): + def __init__(self, name, inputs, cos_scale=1, device=None): super(CosSimLayer, self).__init__( name, 'cos', 1, inputs=inputs, device=device) config_assert(len(self.inputs) == 2, 'CosSimLayer must have 2 inputs') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 9b6e5774bc..23c0e9174f 100644 --- a/python/paddle/trainer_config_helpers/layers.py 
+++ b/python/paddle/trainer_config_helpers/layers.py @@ -1673,7 +1673,7 @@ def trans_layer(input, name=None, layer_attr=None): @wrap_name_default() @layer_support() -def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): +def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): """ Cosine Similarity Layer. The cosine similarity equation is here. diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr index b30bbb2a4e..c1bfdf1b19 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr @@ -79,7 +79,7 @@ layers { inputs { input_layer_name: "b" } - cos_scale: 5 + cos_scale: 1 } layers { name: "__cos_sim_1__" @@ -92,7 +92,7 @@ layers { inputs { input_layer_name: "c" } - cos_scale: 5 + cos_scale: 1 } layers { name: "__sum_to_one_norm_layer_0__" From f8c9c889c34dd3530b899fc12523579802d4f582 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 16 Jan 2017 21:30:44 +0800 Subject: [PATCH 28/37] Fix CrossMapNormalTest --- paddle/function/CrossMapNormalOp.cpp | 9 +++++++-- paddle/function/CrossMapNormalOpTest.cpp | 4 ---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/paddle/function/CrossMapNormalOp.cpp b/paddle/function/CrossMapNormalOp.cpp index 92980c503f..8e7dc72524 100644 --- a/paddle/function/CrossMapNormalOp.cpp +++ b/paddle/function/CrossMapNormalOp.cpp @@ -188,8 +188,13 @@ public: CHECK(inputs[0].shape() == inputs[3].shape()); CHECK(inputs[0].shape() == outputs[0].shape()); - // TODO(hedaoyuan): need support ASSIGN_TO mode. - CHECK_EQ(outputs[0].getArgType(), ADD_TO); + if (outputs[0].getArgType() != ADD_TO) { + // Currently, some algorithm implementations are ASSIGN_TO mode, + // if need to support the ADD_TO calculation, need to clear the output. 
+ typename Tensor::Vector tmp( + outputs[0].shape().getElements(), outputs[0].data()); + tmp.zero(); + } size_t samples = inputs[0].shape()[0]; size_t channels = inputs[0].shape()[1]; diff --git a/paddle/function/CrossMapNormalOpTest.cpp b/paddle/function/CrossMapNormalOpTest.cpp index da196a699c..51f5da81bf 100644 --- a/paddle/function/CrossMapNormalOpTest.cpp +++ b/paddle/function/CrossMapNormalOpTest.cpp @@ -47,9 +47,6 @@ TEST(CrossMapNormal, real) { } } -#if 0 -// TODO(hedaoyuan): Now CrossMapNormalGrad not support ASSIGN_TO mode. -// Maybe all Function need support ASSIGN_TO mode. TEST(CrossMapNormalGrad, real) { for (size_t numSamples : {5, 32}) { for (size_t channels : {1, 5, 32}) { @@ -79,6 +76,5 @@ TEST(CrossMapNormalGrad, real) { } } } -#endif } // namespace paddle From 07787f72ba69dd0bbca4ee01f84c59fb34dc02c9 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Mon, 16 Jan 2017 09:19:05 -0800 Subject: [PATCH 29/37] clarify and fix problems in paddle on aws k8s (create cluster part) --- doc/howto/usage/k8s/k8s_aws_en.md | 138 ++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 45 deletions(-) diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md index c776ba9eb9..bd9eee7296 100644 --- a/doc/howto/usage/k8s/k8s_aws_en.md +++ b/doc/howto/usage/k8s/k8s_aws_en.md @@ -2,18 +2,18 @@ ## Create AWS Account and IAM Account -AWS account allow us to manage AWS from Web Console. Amazon AMI enable us to manage AWS from command line interface. +AWS account allow us to manage AWS from Web Console. Amazon IAM enable us to manage AWS from command line interface. -We need to create an AMI user with sufficient privilege to create kubernetes cluster on AWS. +We need to create an IAM user with sufficient privilege to create kubernetes cluster on AWS. To sign up an AWS account, please follow [this guide](http://docs.aws.amazon.com/lambda/latest/dg/setting-up.html). 
-To create users and user groups under an AWS account, please +To create IAM users and user groups under an AWS account, please follow [this guide](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html). -Please be aware that this tutorial needs the following privileges for the user in AMI: +Please be aware that this tutorial needs the following privileges for the user in IAM: - AmazonEC2FullAccess - AmazonS3FullAccess @@ -27,14 +27,6 @@ Please be aware that this tutorial needs the following privileges for the user i - AWSKeyManagementServicePowerUser -By the time we write this tutorial, we noticed that Chinese AWS users -might suffer from authentication problems when running this tutorial. -Our solution is that we create a VM instance with the default Amazon -AMI and in the same zone as our cluster runs, so we can SSH to this VM -instance as a tunneling server and control our cluster and jobs from -it. - - ## PaddlePaddle on AWS Here we will show you step by step on how to run PaddlePaddle training on AWS cluster. @@ -59,7 +51,7 @@ gpg2 --fingerprint FC8A365E ``` The correct key fingerprint is `18AD 5014 C99E F7E3 BA5F 6CE9 50BD D3E0 FC8A 365E` -Go to the [releases](https://github.com/coreos/kube-aws/releases) and download the latest release tarball and detached signature (.sig) for your architecture. +Go to the [releases](https://github.com/coreos/kube-aws/releases) and download release tarball (this tutorial is using v0.9.1) and detached signature (.sig) for your architecture. Validate the tarball's GPG signature: @@ -88,14 +80,22 @@ mv ${PLATFORM}/kube-aws /usr/local/bin [kubectl](https://kubernetes.io/docs/user-guide/kubectl-overview/) is a command line interface for running commands against Kubernetes clusters. -Go to the [releases](https://github.com/kubernetes/kubernetes/releases) and download the latest release tarball. 
- -Extract the tarball and then concate the kubernetes binaries directory into PATH: +Download `kubectl` from the Kubernetes release artifact site with the `curl` tool. ``` -export PATH=/platforms/linux/amd64:$PATH # The exact path depend on your platform +# OS X +curl -O https://storage.googleapis.com/kubernetes-release/release/"$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)"/bin/darwin/amd64/kubectl + +# Linux +curl -O https://storage.googleapis.com/kubernetes-release/release/"$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)"/bin/linux/amd64/kubectl ``` +Make the kubectl binary executable and move it to your PATH (e.g. `/usr/local/bin`): + +``` +chmod +x ./kubectl +sudo mv ./kubectl /usr/local/bin/kubectl +``` ### Configure AWS Credentials @@ -109,17 +109,18 @@ aws configure ``` -Fill in the required fields (You can get your AWS aceess key id and AWS secrete access key by following [this](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html) instruction): +Fill in the required fields: ``` AWS Access Key ID: YOUR_ACCESS_KEY_ID AWS Secrete Access Key: YOUR_SECRETE_ACCESS_KEY -Default region name: us-west-2 +Default region name: us-west-1 Default output format: json - ``` +`YOUR_ACCESS_KEY_ID`, and `YOUR_SECRETE_ACCESS_KEY` is the IAM key and secret from [Create AWS Account and IAM Account](#create-aws-account-and-iam-account) + Verify that your credentials work by describing any instances you may already have running on your account: ``` @@ -134,7 +135,9 @@ The keypair that will authenticate SSH access to your EC2 instances. The public Follow [EC2 Keypair docs](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) to create a EC2 key pair -After creating a key pair, you will use the name you gave the keys to configure the cluster. Key pairs are only available to EC2 instances in the same region. 
+After creating a key pair, you will use the key pair name to configure the cluster. + +Key pairs are only available to EC2 instances in the same region. We are using us-west-1 in our tutorial, so make sure to create key pairs in that region (N. California). #### KMS key @@ -143,12 +146,12 @@ Amazon KMS keys are used to encrypt and decrypt cluster TLS assets. If you alrea You can create a KMS key in the AWS console, or with the aws command line tool: ``` -$ aws kms --region=us-west-1 create-key --description="kube-aws assets" +aws kms --region=us-west-1 create-key --description="kube-aws assets" { "KeyMetadata": { "CreationDate": 1458235139.724, "KeyState": "Enabled", - "Arn": "arn:aws:kms:us-west-1:xxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx", + "Arn": "arn:aws:kms:us-west-1:aaaaaaaaaaaaa:key/xxxxxxxxxxxxxxxxxxx", "AWSAccountId": "xxxxxxxxxxxxx", "Enabled": true, "KeyUsage": "ENCRYPT_DECRYPT", @@ -158,11 +161,11 @@ $ aws kms --region=us-west-1 create-key --description="kube-aws assets" } ``` -You will use the `KeyMetadata.Arn` string to identify your KMS key in the init step. +We will need to use the value of `Arn` later. And then you need to add several inline policies in your user permission.
-Go to AMI user page, click on `Add inline policy` button, and then select `Custom Policy` +Go to IAM user page, click on `Add inline policy` button, and then select `Custom Policy` paste into following inline policies: @@ -178,7 +181,7 @@ paste into following inline policies: "kms:Encrypt" ], "Resource": [ - "arn:aws:kms:*:xxxxxxxxx:key/*" + "arn:aws:kms:*:AWS_ACCOUNT_ID:key/*" ] }, { @@ -194,29 +197,37 @@ paste into following inline policies: "cloudformation:DescribeStackEvents" ], "Resource": [ - "arn:aws:cloudformation:us-west-1:xxxxxxxxx:stack/YOUR_CLUSTER_NAME/*" + "arn:aws:cloudformation:us-west-1:AWS_ACCOUNT_ID:stack/MY_CLUSTER_NAME/*" ] } ] } ``` +`AWS_ACCOUNT_ID`: You can get it from following command line: + +``` +aws sts get-caller-identity --output text --query Account +``` + +`MY_CLUSTER_NAME`: Pick a MY_CLUSTER_NAME that you like, you will use it later as well. #### External DNS name -When the cluster is created, the controller will expose the TLS-secured API on a public IP address. You will need to create an A record for the external DNS hostname you want to point to this IP address. You can find the API external IP address after the cluster is created by invoking kube-aws status. +When the cluster is created, the controller will expose the TLS-secured API on a DNS name. + +The A record of that DNS name needs to be point to the cluster ip address. + +We will need to use DNS name later in tutorial. If you don't already own one, you can choose any DNS name (e.g., `paddle`) and modify `/etc/hosts` to associate cluster ip with that DNS name. #### S3 bucket You need to create an S3 bucket before startup the Kubernetes cluster. -command (need to have a global unique name): +There are some bug in aws cli in creating S3 bucket, so let's use [web console](https://console.aws.amazon.com/s3/home?region=us-west-1). 
-``` -paddle aws s3api --region=us-west-1 create-bucket --bucket bucket-name -``` +Click on `Create Bucket`, fill in a unique BUCKET_NAME, and make sure region is us-west-1 (Northern California). -If you get an error message, try a different bucket name. The bucket name needs to be globally unique. #### Initialize an asset directory @@ -230,33 +241,44 @@ $ cd my-cluster Initialize the cluster CloudFormation stack with the KMS Arn, key pair name, and DNS name from the previous step: ``` -$ kube-aws init \ ---cluster-name=my-cluster-name \ ---external-dns-name=my-cluster-endpoint \ +kube-aws init \ +--cluster-name=MY_CLUSTER_NAME \ +--external-dns-name=MY_EXTERNAL_DNS_NAME \ --region=us-west-1 \ ---availability-zone=us-west-1c \ ---key-name=key-pair-name \ +--availability-zone=us-west-1a \ +--key-name=KEY_PAIR_NAME \ --kms-key-arn="arn:aws:kms:us-west-1:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx" ``` -Here `us-west-1c` is used for parameter `--availability-zone`, but supported availability zone varies among AWS accounts. +`MY_CLUSTER_NAME`: the one you picked in [KMS key](#kms-key) + +`MY_EXTERNAL_DNS_NAME`: see [External DNS name](#external-dns-name) -Please check if `us-west-1c` is supported by `aws ec2 --region us-west-1 describe-availability-zones`, if not switch to other supported availability zone. (e.g., `us-west-1a`, or `us-west-1b`) +`KEY_PAIR_NAME`: see [EC2 key pair](#ec2-key-pair) + +`--kms-key-arn`: the "Arn" in [KMS key](#kms-key) + +Here `us-west-1a` is used for parameter `--availability-zone`, but supported availability zone varies among AWS accounts. + +Please check if `us-west-1a` is supported by `aws ec2 --region us-west-1 describe-availability-zones`, if not switch to other supported availability zone. (e.g., `us-west-1a`, or `us-west-1b`) + +Note: please don't use `us-west-1c`. Subnets can currently only be created in the following availability zones: us-west-1b, us-west-1a. There will now be a cluster.yaml file in the asset directory. 
This is the main configuration file for your cluster. + #### Render contents of the asset directory In the simplest case, you can have kube-aws generate both your TLS identities and certificate authority for you. ``` -$ kube-aws render credentials --generate-ca +kube-aws render credentials --generate-ca ``` The next command generates the default set of cluster assets in your asset directory. ``` -sh $ kube-aws render stack +kube-aws render stack ``` Here's what the directory structure looks like: @@ -292,15 +314,41 @@ These assets (templates and credentials) are used to create, update and interact #### Create the instances defined in the CloudFormation template -Now for the exciting part, creating your cluster (choose any ``): +Now let's create your cluster (choose any PREFIX for the command below): ``` -$ kube-aws up --s3-uri s3:/// +kube-aws up --s3-uri s3://BUCKET_NAME/PREFIX ``` +`BUCKET_NAME`: the bucket name that you used in [S3 bucket](#s3-bucket) + + #### Configure DNS -You can invoke `kube-aws status` to get the cluster API endpoint after cluster creation, if necessary. This command can take a while. And use command `dig` to check the load balancer hostname to get the ip address, use this ip to setup an A record for your external dns name. +You can invoke `kube-aws status` to get the cluster API endpoint after cluster creation. + +``` +$ kube-aws status +Cluster Name: paddle-cluster +Controller DNS Name: paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com +``` + +Use command `dig` to check the load balancer hostname to get the ip address. + +``` +$ dig paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com + +;; QUESTION SECTION: +;paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com. IN A + +;; ANSWER SECTION: +paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com. 59 IN A 54.241.164.52 +paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com. 
59 IN A 54.67.102.112 +``` + +In the above output, both ip `54.241.164.52`, `54.67.102.112` will work. + +If you own a DNS name, set the A record to any of the above ip. Otherwise you can edit `/etc/hosts` to associate ip with the DNS name. #### Access the cluster From 50afa35a59de9e9c2a31e873ab650eb181de801d Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Mon, 16 Jan 2017 14:29:52 -0800 Subject: [PATCH 30/37] fixes according to comment --- doc/howto/usage/k8s/k8s_aws_en.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md index bd9eee7296..00bc41e5c3 100644 --- a/doc/howto/usage/k8s/k8s_aws_en.md +++ b/doc/howto/usage/k8s/k8s_aws_en.md @@ -2,9 +2,7 @@ ## Create AWS Account and IAM Account -AWS account allow us to manage AWS from Web Console. Amazon IAM enable us to manage AWS from command line interface. - -We need to create an IAM user with sufficient privilege to create kubernetes cluster on AWS. +Under each AWS account, we can create multiple [IAM](http://docs.aws.amazon.com/IAM/latest/UserGuide/introduction.html) users. This allows us to grant some privileges to each IAM user and to create/operate AWS clusters as an IAM user. To sign up an AWS account, please follow @@ -51,7 +49,7 @@ gpg2 --fingerprint FC8A365E ``` The correct key fingerprint is `18AD 5014 C99E F7E3 BA5F 6CE9 50BD D3E0 FC8A 365E` -Go to the [releases](https://github.com/coreos/kube-aws/releases) and download release tarball (this tutorial is using v0.9.1) and detached signature (.sig) for your architecture. +We can download `kube-aws` from its [release page](https://github.com/coreos/kube-aws/releases). In this tutorial, we use version 0.9.1 Validate the tarball's GPG signature: @@ -224,7 +222,7 @@ We will need to use DNS name later in tutorial. If you don't already own one, yo You need to create an S3 bucket before startup the Kubernetes cluster. 
-There are some bug in aws cli in creating S3 bucket, so let's use [web console](https://console.aws.amazon.com/s3/home?region=us-west-1). +There are some bugs in aws cli in creating S3 bucket, so let's use the [Web console](https://console.aws.amazon.com/s3/home?region=us-west-1). Click on `Create Bucket`, fill in a unique BUCKET_NAME, and make sure region is us-west-1 (Northern California). From ae0f953eb03f021e5892bdea2009d7088a346e46 Mon Sep 17 00:00:00 2001 From: qijun Date: Tue, 17 Jan 2017 03:10:53 +0000 Subject: [PATCH 31/37] add centos build doc --- .../build_and_install/build_from_source_en.md | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/doc/getstarted/build_and_install/build_from_source_en.md b/doc/getstarted/build_and_install/build_from_source_en.md index 1abd7b698b..924ccf0116 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.md +++ b/doc/getstarted/build_and_install/build_from_source_en.md @@ -4,6 +4,8 @@ Installing from Sources * [1. Download and Setup](#download) * [2. Requirements](#requirements) * [3. Build on Ubuntu](#ubuntu) +* [4. Build on Centos](#centos) + ## Download and Setup You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle). @@ -151,3 +153,64 @@ export PATH=/bin:$PATH # install PaddlePaddle Python modules. sudo pip install /opt/paddle/share/wheels/*.whl ``` +## Build on Centos 7 + +### Install Dependencies + +- **CPU Dependencies** + + ```bash + # necessary + sudo yum update + sudo yum install -y epel-release + sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git + sudo pip install wheel numpy + sudo pip install 'protobuf>=3.0.0' + ``` + +- **GPU Dependencies (optional)** + + To build GPU version, you will need the following installed: + + 1. a CUDA-capable GPU + 2. A supported version of Linux with a gcc compiler and toolchain + 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) + 4. 
NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn) + + The CUDA development environment relies on tight integration with the host development environment, + including the host compiler and C runtime libraries, and is therefore only supported on + distribution versions that have been qualified for this CUDA Toolkit release. + + After downloading cuDNN library, issue the following commands: + + ```bash + sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local + sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn* + ``` + Then you need to set LD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc. + + ```bash + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH + export PATH=/usr/local/cuda/bin:$PATH + ``` + +### Build and Install + +As usual, the best option is to create build folder under paddle project directory. + +```bash +mkdir build && cd build +``` + +Finally, you can build and install PaddlePaddle: + +```bash +# you can add build option here, such as: +cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install> +# please use sudo make install, if you want to install PaddlePaddle into the system +make -j `nproc` && make install +# set PaddlePaddle installation path in ~/.bashrc +export PATH=<path to install>/bin:$PATH +# install PaddlePaddle Python modules. +sudo pip install /opt/paddle/share/wheels/*.whl +``` From ceb2d39799fa600ff77fdbe019191f846829e916 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 17 Jan 2017 12:49:05 +0800 Subject: [PATCH 32/37] Fix bugs in config_helpers unittest. * It gets wrong command line arguments before.
--- .../tests/configs/run_tests.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh b/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh index a37eb6439e..c8a3b190b1 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh @@ -2,16 +2,18 @@ cd `dirname $0` set -e +PYTHON_EXEC=$1 +COMPARE_PROTO_UTIL=$2 protostr=`dirname $0`/protostr files=`ls $protostr | grep -v "unittest"` -./generate_protostr.sh $1 +./generate_protostr.sh ${PYTHON_EXEC} . ./file_list.sh -if [ -z $1 ]; then +if [ -z ${COMPARE_PROTO_UTIL} ]; then for file in $files do base_protostr=$protostr/$file @@ -22,20 +24,20 @@ if [ -z $1 ]; then else for file in ${configs[*]} do - if ! $1 $protostr/$file.protostr $protostr/$file.protostr.unittest; then + if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest; then diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u fi - if ! $1 $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then + if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u fi done for file in ${whole_configs[*]} do - if ! $1 $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then + if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u fi - if ! $1 $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then + if ! 
${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u fi done From a5c1658d455008bbe4e4a5a3075bbd6eced30f28 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 17 Jan 2017 17:20:13 +0800 Subject: [PATCH 33/37] Always create protobuf_equal * Because currently Paddle only use protobuf 3. --- .../tests/CMakeLists.txt | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt index 403aafabe9..93dd7796c2 100644 --- a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt +++ b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt @@ -9,17 +9,10 @@ add_test(NAME test_reset_hook ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) -if (PROTOBUF_3) - add_paddle_exe(protobuf_equal - ProtobufEqualMain.cpp) - add_test(NAME test_layerHelpers - COMMAND - ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} - ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal - ) -else() - add_test(NAME test_layerHelpers - COMMAND - ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} - ) -endif() +add_paddle_exe(protobuf_equal + ProtobufEqualMain.cpp) +add_test(NAME test_layerHelpers + COMMAND + ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} + ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal +) From 1c5a7c431690ba6a981ac9f72aacf556da1bc1db Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Tue, 17 Jan 2017 17:33:15 +0800 Subject: [PATCH 34/37] follow comments --- paddle/function/BufferArg.h | 1 + paddle/function/FunctionTest.cpp | 1 + paddle/function/FunctionTest.h | 2 +- 3 files changed, 3 
insertions(+), 1 deletion(-) diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 9c792c6bb7..84209265ce 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -192,6 +192,7 @@ public: SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED) : BufferArg(VALUE_TYPE_INT32, shape, argType) { CHECK_EQ(shape_.ndims(), (size_t)1); + CHECK_GT(shape_[0], 1); numSeqs_ = shape_[0] - 1; } diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp index 03c609b524..fdf7e631e5 100644 --- a/paddle/function/FunctionTest.cpp +++ b/paddle/function/FunctionTest.cpp @@ -85,6 +85,7 @@ void testBufferArgs(const BufferArgs& inputs, } void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) { + EXPECT_EQ(inputs.size(), 1); check(inputs[0]); } diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h index 412e3a7d1b..24e7a36a43 100644 --- a/paddle/function/FunctionTest.h +++ b/paddle/function/FunctionTest.h @@ -172,7 +172,7 @@ protected: void initArg(SequenceIdArg& arg, size_t batchSize) { size_t numSeqs = arg.numSeqs(); - int* buf = (int*)arg.data(); + int* buf = reinterpret_cast<int*>(arg.data()); int pos = 0; size_t maxLen = 2 * batchSize / numSeqs; for (int i = 0; i < (int)numSeqs; ++i) { From 878b321a128bd405e0cb66efc00e800ea03d0fad Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Tue, 17 Jan 2017 11:59:26 -0800 Subject: [PATCH 35/37] changes wording for paddle on k8s tutorial --- doc/howto/usage/k8s/k8s_aws_en.md | 92 ++++++++---------- .../usage/k8s/src/pserver_and_trainer.png | Bin 0 -> 71688 bytes 2 files changed, 41 insertions(+), 51 deletions(-) create mode 100644 doc/howto/usage/k8s/src/pserver_and_trainer.png diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md index 00bc41e5c3..10f5a2ef2f 100644 --- a/doc/howto/usage/k8s/k8s_aws_en.md +++ b/doc/howto/usage/k8s/k8s_aws_en.md @@ -361,20 +361,9 @@ ip-10-0-0-xx.us-west-1.compute.internal
Ready,SchedulingDisabled 5m ``` -### Setup PaddlePaddle Environment on AWS +### Setup Elastic File System for Cluster -Now, we've created a cluster with following network capability: - -1. All Kubernetes nodes can communicate with each other. - -1. All Docker containers on Kubernetes nodes can communicate with each other. - -1. All Kubernetes nodes can communicate with all Docker containers on Kubernetes nodes. - -1. All other traffic loads from outside of Kubernetes nodes cannot reach to the Docker containers on Kubernetes nodes except for creating the services for containers. - - -For sharing the training data across all the Kubernetes nodes, we use EFS (Elastic File System) in AWS. Ceph might be a better solution, but it requires high version of Linux kernel that might not be stable enough at this moment. We haven't automated the EFS setup at this moment, so please do the following steps: +Training data is usually served on a distributed filesystem, we use Elastic File System (EFS) on AWS. Ceph might be a better solution, but it requires high version of Linux kernel that might not be stable enough at this moment. We haven't automated the EFS setup at this moment, so please do the following steps: 1. Make sure you added AmazonElasticFileSystemFullAccess policy in your group. @@ -391,57 +380,71 @@ For sharing the training data across all the Kubernetes nodes, we use EFS (Elast
![](src/efs_mount.png)
-Before starting the training, you should place your user config and divided training data onto EFS. When the training start, each task will copy related files from EFS into container, and it will also write the training results back onto EFS, we will show you how to place the data later in this article. +We will place user config and divided training data onto EFS. Training task will cache related files by copying them from EFS into container. It will also write the training results back onto EFS. We will show you how to place the data later in this article. + + + +### Core Concepts of PaddlePaddle Training on AWS +Now we've already setup a 3 nodes distributed Kubernetes cluster, and on each node we've attached the EFS volume. In this training demo, we will create three Kubernetes pods and schedule them on three nodes. Each pod contains a PaddlePaddle container. When container gets created, it will start parameter server (pserver) and trainer process, load the training data from EFS volume and start the distributed training task. +#### Distributed Training Job -###Core Concept of PaddlePaddle Training on AWS +Distributed training job is represented by a [kubernetes job](https://kubernetes.io/docs/user-guide/jobs/#what-is-a-job). -Now we've already setup a 3 nodes distributed Kubernetes cluster, and on each node we've attached the EFS volume, in this training demo, we will create three Kubernetes pod and scheduling them on 3 node. Each pod contains a PaddlePaddle container. When container gets created, it will start pserver and trainer process, load the training data from EFS volume and start the distributed training task. +Kubernetes job is described by a job config file. The file contains lots of configuration information. For example, PaddlePaddle's node number, `paddle pserver` open port number, the network card info etc. These information are passed into container for `pserver` and `trainer` to use as environment variables. 
-####Use Kubernetes Job +In one distributed training job, we will: -We use Kubernetes job to represent one time of distributed training. After the job get finished, Kubernetes will destroy job container and release all related resources. +1. Upload the pre-divided training data and configuration file onto EFS volume. +1. Create and submit the Kubernetes job config to the Kubernetes cluster to start the training job. -We can write a yaml file to describe the Kubernetes job. The file contains lots of configuration information, for example PaddlePaddle's node number, `paddle pserver` open port number, the network card info etc., these information are passed into container for processes to use as environment variables. +#### Parameter Server and Trainer -In one time of distributed training, user will confirm the PaddlePaddle node number first. And then upload the pre-divided training data and configuration file onth EFS volume. And then create the Kubernetes job yaml file; submit to the Kubernetes cluster to start the training job. +There are two roles in a PaddlePaddle cluster: `parameter server` and `trainer`. Each parameter server process maintains a shard of the global model. Each trainer has its local copy of the model, and uses its local data to update the model. During the training process, trainers send model updates to parameter servers, parameter servers are responsible for aggregating these updates, so that trainers can synchronize their local copy with the global model. -####Create PaddlePaddle Node +
![The model is partitioned into two shards, managed by two parameter servers respectively.](src/pserver_and_trainer.png)
-After Kubernetes master gets the request, it will parse the yaml file and create several pods (defined by PaddlePaddle's node number), Kubernetes will allocate these pods onto cluster's node. A pod represents a PaddlePaddle node, when pod is successfully allocated onto one physical/virtual machine, Kubernetes will startup the container in the pod, and this container will use the environment variables in yaml file and start up `paddle pserver` and `paddle trainer` processes. +In order to communicate with pserver, trainer needs to know the ip address of each pserver. In kubernetes it's better to use a service discovery mechanism (e.g., DNS hostname) rather than static ip address, since any pserver's pod may be killed and a new pod could be scheduled onto another node of different ip address. We will improve paddlepaddle's service discovery ability. For now we will use static ip. +Parameter server and trainer are packaged into the same docker image. They will run once pod is scheduled by kubernetes job. -####Start up Training +#### Trainer ID -After container gets started, it starts up the distributed training by using scripts. We know `paddle train` process need to know other node's ip address and it's own trainer_id, since PaddlePaddle currently don't have the ability to do the service discovery, so in the start up script, each node will use job pod's name to query all to pod info from Kubernetes apiserver (apiserver's endpoint is an environment variable in container by default). +Trainer id is the index of trainer within all trainers of a job. Trainer needs this information to do things like reading the correct shard of data. -With pod information, we can assign each pod a unique trainer_id. Here we sort all the pods by pod's ip, and assign the index to each PaddlePaddle node as it's trainer_id. The workflow of starting up the script is as follows: +#### Training -1. Query the api server to get pod information, and assign the trainer_id by sorting the ip. 
+After container gets started, it starts up the distributed training by using scripts. Each node will use job pod's name to query Kubernetes apiserver for information of all pods in current job. + +From the pod information, the script learns the static ip addresses of the pservers and assigns each trainer its own `trainer_id`. The workflow of the script is as follows: + +1. Query the api server to get pod information, and assign the `trainer_id` by sorting the ip. 1. Copy the training data from EFS sharing volume into container. -1. Parse the `paddle pserver` and 'paddle trainer' startup parameters from environment variables, and then start up the processes. -1. PaddlePaddle will automatically write the result onto the PaddlePaddle node with trainer_id:0, we set the output path to be the EFS volume to save the result data. +1. Parse the `paddle pserver` and `paddle trainer` startup parameters from environment variables, and then start up the processes. +1. Trainer with `trainer_id` 0 will automatically write results onto EFS volume. -###Start PaddlePaddle Training Demo on AWS +### Start PaddlePaddle Training Demo on AWS Now we'll start a PaddlePaddle training demo on AWS, steps are as follows: 1. Build PaddlePaddle Docker image. 1. Divide the training data file and upload it onto the EFS sharing volume. -1. Create the training job yaml file, and start up the job. +1. Create the training job config file, and start up the job. 1. Check the result after training. -####Build PaddlePaddle Docker Image +#### Build PaddlePaddle Docker Image -PaddlePaddle docker image need to provide the runtime environment for `paddle pserver` and `paddle train`, so the container use this image should have two main function: +PaddlePaddle docker image needs to provide the runtime environment for `pserver` and `trainer`, so a container using this image should have two main functions: 1. Copy the training data into container. -1. 
Generate the startup parameter for `paddle pserver` and `paddle train` process, and startup the training. +1. Generate the startup parameters for the `pserver` and `trainer` processes, and start up the training. + +We need to create a new image since the official `paddledev/paddle:cpu-latest` only has the PaddlePaddle binary but lacks the above functionalities. -Since official `paddledev/paddle:cpu-latest` have already included the PaddlePaddle binary, but lack of the above functionalities, so we will create the startup script based on this image, to achieve the work above. the detailed Dockerfile is as follows: +The Dockerfile for creating the new image is as follows: ``` FROM paddledev/paddle:cpu-latest @@ -530,7 +533,7 @@ And then push the built image onto docker registry. docker push your_repo/paddle:mypaddle ``` -####Upload Training Data File +#### Upload Training Data File Here we will use PaddlePaddle's official recommendation demo as the content for this training, we put the training data file into a directory named by job name, which located in EFS sharing volume, the tree structure for the directory looks like: @@ -550,7 +553,7 @@ efs The `paddle-cluster-job` directory is the job name for this training, this training includes 3 PaddlePaddle node, we store the pre-divided data under `paddle-cluster-job/data` directory, directory 0, 1, 2 each represent 3 nodes' trainer_id. the training data in in recommendation directory, the training results and logs will be in the output directory. -####Create Kubernetes Job +#### Create Kubernetes Job Kubernetes use yaml file to describe job details, and then use command line tool to create the job in Kubernetes cluster. @@ -632,7 +635,7 @@ After we execute the above command, Kubernetes will create 3 pods and then pull -####Check Training Results +#### Check Training Results During the training, we can see the logs and models on EFS sharing volume, the output directory contains the training results. 
(Caution: node_0, node_1, node_2 directories represents PaddlePaddle node and train_id, not the Kubernetes node) @@ -689,7 +692,7 @@ I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7 It'll take around 8 hours to finish this PaddlePaddle recommendation training demo on three 2 core 8 GB EC2 machine (m3.large). -###Kubernetes Cluster Tear Down +### Kubernetes Cluster Tear Down If you want to tear down the whole Kubernetes cluster, make sure to *delete* the EFS volume first (otherwise, you will get stucked on following steps), and then use the following command: @@ -700,16 +703,3 @@ kube-aws destroy It's an async call, it might take 5 min to tear down the whole cluster. If you created any Kubernetes Services of type LoadBalancer, you must delete these first, as the CloudFormation cannot be fully destroyed if any externally-managed resources still exist. - - - -## For Experts with Kubernetes and AWS - -Sometimes we might need to create or manage the cluster on AWS manually with limited privileges, so here we will explain more on what’s going on with the Kubernetes setup script. - -### Some Presumptions - -* Instances run on CoreOS, the official IAM. -* Kubernetes node use instance storage, no EBS get mounted. Etcd is running on additional node. -* For networking, we use Flannel network at this moment, we will use Calico solution later on. -* When you create a service with Type=LoadBalancer, Kubernetes will create and ELB, and create a security group for the ELB. 
diff --git a/doc/howto/usage/k8s/src/pserver_and_trainer.png b/doc/howto/usage/k8s/src/pserver_and_trainer.png new file mode 100644 index 0000000000000000000000000000000000000000..f41fe48920590333ad332bb51eb18e03dc251541 GIT binary patch literal 71688 zcmeFZWmFwq)&__sK!RI>I|NN|hv4q+65QQ2xVt-n-~@MvV8MdBTX1)nx{&VvzV4n` zYu5akwPxN`Z{;iwlT8pUKfq|jegZ>9!GQ@WVZkRMtP_b8$lwjAlvY^%dVx?z5 z>ug~S+zkfC>C6s%v@o#OC2+Pdx3pt-<|2B&gB|z`x=cqz@O+EC85fa?qznPSm8}5* z6D=d{M7c0H?guL0M)CjXXRkeMMMN@=%4?+^wZwN z@Lw%i+Wmbj-~s7C-_U)c{YdxEx`CpcpsVcsRuYyWEJ<$f7s zD|;(o6l_iOMJ??OY=Oe|x}e^1)Bi2~e}2cm*ClOhVgNk#bLmfiOaISnfA8m{13mcv z7>Jiqe!dC}GdCP3-9M*{8}7$C{UR6`FPNynX9Z{Q{q)z)Xo{EtZT2yv>N_pOxStgi z_H*=<&oQZ(_u&=LuqhowaGSg-LPDV9^C2iDU9GaOFbKXKc@5(lRyEC*F@hk`{z^!ffRIiqJBO`XsOMOvpi zY~==p2M)m-Dk2PQ+Mjs6cWzbCdymg~h+LQ-f@Fo?djKAvpqY9M3Zu8Dr^a~Zhi)() z6o)i~u$a!ppGgsEX>Fo|YR=!aDw$AXtl(vDNZlc3AJz!l9kQKSE74I1d`8KTaOW2F zk3VK81d_OGag2$qfe_8Y_Be>je1i6^C9&HqhS$iC#eVphuz^Ted@KbJ$|FXtMJSI9k>V2&K+M5 zX`q_5j7U2yR6Q-LZUWoS9Hf;-88p)pU<_pRU}PF(_3}e-FZg*qegfs&q6bhqh0$a5e`acu;Sc5u{W|XMUAqSy>84*XY7YhdK#|Zuql^ew?N}7dO zahVV&XFL3~9m5{Ge>Fysb@bXpdq!RU{DBWBO%9Y^FiKkIKmnEZ!`q7iN^^pHzJGi3 zLGSe&f-J;JQy(;{*KN({V4<@SM9-yn?!F$u3rn`FzaDhxIC+AAib4b){>EFE9yW^C z87a$_m~(5aFn3f??J4kLRQvl4v=o7i$jZm(hwt=^Jbvi#f+q(adBsn4>s_cAFz~Yd z6?j15U;^Gvq2MGk$4W4W`%@(_0z-}%@9jiRVThG1TuJXe-`ww+Uq=D~({XSAm<%OC zzzGZ%GTzzFhAc2pdTnaT_u>#FSy1~|{tsf5Br8x5mrqR(fBZh#g^?gpgWq}Rw!Ko1 z+!XN!if+IHMNj3s5r9XI`i}#O2t5LiJK+RA&U7Y`=RRW(Va-dUv(#RYlfStU97)oUkj;B;S zKB!y9$j2Idc@Do1z+C9ESKwfP>c0kNqw6&cov1gI7_-k}lav1Ta5}Jz=w=~xkARCn z|3dtM;neHs6Z*e1Dn7XqyrpkK^mL<=lY?fT5eUi2$(M=PmnJ4A+|SMiM;{Fh3^o%z1a@B|;855;_^Mzu=qizbqgaywJ@1g_ znD$>$tsnvQp4A5!E2$)dh?9C$=i`poV-0pjZ{O2*Q!wB;uYgyZO(Pf##C@KvG#D$C zCtvV-dYGv(Q*QOpY_$Jy*tkclP_Bh*V`EdGR#W6+G4q4)_F`{aS=V~jATjw4@b>f7 zcO9?svs9}L^Oq8}J$G9ZJwHF4k4sC=c6SRXl&BzY4kij0%H>&)31aH5bq60-UiJ^M zESnFQWl%ESULJ%-;Bus#7%$q5aAA&rm#z!d2i(=pRB5V-@=^AK*Y#>3WQ3|@@KMW^ z7E3W4I_1j!*+_5J=dV#QF>99Uz2Dt0M%gJLU%j>dzRQfR{hq^-nzs3vkA##o*=3Y# 
z)@jLwoJ=xhLw!Qy_26$S12WL`dV>}kux`$HI>i5JA=@ZHG1-aHx%oI28zPotIsQ~= zo4#*otwoXtp5_DZUT>9<{Jv`-heT{X+KxTY5x?9Se30Vq zAD&){exyFA?7dT5eode8h3(uU`v}n^bLY;V4`Nlydku|;-SZVuG&OPn&A@D_QroS?t^k!+Fk_5p*1`n=S+NBn)yKM`EPMD zI%ei6(gf17o>0tGPm`@>ucw0|`R@_^Ja_w5EpxT;*hTiEjy)*dr0S{?i*1j$gB05K zB}J?<{-2*7A2jun_2x!x^+;C_udYwlTjrMPZE}n4cYl*Tt%cG#22X~VT&;9`r49C7 zE0WC(=$D5Mq%^8-dCy> zW8u>=_3wiN0hn$sQg8w1KI%KS3JGS#Y|4(s78DUC(n zZXWNB79Gw>-X&xthAW8Zl}0fEO_6EGyBht)=Fs7%TdvA-d|*PA!kEeM@V z=>6(;Ll*`57wn)L+XrA(G=^^I_7VibrlyZK!*yNTsk6El-?x-Ggtrpkkh$*j(F4UR>m$$QHtbcL}nC!J-s+s!k z?^X}Da(8vW%jPNjuRC`Hs@#<4vc?0BtSggC1JjnruYtB+LjyDIHoDIy*1HO3bUUDG zD>avE7;2(7K;`50fj_4+?}JJ~kOA~Y#jmv9-d^k{WUJe=Rxz@N>HW>RI_RliEV7ld;2R&@y)A`O=*Y(`7#P}Jrg}tyJY2Bh@ zs>dHsbaCYMCw^2;k8tz3YS>zyD8RwYYA&69`~3CQ&iMC8QIyS4Bc*n`3zr-z(MhkL zqU7ltbtSiFNYF6AjPclAQ^nzndl@7Pfu_X=##*y4ugifj5qjh$pukBJG#)Fed0p`P zQ%kol>XPs}?M9W}5S=on9!#VQG^gtbrE5(hUuO7Bjl)T|xUILl%B}qU`(t-zrBDf# z#F(ESR39_iuQaxCZN(rS#8}#S`>@}FwfEOF+sXCTt9*7J+Kjj6%P$t8G5|~x#rqL3I%oN{#+9M3$}e%V0~Npu&Pvpx5W*6?waifFUGlJ5+GUCj zx}mICJxQPR2|Pb$!GTGey{=(|K_g@0=?Pe@4KP$R3;rOV&r2LxP}yxMJ&!;Fy=k_1 zKX6?Ig`A1M~NOCfi$>~Dxdbtm4V{TfR!fXMzXqt1d zH-*xYsfAB9_M@88G-ILGVm`~VSltj#&Tb=)S?GkV$1df+%COiHh<@O z@yDR`dVbT_<-%yI_@dT))_0JuZNdEOE)bw**K*Y9IR)eKd*#drVJRj&QMp7SS5HWkcNlNg?m^lpO;DXLG6mVdjrQcU7r-d>th!z91}R0pab3lN4~3>!wc z?$l>47sJukzKO$ay5cjNF zGQA$(D{DHSS6g^lT$gqWfGmxI2#DO!U)TS~2xe`*dU`Fgl7u=+gM8HX#9dicwRp8u zYk@t?b)}(Sn5Be0!hKiLFCPl0UYM-}XKJwe0f;Rvvr2kiuvbG@)O$w@k4)wx8C>r^ z>G>%mUv$`joPH3Yqmb@=-5&boMZB`sCq?f2WBvuFWe*W?aXXd@w<}A|r~9)*V{+4> zwGXwKVF3DZnH;Cb)UXC0W?g@~^JuXckUOkuAsb+~_3P8sc(uF~TaUSZ#o5hsgb)q| ze5`N3ttLehviLtybDRc*?C%SD+ljc`aGZV`wd$y&f^72BygAYbyIr%M5cHvjYfI~N z$2k*P*^l)C!MeY@E7JDTot93p+EcAsO-`#llj#jK)_5l-Vr2@*S~Qy+X>C|dhkK;` z_kN4hIZR{H-IIA-AFm!Vj-bk#*6-8Lc@8GfNaDxmUJB-<)eDTR{dQ#19yb^aFv-#H zjUbcc>v3h?<<4X@iRqtq)iYU@y5eNpPP3Cp2E}!T*|a?m!7=^Qo`wSP-dO-tI;GTd z=(KxtUUdKI@zAEp4ZNrSLrYF*32(dyh7rzj#TrvZrxq+G(|Ts}nU%6!a^~;% zDz50nks`h62Ma1#7nR=P{^i85ridMJw@wC 
zljt_)@D-DcQ!kOx(Klm6NNFp~KT3)+qB~`jy0gsKdYMvQfA&+F1$RH~C!}KheC$CQ^J}(Lm~GsYP>bud7(H1t!2C zL#+3M&j5U~WQaU02scD+$0qJU0NDJ2ccWgk-2{p#jaesxr37Y&-`p3+U~{xSgQqXC zvGLSEQ;J`_(do#t*2&l}Vp`j4(OwGIffjJc0-sG58*8+Mvomr5KEm?IQfswr*;zdW zfI|lCKgJdSR5P9|iichP6aT*3Ur0)p2Mx>j!VgvIBR3CcaUrTy_{{GDS>llmecQ5m zk%{u40Cz>eUpz5Bk=u)8faYzt=7l$oUEDm5-9$9_IOa$3O{$uIt=Ec+Q27>p?RP-94`pk#5Y!L56nRP z4S)m~=o3Q4>zDrb#(8^QF1sDITmx{o<*>;4lub_baLdm!4QfMl{md*~`!_sdzM(Su-kZ02sjlP9a+9IFDJ z{^Ym|F}3*V@eUCX!CeME`n(vNJ;>l1?Yc6e!SW@NeZ^@TpbQ2R{JHH($;eX9sLJX) zH=y|#9rslwx^Ul+2^rW5aevRqU_geG|t?z-KN7P?%`U zA-b8Tvpd)xEH+ISN3gmv+^qF4k=|u%=6thBEC)EL@@%IDz%6^K&-SZEF!I%cDczj$ zV*JZn>CPXrk5E0@WJCxSUOfEK_61Gd!rJc(p&Y|a^{tkxB?dDA35hT&E-tQRTT+^@ zdwlg?=Qi#y#zmUWJHy=_;!;Y*&OXPfqv^EhP$rqm+>n{u9q1=cO(Bs%3G0sEisyd( zIaV2fSBaD`fI?*SCuH-hcyn7Sr(#$TBAipIs~#20{_W;Kyj}35pV{$BN7=^4muY|; zefgv1d9$f?vHwPqw~r@Iwhx4~q#XN~4C>^BE*Y8}6I`Q%1 z+(*8)sD5vD03Nq^%VfT2XJ@&st#F%Gr4N;oMeKGyq?ocnWR`wU59X{+-r#8*-#t#EQ22(@>Xslup`DxkG0nq}dXS35fFFr5v{D)T;X^ z1QlFP0ESvW&L#cDEX1VA)Ia-`u?xGa`w9E(bp#$M-0Pwx+#iP^Gk*X!G?X2DTdQ-J>V z8GV@c&nZPQ{q*4}Z42g0o=Wvpq!kS%OT<<$!aUiFGxs5W?y~73f%C7E(?PmB526$f zr!sNA$p#?O9u6lSJLE@PZ&-i7$=JC>oTn7v>}{&D7=%|ODL@Y>4_aqWyk7oz5Q)V! zz%oyYwiC#Y@{h;Gj*hj{R(vRmq|z=; z8EwiAIHsW%4TQSK``BQb9-a=Pw<|S&;@8)<{#QxgbD{s>(cr?Z*Eea{z zGig}0!Va4bhqL)S2%jxM=yOck?%)HYDX4tQGOb!~^2Oth4AbR$=K*FfE8`%pN2q4$ zcNB;x49Esi;()tvpmr95AQa@y30(n#NNDLj zm+}ixj1Q#wVrZE1iP3lzES^f4)|xYiqF+AW@nFv7_rBlRFb5*wX}YFL)x(*B0TzC< zm1^~C51wT~N9%wGh=5y+%*+^%)ROzQ8RbA={kUE+KA|U&(>;^u{ybp4C@YORk`K{k zZyEzMT{h#BxIG$8WM~kUo!8+hw1M;nz;~Yy97G7+hd1eM+5wEe_5dEa!wx|A1q@^^ zw@Z6WbZB2u<Y~xqAh*vV8gjo)JzVG!vGlQ21DW=-{W9ApzmmeV=>X2#Zv5Pp00m>^zWhw zOu+8lX9!{;0#;lE6j9`QiwmF^__)K|-yOBR<8oy@pOhz&gQacUA<`eSx!wJqN?o(? 
zqvtYS_ipLOxQF)S?=^tKc7>o*xmUq8CsF?P=D_0Biu$Oh^~onIE(+v(6zljbq~*4*dpWrSlCPwK)T|^ovrF z10XDzv0&Z0G|3gf?=T{8O!ljK+IB!=CY2;5y>mi#@XIF}lOVuAKeQh~JxSF~^IWnY zmo%gbHPZUHU8+{Y*W`E*m5~vF?|I$PLfX?>XM`=8!sz1{&m_7Qyu>i*QHGe>*dlnB zJ8wbp;^tj|*w8ol?Z<~Tc}PGh;Fpt&nf-0TQAKOc@sD1mfO<9LqFat5#haT`U|fw1 z8yf;-0bGzprKEn6XS(bC1`$oC3`)*(H4jZ=Y(TcJf3m-wSAu|0f#kPsYXfZdD zuAPKB@2cMyMHppWO;#x$h=6;~d@?{)o^+1LbC2YDR9(&J#AbsRvYqL*ggd)ArEcZ0 zed6~5HxdElt$`uPh0W>wqtJC)(+PSrv8domm+Kimy}(CmpC3!=G-B|HV4nhoKJU)e z6pPn>1IX!0NJt3%?qspD6OCr$gqia3>E@tjC2qH-@?e_FR`O5sfJH69UL+VXMnXDn z7e?9V>#UrDdxGxpuxLo5I04bOtglZ*UMnrMvHP@Z@|j&;nzeg_Ye@nsBxVGkrnQxW zE{~WBcNn1S0UGSpnH;hE>|RtE8#Hd?a6qiJCQPn08^Q~4CZLA(yWlS|pSErR7GN0w zd^TNR#3dx`MZcgV?qgrx?K=S2M>&(*LqpGXPf^)juio|Q@IWrmv{mjMDE0*AoN623 zVKA9ne5N-TKuZ*GI*|)8_Nc(K0f->Iv;!r}DFH0I4`PMxjZ5!mXyIoqy)837mR$N$ z4bkBN)*e7^3)m5P&%_}PM!-+bXa*C7f&|nYTV$grG zFQu~71Ykmy$}hpL)-3`N>9*hcqA7)IanMj;jDYp(o1|Ix<})u`6_EyJr#T-R4ltK( zP|f_<*EF4g#Nx9qN=OmbAc{!7<^Ynq|5ooy!wQyY3GW6$TGoFYerN#Q<1=NS0U-B3V$Huk_;jLV+2;P} zR>}Bp+=}2EDE%lm-ZA{2^#WJW5_s)(seW(*g6s*h^W)9-C)e|xHM1xhTyzM)!FJ0_ zb2%u?8RmleydnzlWAgA(uI)yo7w`_tng~#{;dHkMPAC}no6SU(MpJZ|VkMaK0L^1@ zopqrdPT7XlL_j7$ zU>1bvKA#0qxpMjBE|N;vk&qK#*t-yV%}+(s+YVDn8Q z($NFJ9;qMa$w5}siMsMf)!DG*GlU@7OEMqudz3685~njt6M-t}bZ%xL!^F?NBIGOJ z_13bS_vcp{t@o49BAyK#39uwMqHcE-3PXrng5r5OdV2aWXx>jOBU=#=;OPwMbm=UV z%n=Ye%%Ttoz;=M_jR#0;O$rygpLl?20s{_+5?&E(ok&Dki2S!^k$9%i?Q{c#jBMbI zIcML_3`TIbX~3urfyYJ*0ovzVg-h<#z~nQeoIwW7u{9v^cZK_slUVP=9E+b;=xgFQ zo!+0y;v7@};xRZ12+?#TiWqC#X~G=b=pF%6kR}R(o$8pr z-dx(3W`sqR6rj0-BGK_Gn<)o>!@%$bX6DSGEkL68Z;KQ_mvPunaG3ww!g>RG9s@Eq zg#WUztp7h#(#JS3BDD)(3dbZ-$*^CHAZ2BFCBlrP#zq+Ez0+_A)VK}LS(R)Vf7+JZ z4&Q~@v$@(KSfe|s;8jFs&YRi5LgZpy?DFc zBw!Ov#3$>e2q?aob_PiAacO8Rmxk-c=_};op}GKc?bMEHd&ZwAzdTd}Kz^(NWb3*5p%v^3!!1Sx?yeaduZv94i-C>KI~R2^@}sLHV5!7KdhO}$5PKZaAfOOHJ1`ar%aWX};C;zIq!Lq{dbtgT0iyS# z-{++0b!!m?y?>Jx0Hs)O07cj$&}1W+mknwnK#UbrFz)WBBs$M7MZf< zr(3L>jXG@~t2S_@@>jpdOKh>m@?ygZw`-<~pXMZ9a5(Dd4#^(5ySJwJWA~CGLv(`| 
zCW;v||0Zf<_KD(%&bW4_{#o@{!Q3hlWBqk@mWxID4KDN31Jh->x_7lEHZra@hQXg& zp&rBUIgf&fM3l7(?Le0*?u)@^s?6CD%+)9jh-LDnsN$^biLkV8-{}o1oU3aSXPVy_ zy=9iTg}Gy){Ves9P@a~Ls8PYx<<+0`>DTdJ%+X$!^ZJ@SK zwAl*3!$&MZH}GYfg1GntE{*{yDNunFh7%L`b6N}B^Y&AnJs*U%jOWdrRfNO+CunEg zs7Xi5!i6e+J+9N0hlEZ+dGUV8@=J$Zl)a?e?*b++lD8xSDAKZml5N61yW_U<7fV=L zN_%FvZBGz%;!lkk3F-kxFUA$X>*z|k)n@m`@fYq9847x0(AU(2iTFlI;u0D{i97oU_-c#1C zqtH;u(L;?&j}D2ec}muAIn)h3o*r9O?3*JO z#ktVd4pVM2ohg}3(`{l^OgO#$iY}sUbXaPKZovnIxqTq9G4wA76SMY;pMux;;`X^K zPa>ry#SR#(1k%}6O(19HC@hS2EOYN5>N;W zGs>!%0H#YJ!5?N{lT99tjRl)3#-XiEcrzdsOB&=D+S#CX?7GA!e3-jF`Oe5%#7I%nUS(r3moBk{N<>zSUE1`$})E) z^DC6yjZu}Jgv^rId6G$`#57j#_0X_gan&1%Or4tElXIU`-IPczSH16F4>U>8E}VYg zuCCvOk1Ly8pdbfUC0}@KxzP4^PvI{TI1`CK8?~2DikkST85I6#UwgloHksAR4S|n8 z4d=*SCG{A(X;rLbihbE2yTS@UH&)95wv`_w$CHUuKd@M8shRKLPxIBgi--rbBc2!) znG3$w7l|fl!+YhW)Pd0+>TZVp5^l&=7qx7i)V2`UwHaHz)gE}qiTgX}hBSyr zwhf!I(cx!RM*KkA^8gq_?0LFPhqXx@pvp)B4TyflLnJ1mzjn}FysrDTZBZD)6b>|C zkzh88$JC@{RT)#$j6YUN0`JPXMlSsJT0mEKTKj6ALn}h8xSCr%j`BgKC6r?CLF9{l zn&?nMDTDCZ%7pZ|1VHpbfD|0S`xM}@-Lx^hK2Q_3{CR^Pri$Cz8U=w|dB;G?q*6bv zCIrV@u9j3qsobRfndrKU*l(@dj;PBh+iCiF1^AH%^R{+Y7zV!%oCc)rFYLxEyHCH= zn18)7e0v=66u7m}$@#paa2rKLG{fqZcEix>hzbvdeP{^#$y+6apmK!xft)We8y^`9 z*L(6c5(GYp&J(L6dVI4ju4=ALz5bG`W>oPOO_m4Gq=}`WN|EsQ&MCP7`0t7IgxGi! zoY1mNRvfCIoa<8Cu+Qkn`wT{a!>SMG6RaPh&rl%#W0CS3&U%vmI&?xGakWEcfX#BZ;yW%JiOw)GY(}(m^A)i*R9<5JFh)Iyt0ODet;$n zJ!BE@IZf0~2!(JEHKHGVEmq0&$vdt{;Z37I={vUiNjXGH(#1!B~w+WjNBDUY96}MckR|pCZp4%ofc&Sv%pD-oHS(&0%)*? 
z9UtdeQmt!K^2OpP51O#?i46Xvfh``=UkznA+Wc*SHH;U0nH@*yYZoby72=JDWHJrP z>v5gMAd29E3>G4T6{;RmI~AXut7&&NVTp3yDu(D(t~I}qJZZh$ud?W=u=|ewN~x~M zeh22b&gx0OE^c#)}7meEJ?wJC9bb=Qojs)5?g)d?&FqF*od7zK|eWy$c(H zrRrwvo`;J}36r^K$CB32iwQ;4rq=ym|VvSKAD;3DciVDX^|PFWF{o!20DpqB-N~@00Zl>HtU{T zU2H;G$dg__AU+rG$O8K3!V?(a+&G{3EF0tX9wei!EB^-iHYgNCEBM(@(zTbH#CN1$ ziY>8Wl~9V+=VdQTu32>Axdxw@Z%nhZ#W1vITkI)8wN{#qp;3uX`CoUzg)I)|WA_9Y z8lPio_XOmdv8uj|2hdrWhSi0J?Y^#H-bnG%v4X0hw(r}x$wH_%#?DLUydNCV;x-4p zm%YxkZKul?kbW%Eo#KZJmVWfa*=}nUV!!b$iFevkZHN$`gcHn@c%LOly0>suZj>69 zuqGZj4Y(x)a+$Mp?6*KJNYe7_Z4_(r77WiJOTHQqed}x6shrC7EC;ipY9f#i53yOi3@7 z>VAg~OvG|fa+F{nlsdbV{{_zO+y34Nj7Ck3-=i7-qb%MRdBh|6DGTC}w}3(oF_7>E zPk4IVI|8@#l#Vk~`E!LHoAosAl*ih-4+m+bsOM=BCvW5f^Yr_I1Yo0MixZ@}!F-6C z#R8edtXz4kL#-Fj$45+#N!P`y*UpzFEBJefpJNN6)&44fmJp)2w0sS(aDQFg>$2$V zg;n&4RdG{Ie%k50vg)l>-3-A}+8_mBDE6@Su7-+cop?M!$|hh9);8nvG5yuF67i%3mmvYqcyzCAW}mL~Jp1 zXuQq#`<|%KfYkdD+?fWQ91gBMq|&>EkIv6=2!W8maRV?XpC1Gtj|B&%dj`*z{;n9#tN%mKGt0 zVnAxK?V6jUdNz(t7BWECQm*C@lqb|Rh}WBfuN_E51<8=XgBV`66KSgV@cD(SSib_L zp!GA@#K@6kywLFn5+Af!RN$a$0z^c?Qy8Hw6X>2IuU=o`EJbQ$>sbp_4@GiU%#~0g z7TyWS(yQaR6zLJm#$kO=gA2ofPjhsAJSar}d4~^?E*+D(**)HRyCWinF|Q|U%4h}N ze8p|u6Tft!_~Gm)QUwm%n3y&d;xE&}3QOG4ET3u?oE;BrYoWTX8#4p*oR8Trn71Cb zKs#g@k}ON>>Q2ySR7yWDL_36wLcWd(>)aU)06-z27(=0YS#Op7Q&=^^2fr3#ahehd*S9p-b~A}okR0K8RXMCHZ&dN;Y} z=kzL9H^oTgJc+=--mDx7HU$JrHA8A9z^8r?i`v?mcv7x7cYX`3joQLiNx2p?uXv{C>v zoX>@+E}(y%IH~YA6e;rS=~>P@^>wXr+92I-fVE#1MyIMDCsVRa1rcQzaY9Wa^#oGN zz!5t>$l34`+el!d%3E7p%vQL>D%O7P(rCkRlyrzIS~WJCu9XkDs@Ya7AU*?hSyqKI zjrV}i8c$ITtM@MV$us-I%WE;>0;o_A{c>P@)f%H7VzahDE;tR=ucz?`6%`&JxT)@+ z#lJm&9Mn}-5H5_a$dd?C;V5O*{?0W88=r${ zqe&rEU>*u$=OI4c_x57IfKr@5B?R#a=SIY#a*b>TmJ;Y~g3BVRGn<2LjntKyqRS%7 zlyQzqc|K4Tq2;wwpa~R5J>gh2;l~cmUzz@Et*mY{OtaWsE#n; z7|}vi+D9HNs)w!&6-*l0Wfm5 zZ)-kGnAH~7auva}L^kA&Ds_z4Bt+_RfL=xZK0Fo7V$ON{EaX{XAov9%ptH#`lKLsW zaQf9Iw&OWgrVy#>rHghIZCv3v+$Q$m)aQTXzv6U&>f=`W^`}MKC3(~BYtcjDE{trZ z584H}N<_%>#K5V#Kc@!RY7!$%D;zfA2YqY8ee>!G#JG5!QDpG3(Ytf_9#oB?VpBBB 
zF}={g_hUpr?Q6GzsXyMldd*D3^wnza%*Cd+!-0_V36+%+gnO+K07WE?VH(GT6;)<1 zoUqFq^A)iFL}X^ts-(;ASIr)n^!e*K^=xpiDrc!@W`d*zHE|^vDBRk!7J0;=tHUDk zD8g6nxxv)uX9_=dx|NNxq?pDp|5twjUZ?^DYd>j%CTR#u2-CciW(pQOXRzx#E7;UV zMVycjdD)9{+3&QYIsSEs2u^~3`K=$ZPeDCYb?Y8KW6LGY;VFNplk#Yl zQse<95#5nJ*B|vk`3o%T{y9w-DOd)oF^ATHfucC;SN$U(;2kqhj4@3^4%2fC! zLV?iLz50^YkF$nY&{6Fqk6Hvtt^a;}2Pv1kPvAH_|3yaxiUPBA4EyEkzieP?-vu$r zm!f_UK5uQ@Q(_?n28CnrgejAgc^#RN(4o7#aZJogUNU*sVXzy|T3UEe!8-vV{F`lO zDWz8$v#P(fs;e>G!i2l#0eKbhe>iUdZJHG$&IJ(&dF@bm&4{d;e8`^1S}V34Tiguo zrbqFKUrRe|Uq*vaKxf!YS#Ql^vchyfwC8uR|IxDUocif(SI=)e?7vQm3uMeoX+MNt z%KY2eAM`#Jp-#oK|*9d5(8MO|_1W^fF<&!JF;jrYbOMjp}YZ@`UN-6H1ucLl_bu&<sWm+{tu9;P&w@G)aZd|6u>FlhWLzSf^RY@&E!Nk4kweY}>;a&=hg z`P^QT+@4hK1AXSh<>uXZ;Td*}=2+d>b)}fs|6U=TKOubfROf-cF(sCC*jYx{? zELIfNN4pntFcEUT79u#$CtuR%^1uh%5@K!bv>3KT1w8B?LZ!01u`R)T2B!X;e&v+v zwA^T0BKd9D9k>eR#lfdtj}dlk){0ookn)$Y3(?SBhiGscSk3SE?7o#G@8QWqH(1q3CFaNtGVJU9Wn0#ulmQOoERJas z$_!mwCylDPs(?fq*js0@&MZUlland$X;Mg6V~UnDIpWL&Y7K|8Pwmi1nPNBIdqORY1Fzp2DoR$rmGd^-TE`v zz26TtTI#s+!@b-Cl$@FavB>2c-=DWL^ol;hz;P;nbDp_$966UhSF-DoSWhu;%c+W zb?_+%bf|5q9Av8O<1@t_*0L>LAdhV z0f|}q4h8FyiM@=m$gzI3j{TWL0ojElFb_+=lMP#A1mdV@@U%HaG)y{3cq|n-KY^S0 z!&PDM8b=vfgP&2lyz{lZ3UM`(f+OWe2Ajn(MC85f(`}9E8%DAO1?R1jaRd(_MKF=- zwT2_x@zQIhrujO^Tp?pAKz+|}Sjx(#pxiEuKl&+$U#loSeecqqqLP;R!wAC^C}!yrkdmB@7Wp0YGbBY*;wejYoJlWhe}S zkiGS7%7rVm>4ARfu2MxSEUrX{?MvJ(9Gv{4> zW0ygs+?i3oncwKLP4h#nC$JSWu@__>BZefAi6d5ssuu^mc8ZOLcJ{7o7pBKAPNGm4s8rV$sAjaJH|4Lm zJg6q}mpy>e);LUyIOa2`GNJH_I?f?#OcO;GdG>7Y3)ChVrrE6)W?Uy}D2qrcHV2~< zPP0s7HkG@iuG}q_AnNM_AF%_gs9kMMuqW(AV>BI=qzV=fZ!;GcWJ2 zrIwu-=+0cw@pbF@$8Q1HmFInvrHV2D5bN_k;%1nFo&YsLXR&H?X;Bdr()u7vwhxLG z0YTHO?fC{XE%rqX zMQ$BjmOXyy<>k?16~^wH@)@FsXMl1{6_Q}CA1v2877T2!i6v9!(T`&Uh(D${QU%_( zmng{^09KdBYFNZJ`hz?x;azP^r_&GEoE4cu#X;AL2eZ5FV@c&b*!X{Fikrvsxdr<( zgwlhG)NSvSEVAa4FIAVOw}xoUq9n0f&L+qM#n5GcQfqxlzq?cC#~WZV2YXCLD9~5< zD}4|GjTo4!i@|A=)INnf&q}RH@4GK^D#{cnSb8|}24;)m(j}DAY4bjKeLI--l|wID 
zU)e%G+$u_UR{3x!)z9@oxed2kJFvEVNHVVKp>k4?RBCRHiu?0v>@}rBbfX2lB3;-Wk^-|bGgpo$Ix7k9qzek&c%j*M zYy$7V;xCwfXhAK)t-U@|7po0cFxwa&Go~n75Lu+f@E`)JjpRk4i`H_F#|AJii=w?* z$ZQ5m0eHVWdEr0uN=V0TIc!GvI3EC{>wL6tyeEI(7$flELzY>_W>ekGmr^kIK3lRs z%!1nVbA_kJN@K>2!yh#R3p0f9&KyEbuH1s_+FFNVrx3L)V-Bhwznh|B@4d3C;cfJ1 zSla%H97wPDJ}Z3u5 z%IMg8F=t!G6zb^VD<~pllGQ|wJB}cQ5P+%evZPsXuh2n-FR7pAzX%KT@OtPJI9n>t z)?6dTZXJzer(+Aa#O!5u(dK5GE1EB3J2oEeO(ieuk59-#Q|h?youoWtyR0uD+VEur z-wpTyh(vE(Xu;g*`^VHLw*!>Hw;WLBSD__4q-vF-s5ZwRhWQk^A7(<_^)y|ca8DYZ zn%^|4Z+hM--rJa;mg#FtroQ4ld3U_PprnAwk^UE&3H$Sc#u%C>&-E+3UA<&o zVbMaHKQ&~o!5Gx)-_Zv`s$}As*0mU-rGNR4WNv5&+zu7nh)K)a4)W+nH4@K$PjG0^ zU$RPinhIB8yr&hlzM<)Qe&LJ2=a>Wl&(6bC!h#Sz{8@V@yuA@h>U5ll_pHNBi|A_$ z2W1@I6AHY;3cC^CH74SU7=GGAGu2{&Es{rMIq4+^i<1 zS_f98wI8ux4LKS_6<+TAn@XF1yl6kxnEa{5hKU|=i~Z|xzXT|}>OOQMHvgHy#Hy|3 ztC9~t3Rk7m^5dd1no&hbpYr!G16~Tn^pI4$`I$b%jNt_S52D5eUP1UBH2J(>5Yn8` zoJ)u@d*MDK;9UV~ghq9i;LMiQltm09-!q{p6)!Cd2K(DBa+2OXw)&Z(qi))bKw$MP z?fF6JBvX*n_poWG!;U8=SZcaWC){y^bKZW8N~1nlFs;#U!~3Dud~w8f&#R$&$aXBa zoYMS4$yvpq)GqM?T==cL)vTx=k1b2yJ)jK)DGH^SliLUjARA?rT6!VcY{oXb%I00L z89UFet2``<19~D6*;QfWZ>Ch@Mg^07@r>8XO0Nc3c74-+F@6iG>{M1;;)Jpb)G+HD z7!(+{3;MJlMtuO0QTVt9bOA%C-yi5s329xJZmJpPi_j@>w*v<5gL zdiv8lsEZfOiR32DG@0hlw#H2C(%bLfrBiSWTu)Teq8798B^o^m98|dy*P5e$*`E(s z6n~-*35QWtHqG#aWJgRgzTK=Zy4Cn<%S zqm&p=#KltyCKHN9H|EsK7zpbM#ABQG$bLq{Wio||XqQWJgp5CMLI2a&47>n@5Z@#A z&3v{}&pF1MBE1eN9(W5(uRn(LkM0Eu8x)q@MpO$wD^8~JFa-%x22`h>=1e?#r2Ok4 z>Q4-DfO4fGN6oL{|7VS}I)nO6NBJ8pCWEJ2L7BWDcB;uBYX9i-mie$VSw;JT_20b6 zDsbbLS(W2>dwybLf%UXdzTMBi*};kcRZp|Y%DYV?nk9frp^Rs2#O6nJ&dP{mUaaI_ zwpErjujO9XTF>C}5<#swVY(9vgJ$w8;VWQrfcHJZrKtHNi$g5kP8M(FHh!u^zqmr6 zt~A$X0mS(9|E5)-zzBnQ{CbJ)UnS(l9{-+KKtC$NYFx>t@SiHj|0_$5Az)Dj0`Hqf z;rr~H6EKVq&?r*2$z`Wz#6o`g61VUeNUM4o24w8Bp)}ZL`}!qVaw9{;DQwU*Dro^B zC4S~Hk#%MNBgiBAfdP89UpvW^g$>2=B`^`F00Rmo9`gJ{GXMM4 zH2{I~#Q@tf2-#@%p9PH1m0?+?(P(gd`@7fI(|Upy;3ek1^b=Cf6s`2gA-sjAxE z+T#6xP<7T}QEp$nhhcz0q(P)pN)$yphenW48WfQbM7pG5Ko}Y%M5H^UyGvS9TDqjA 
zr0ecc&;8x|Jo69d@yzUZzk9E})_1M_Swb_M`DQcG(XC^U^4TO&nPab1&qv?y$vL@L z{j%$JK0-hH=TxxqAQJMQ0Mmr$SgWHwR2zrAa?lWA_$#iotm}$v#9G$Le}#VJcJ8Y&exSR=X_`e0!$Q!cd(oGwi!?ChwsT=*O(vgVdB$5iQC++ zyR*@%W>5c^`TZutVn~1kSOqd##xSSPeqRXk!8T+=z%kL zI`n0EzNMKcr`}|#M{e7DN_I4E|Ki@$*kD+1 zQyg3??VP6yH2F2Z8PWLe?&rB(N}wu+=)P3J@P8p;|>zVMe z`21|0{XJerM(@&fJxxyRN(*%SfYQaoaql7-!Nt4g z==&KWIsG58&GkU)Z$#Z-5vmwx_tsh%`OobUgN`T|jim&mBJU^?BAVc}6Yeh3w@8qr zFWHzvOg!AtN;sJacZtT+eYMT?wb<}?l;#ZE<;{0-wRzenrp<5GE=H``^ok0)tFE{7 z`Dn^s){x#8eioRI`!J9!WVADtrd$5X1a?&Tnf;g=DZd_mkA0Jz5u~Dw14~ivLy2eW zxzopUUMWua&nAyYPF$DY5^s^kvfT*7m=H4JtPJGN1mz|(E+YS2Wh}5I5*`jvXGXzt z7(wx`dEDObGI(qhy2dkVAt(^uZtt@c%X4x!Qe#fL2lG?RDDU^{!I5Fj7zF5elmRhq z=-~2;4Euf<8^f}DwTM~ceaneRme;M}@;WmL-}j_b#_^hQ3cSB(A}-GnD&m#-@SlAc zAG>@VsdlgphI<{JR_1`D)<%NceN~lwsq%X<*=Kq|cp9I6*I)~rQ>Vz{8BA#!(J8@x;6dP{IgbQ0UtGeKW^Sx18xSYH3*+rn{H)e3z$~AH!W+wT<)7w`q9R z_5!plwVYTq!(48W#lMpozRr!2wKvYp3Lsvf%D;G>lCbkNuY@c0QtD;Rrim$a>vYRF zWxH(qt?YEq<8#%*p}>I0Y^*(@4cZ4wn`zUxgBtE$wKVcxr%FaeO*?K!(b#+Of?=GL z<~V&PV|$U#&QT%IXFT9_tf^Q63bUfVBufAVkfBlG1@(^#BH^E5o!k`R92WobzWtzz z{SP|D5rPo3&wuJ9OtNM!3i}MLaH`H?1^G%N<8M7%IxaC(y2U#7CB=j?{#Hg$hwia! zy}-*?tla2}bWJL?NzMGN8XPh%J(O}>B16V`JWFtK^rhlbI)>vanku~ss#^}#r-rH* zS&d_jFdC0N**wEJ9WCy>Q{(b@)Yz?0<0ItfeJiDI%&(77C)h4avFRjFRK&=hA@Ft) z!jE+GX*QDFvP)*Q#rH6_boaGrSNw0?*K4{bbVS8kr=9P}Q29h=vcpao-%6id??w4+ z@-Mza>3iJj>#4UGw`nhkUFq~3HWi1H28>W{eWPh`bmEFuP06q0cP4rY*yS~=YHeDD zMp{VS_gnfG$Qmn`Zx2R<8e)aH8lT-OYmg;-P^MoynAlJ9_k1-)LANLldY=jfdh7D$ z_`T=AblQe+XzbHAoHbUb)EyrY4iImAZPm1J&PsMm%AA&0DXXlGzH7SjF$lv!PhO@b zJlpVVnN@!DQy^|(?Z?(~D>K&T0AlWA?(fae(2Y=3_1Z8*u zFa-i+W+p83r2(5L!Cypk=S+(tpG+T&&=Th_JXOd0c#V)zF}ahoM&2? z)aB#T8#TGL?d_UY<>q*63wP_@eLMOTGp6}TiOR#ip{T;^iaLgcRc}RE{A*KdyVVNy zWRK*Ezmo4C12!*1zurdMLGE`}`f02RBQM!K)Iv#6kCR16>e2xCQpWg23bf>nS3Fz! 
z6WlS|YJ2Z&RV$fvdqP1Sn*_nj$10Nuj7{=a=@x#1)i)zu&m7Fk zqs>bTzG@j0IKkVI03)&qDoj8w0Tb7I^vgZdG$&7W(i9?z4q$6Z3vK6p|7@lvVLg)_u4dWn{mp!}*Z5vc~!19=y0K`12@ zM9bgCq9_E4Z36`uW|pj27`YWBhkiEQ&k4u#igeXxf0p-bw$b(D@pR8(T>oEL5!@c6 zpEE}7>sNB~k&1{%{b*NLdSUC$UiRLJ!uRCTH!K`R*iz<F{+)51mQqvutQE!&3uqhpiYpo;0$qd>(mOg#o-vbUCROAg#$ zG8q|qL^&({O-uenIadcbKX<90z-bV9)_9`o4sW|u#L?$Gjr^QE)-m4-2g*td61yB* zf+*@n0Fn-~HkgEez^16Be0x9(d3Kp|hoI_hyMxQY&*lP)8A^fVn!lc*A>t_!Z(Ef= zEr&TKsN!KOSv(nh@ATrL`?54Kqs#)k+tkf4;wEox5C`8#k?aALe$4q{?|8DDr;=>@ zo#pOlO?2QIOfb{%ia@#1izN2}XlTMe?t;xN^l#f0WgWdkk?<2@)t9%Mf-!hSpQ$x; z>Zp-ozMICW=jl>-SCb#WKAU;ND|10x<31@e*&Jt4rKH3Rh)!-kG{K?K>Z*&?(f4d$bo|T-FUUIu5PG$$?hV2KKH7*>H36q zsPcuCI*C^h?7HG2JI}{>3F|I^8(f?4sORgx%#q)d45T@8j5|5#8hEuTvBHbwPkk*k4U2j?y|m8KeS;l&1U2dW;KJlf+$Vp6FgcO{Qe+C*=f~08E z3g{|NS6V^IF})tAh&l&f)W8f=1WbnQsJX}}qNPlk|_jvI7i zFZK|&SvvIARX5`ya=ge*)j6g<()8MP^lsQ*l1IyNY@g>x3;k$Ua&nBy`)HMFzulU_ z-9NhD}zV1;0+v?C0vSKpM2I{#C;hfhlfB-N5!P^PEwCS?65)qit}+`=(48OZ+)%5s@lj zVMdB0snKzqNmS=3ce*iVSm`iGJh3CA3clXYTreoACa!)Swn(TEnX}T?MWlH$K3SHp z`RyZS;X2N=ee(26QW~!(B|Y~(y{xVq%Vjb;eZp8+Z!Y8cbftG|Qj==*3x)?>lqKJy za~QbmXKV5X{4NNR<|q@-!aFx?iwrlLj7$`}ZrD$tfe=3t6s2#MSye=ZtyRpPdbHUs z5=M3sJX(19j_Y)|oIFY!-Lb!cM5)N7pG~N;kJglv^Qe%sxhPV_n)nNE+v9;JE#JA$ z46~(Czy2%JEACmQ^qcgjFVht7JgZsw8XAk*jfB6TZCMHb)fF|4N;b-*ms0y7msO7h8!>mSRraY3jKH%j?lV^LnKfy!4BL;^TGb zOqzSn3*j9P{GjfG&!cdur-f!OgE)j}56e=Y>Z>E{$}>R)QA}JQi{ZUejkHo`9P#s< z=QZ4U>bZ-KX1W0VL6keB9~M<^F-ffWkyD4YioA?+O{J9GE(bYjAqW(g{{KgD5#cz< zz?U6%vU>2!0Hj-Gb8n13%`@yvw<}8Mby)IZ5;ehAXIOAc&X+p31t}VW~N& zTtJcpxK;bWBuG1jl)iTLITdHX#Gx5*B^?Q>rlf@B@qV_!4PW^#$vuiJ?Xn}D-u-gLn zvy|fZMFEhC-rsC$`bH==t#fnfnNaOn=&n!i5HJ|1PxC7vSM0w0gJSVvR!x<}ya0ro zwb_O`Xc`fAoxYE+!uxZhA-l9hkE5khwNHq)V_Z4o^ic}Ym{_kdN zGYTan26ujp@()@Fb9BnGp4yCzbv;0rPWgQ-r$zvq-pKnXiy5?d%#@fvZCS~mXG#evEQz);O8cPO?|&LE3S0Qn%G z!R`2`-bV`Xi+7Tn0SJIn$zSpm9Z%bbKYsT>2tN_p>f>!Ctaj%$ARyUZUmi={tK28i2*^`2 z3~iMD8=He*j7b$hN)#th5r_@*Ul$@K0vZKhy_*3@q9;ir6o8}4Z#4YUiq3V7 
z;aDI&EzRbjkKd@p2lE7=T86TesOm3|rp$hnvx7nS)!47re%lB4_*0AtRWmyPd1)v` z0^`7QptrXzN=58VYs(oZmu}D2bq1^goQ>#-+)_~ak$+}I`0O3CFd2I1Ur*2`8aC*0 z{S@-;^<#Pq7^tXlUcHA3^c}~s#q-N~=G)1=yI@}mU#NK~yYt`DE}nC1xX8lFYSQ3iX>u^{j>xatsO?;qW)lH zHPT_pi7e>Zs8It}${qmMz1kcr$WhOYd}{uCb9}%;LJfbzO-dYBUA^RK;d4Qw5~3&3 zchuP1uC_iMQ#svFcpvR}@S)_LFmWC)eA3ktT==OY)T^{pZRk^0LDkuj(|^eETEO@v zL~;J^)l#%d#x~|e(<`F1rEd%+vPSBg4iupHy7@&IE9#g)ofH9JeIeE!ghN4e*!pQP zz6cPY5hIea=c->qeX+g2xIt`fZ*@~#_mEsaYA#HiHIBWH-f;~n-$ARt;83m1f9Hu) zYd&5`;|bn=%9H-X*cluWmfLUfHSFBMe>vIfq)QFM!oGRi8<4H$-o`89v^Ez;!nKKE zvbnDaro$4_6ndX!D!jTl*%MfJPqMW|eDFEO2~APli!?qvbbrHs?xp`RF`F-;0xR3DQAN#=-D0gtYvA@(Dxsfq`VQ z&O>i{7CTwHep|#l<$eNbaNS^ z38}J$0jA4g$Q*&yTPN%UwsCzrfwAMcxXZJ*4xLJS4FXoze&qTO#2(Dpe8sB-7+GP! z>Pn>d`-&vs7c=Mv^G+;BubU0x;iFlDflv|^k)apS&yqKPDwp074|mwFziyW*XvaL( zi@7DIcPiAEm_#WzbC4J_n_am*8)q^+S6|j#YWbX}Z>*ovI_5LAIoU>~LuKM#KU&#L z9cP*5BjJnwfqbX)gM=Ls*-d?w3C~rmgqgdZ=Oda*Efo?h|2;b)cy=@a-8+y*rU%4Q zsn0yZg@G8d63B2k>I69Al4eza9I2J>snq^h1R`zwUTwvDLfS}AuYxU+G6 zxoWF1G=J7Mc01v#uO-pLUBEyBv(sn|qb$MDOlgBp(|bG?UeLJk^+ zDEKLdd#v$`#0_$^b3X z3L2!PNF8%4MFY&%n zP6X@9C8E~v-apsQ>_8f5l9tafSbPgQ?f+~@ap!?_rsiP5N2QZNCyVDs6;p{u6q(-y zr?!4nwm+`3D`WCG`e8$2H_R1$Ldy+|1?s~V01x!hG*5&Dc*CTh82rXYyqJckUF>-C zz2<6oX7kMJI#fqT6SH!X3#gm(G#6ETOiW(^LU?@*7>0LQsqXvTtlR6^$(6YGtDCP5 zlCN)@zWosB4SO%=tsHuxRtvCVb5=DgA0s3rzbXofR+`Ujc1z-xx-YW!^WB8$)=TH6 zOmSWC4PK3JJ3Q_`>*>RC7tXBO3$~p!rcAOS z#emuMgTgf7RXo4-5jz=~9<-H-Fh3hBk@DuxZ*SrNZ<>=HNtxy02wlEmx-ae)dZgDXP5k)e++e)Zk141Arob!mKl4e55P>dFclk

f~#`&?YC< zqt_t^-dLQ}Q}*QpdpReJ5lHU@N*5dH%K-o(w>(-yTJ$Ie3_-uxnvs8rqfun?N(*lE z7}bfc8<)MZ+`m>qc2(Dfzgx60j^;2exPQ)N=($!KksS4Ja9xV<{?K}n-^pMhl|xHQ zd&N$>@-1}AqI|HC-=cV|!a~c+epfeJT#1wcHg1wL#{&P#JmIA)J z_DWCx7G#UAUu#;ScC#PrfgzhfMznN@> zvHg^N^}bvmDorl}N!V~aG%ve919_B5T27uNP;vh2Avm3yHRY(^IIX^Tef~>tP+ylE z3jx^IdfPuL1Yc}PT3p?Gf8(K;i_P=^#^9HTjsi5FUUFyvIB(Ql4gkc~CCV ziw6789$PxazbZQ=>PHnbnVkoSFDB~+w*Q&I=Z1}WuPAnWqp zYP^XAqdNY%IV&SNS|LuBMtak#{LgaTRV6RefICTv9evy~G?(5a9z;BpQ zFI6ayu<*$u7WmneIK|blVlKDEd8rW5x)4=-Np2 z4MPV-C(J3G0$twszvI5pr+O2GTI!sJKe}mhurC<4lzM2QqJ8LlgU+?uWCfykdaR|H zs=o<$RD#)C_N>6Z9#>;{Wb| zRURH~zYagBp>(>VpXX%iuBa0UOKIIQ$-x_;F`SQjoAQP*Np{Nsyw+UFW=Hd03IEBi z!Q7Ul$4IFy7ObK#oVpO)Cbv;O&XaEn8u?bpqjSRK(N+cX3+=EZxVeO}rK4jk4nb@M>U#V6oAaU?;(p=S`trdX3XtCnQqG zZ3e|d?ZFU)?7+$c=Qugo1JxcPRqVC9Ym#gHYfSRPT@at%GN_Lr?=J}}DhXx3(YGS|+RuQNH+@q}0JUSE8#EYJyAe>HG(lEmmVL&fr2kPGaIZVP3CgX6@T zw^IK$0ZaxEeI!2&*#V2a3wD9NLEWQ)n+=!CO&FVemfXUHMr+&CyjwH2Z@7-dJvx$` zwW$}G0~!QRIAUU4gm~~5F_v!2TQ{Rid~|>~m_Nt2mDf-*BWs&uL#Cx5Y_mk}BUN;W zaEvV{z1Ou(1#-c*kVwB#ZrHxPtOa=(L_r^|d8{4gjcc{}TE#(gYr3|jv9{D|H`MN) z$xax{3+wI*>6gHlMI9~lT!=-N>rpT8N*M{!5oC9JJfts`3svv0RJpXC*uw8oF6*BMbpnecfJQNs8FIxmSe#06M^Ov% zO|_Vc!RdSl%H6jy~3wD>FxR{7%o?zl8y?LU|;3MHZb%pgY) zs8TBj4x*pks6|`kfze6HNpTawWK-y~JK$R>Nt7))Z{o6H<(5-ppZTqmqOyVTB%~Y$ zbcRX5$+noO?ZPPi$(#k+{-Nv*5lMtSlcDhsiN6yBLzHMc5x21u%SvX$pC93lP;g*@ zm#d6-iS<1Q4!HaFJ!OEWzj_Y(3T+qG{jzhlyGboDxb`~&^}@ViZOOp+@0sQ^{h5bk z*hOG#62`-;jswC(nq+ts!_kI;w|v|+a5NX1gpOqb>i{M_E-uccXG2I(Kx74=Ir)y3 zQOH``lc0a^%oDYw==3rup?_bEI%;q? 
z;+=>QX(XTcEI(ULRTiw_uwxR*<3Tn*^hu~ngNaGb5}#EM*q4Jh&2!x5z+8(-vKLOqQ*X3bgzxXX&!<713+1= z5+v_4F)=MVl>5Rma1e=Xop%W_!jf>5d`=u5;2BzkRYvrlqYOG0;}jl)r@4X_JR(yZ z0rW%^v^G>Z#6S~}NzWIMbXx&T0``^vUMMYU&agEy*&(Mecc6-m0a;*ks;bB~NZ`j~ zR@<>BVA-R=+|g)oDv*uu1-|r1uNeh=GoEEqjZj;%GS*VY+Zm0sBb9?}1$NI;S-FQ^ zAW?0CNK|>Q?fwOPgu-AuwuNOrBStj9dXvTH`o+#IvT-*?a@Fj|z)j@mGtcbJ)`DTn zgeOFE%Kks#OsQ%qbSW=WhEwK?q)Q|}J^&NQe6s>S+8laYo9Eg_tz)SgNRyWP%iT@@ zeL4k-ET0u<_Z0ovkvGAK{VXdldPtqIIi9@Vav=Wm({x!uoqWt?j|6+}8&G=+0XR4n zw{gIX^HOMkadcU~&^Mq$FIc6i{5b*wzB~-OfUheorF3;z)$s;&KDo7>hn25E-X&xbrOYVUEW#PiX zn@qSiQIb9b5Uoe2tI2v`D;z|-f~FTv{er6kYAyNznd>sPPWSh5(}1l?U%MIOGk^oA zz8xs$XRl9d6;HW=GTwbtQped7Gx-~Dx}@n1At4=ZTqA@Vczf5_DVUSVTayfTDbWRa zgE~~)g1tonm|nAzQzf&$!&wgmZZ`Ths0JZnB+NUv&Q$(d2{xz#sfp?KT?nIra@DYD zvd38Kx}ie09Kh`#7#eYfAN2RvJ-K#4qHSU?<(MQf7L(PSa<$D;kxTMRZnf;ut;wvt zGGk%JO-4&@6M(Mq<*<|!Q^N54cn)yrnZNUKCBHQ~r%CFP+R!E0 z@yPuU(EKZ3ef8G?NbqgYBO^qFVf(LZSWFHU+L2@$CL@gjjJt zLJGfR&hxx)J!TKUnEgahI_eITT(8P1(O%6JG@GNCn?>Qx`7D1dL+Ksqiltc6$ZVO4 zEvNXhzdh)begu2|`gNB5unrAA{xL)AWr!Y2v`slw)<|x{EWlQAB*KCK zyyl}~`7{2i7Yn8W<9awDSCs@>^ffZoG-({U zt*oASmd^=PEnRnBQ*3$5AoI7g(1dFjmqb%SViMjG&PR5 zEG52~Ej*Y{aGMo%kTtT>ulnzJK_G;R5P)5-T$(WZo)HMT zC33068qVI_%W7|+u%n^eO4{gm8889Hu~e2(>uc-?aL#(W=dtEwo@Nl3vuT>juT0W z5aWSDjvE3EFcIDS@^7~`13?|ub3s;Zv;0C_$IY<{kmLI<;JD?onrMpZQB)F}{qI1q z113ef|A|`wTouxEb@m0-vk28MVreisS1nuUt2+2&+rn!xkX5KG!)6VGhNk{uno+v@ z>3+ZKEDz}3&|O0mZMArd&w6&+)@nCkr}4b-o_29Fs$pVwqqeo31#{tjLx+qSdE`R@ z0d>FOZf>)mumC9y+V}eYd{XP+HDtj1N?)+A)i%HV`nIUyXPcQ_@GwYmnNB2}H;FAN zHWx4**Y>ICcN*Eg07;wNM~)WR%8B`J#|y*Cmb#e2L#z?ewSUe62CzlH7eCX;Xd}_C zb;wNwDp9}h4(Q)taQ>^Vh^b@2AY}-&@E8_g_T!hk$)O-83%WkoQB&1R{Al#3OP~rY zs%;4CN@a5ounATEg3DDusPDXhx!Om$se+FmtE}L0U&_kbUx3GNxQp9$E^2Uw&{WF> zOBW+ExI6a)Bd^YcBjVv_AebA2RvJBOX_^JP6iD7KVyR|Ft7XrJEt>*Gt0mRa3?&)T8^nIWa#-+V^QOQgN0wd&P)mCFw_O%lQePC1_|c2Ab>$ z@<(ckmI&;-GTU*r#i>sOr*k=oI|xggb}kJXvS}zOhmwA&@hQXwr27&otLm1Y1IfwR z>eI;a&)+i@S9J=#7Uk&QiSE@Zbz&7Q)h8W%ZaI|UJsJaC^72PtfZLF0EPk=;TcU;h 
zx28nEntI>)ST7JR4?!iCmzt|V!fpHFKu|(Tf9YfzWU`hymz4qV|l z6O0H7uPL_Tp!#IJ?)~)Pk#xY)lny`tj6)Rg_WRe{Tbiv)qgjBhzvlW)>cHT@;QvoJR1_%R=$kBvRzwfWy?4%6`D*T#Pg=D{! z2---PlzRyIB# zwQy`V6<^~Buoas1pZL1O-}U9=Ty6=3q5*eNHC{=-uY5ly7Jjt}p|dJ#IMSV}i4q^E z?|6%Cs<5D*hO+o>uZs((H~E|Df=rYrh7Ul})FDAHWL!p$JexK4ujfzt1n7T0r)CAP z_sqMXahZVi{_&KxxEpZrm|X3bfC)mxS$j-luSQ@G2fZg2SPWp=|6SSbwHx=yl#+%X z)%NRAyu#JtnYal~h-`muIs+EHi5BOET0o1Omv_A(FqN4shS3Dasi(k$EoPAN7a$mH zDa3m&yNcz8*%K+{-H)fW*MpwT1k(BqFF+d@CmsP-4IF`=pe$hvc<0<$nOT_j7S)qxlckx% z2LqQclh+^5qjYuWYcT$|(97`0t0wLzZ zA{8?Hlvp8pD0hhJGlnRZ37p#VJ1p(1jFc)GgN7w&VKLU|aZcSS`)UPTO_F&PZ|3ot zuo@)(w>zk;9sIKr9LIDz=9m60#d&ai!}?_uW$4+^#Rj>Xsj{6sKwyv%@Kb_+q&1qQ zne8Ou^#{?nK5=ykVFjR>+w9rV%6qC(9+n5>CPp{Ium+`6EzO`jRJB+^t0!oJyl4V5 zp=DOQ=F;LC&Meuk&I#X5zNAVab&K>HEP6jZRNkS*q*ICgtRV z2cud8S$@~5Iy*7W`6z32=)gv=icKjJ8Mv>B=B<5?8L0TdtwW7G>E-3`jxAnI8w(v>=SzF z>0i)1=(=<8*aLG9q1}%M4=5|~s79;BF~_h(>icwgVUdQR^oz{as>jQYsk_tCO_(&M zWDHjyC#2Yp4=r%7O!b%#xDY*TuIvH$8bXD*+%L#)q@_CoN9BzS{c$^Y!Hule6CX5d z!&7*3u0o#VR<<6^qKr7nqM!J!Cfix#QUbPN4~owO-QSbZyk1hf!sRBd2e0YH)Ombe z;cBV;3`MuC4NmFXs?Oi0k_|pHBP+)@oE^2tAY8O~5#cGm#ulKQ^$w`q(rlkF?ahl< zJDDO}&7n-1yQR`#B>gf&W8dS3unl!+{Z6iN23rV=-n@j1!?~}C*+SE|K(}uu<7j1b9ZWBzGK+jI$$d?Df+0gg z(DzMax%;Gl1zU29N(^t`r-r}e;<9el$IZ%_!OOGzE0T@Pk;j46;l2Yc50)$88EqOE z;O|a5L0)PRnX9F%qZxaKkK=xfriBsEuol;&A17#bZf1jzWsx20goz)2@#4QU5o2z) z>!Bq3j|Fh%g}xXZ+fV3b^TcOA9&{xST=N3*K{<=+2zFDse|oD+653o&nU)M@Km$jf z6t!$~e5)PdoDB~H5e2^kmBa!SJ; zyrac=jPIHO1n#A6!~$r@I*c>&HvYXPNq5*fWc0nZT)32-Ihy26PNH$Y+s2QKi7oVg z`HaB{;4S?+A0JNGaeh1vLEG`L@;-TN+L^3sHSEa1kLLlhTJmlX8IN5>H#kR1HqI_q z_=`vDUGsRa$a09=KEd4Y_9Q))ISsA)dZi^b%rI-5rzlR}We1~1T3`E%`RWhoe6HuJ z4_whR7kYkj%bG?S8=Hp4!P&z1HyjFkT>4YsS1=*+OYsZC7anMbKH6HP6NCZBjtqP? zHxqAUQ~yE}g|2w1;x)vMoy5WMNR#@Ho&-qEXTb~CYv=#uR=l}jq$@20$V+DQj0iBKvXVdSS zR?8-StFApadwo*(Mv=${W&@-4lCs$0@_Q{&dmqgQxL?iU&@`9b0|J^D9x~V!cC%By~U z%M0Qs$h*Jl9X9zbl`5r!5J@VEIYd+niUuX$%Q#Oc zKTf-{QUR61G?4!iIAuHSsP>X^K9VrX{TfuETK z(F)srDx}B)Ypa*CE274hhF(a%G1f>?OL>2A|b*b_zln! 
z{?427w60@G+b!!o^0JUJm%x_z#sCCl#d)4Bc~*+VbXLh=h8N^$|F@zd zR~qDHc4Z$@9=3S8vt5uH?u46t9~8SnilLAe%n&<5kP;E|1Ng zozCSs`Ri;RL{FEfZ|{BYllaI$cs}ew5b6;$jYJF>yHz;-b^h>yT0M17 zBR`H!koHw$BXU{f;57}q*1z!*K7zQzE-?(?Zf47R z`rws#m*rX@PK1qO4}w!7BRCIXJTf1vBtohfpUituhaZlMwRDayT*%D}lEyVvR59ph|2MTNouj? zs*>8k5VFy>>?Ij6>KoK@1N6~W57auWsxk5)-StY|C2G@1CF@o78bxeK{jH&rmN{w~ zn!hd>N(1I$9Kt(H)SZ=}&lj z@2b!#A)^Q&l{Pc5k6ldae1Ae^+-!{BBn*wb6N~1hbq&8uuOyr_UAwzEB$yC7{I%4wy1$4} z-RfV?Pu>yY{^eA#4Ht;>`nOla@4$8ukxf;@YK&D^J;oHIv0weCoyorKiqa%dp1v)i zxmA2u2)VFVC4jX7>yDH%K*Rs$TFoMe9Dgd)%%Do+B{)Sqn`a_&OILr$I!!6f_Z1B0D|7ba(F(Kj#K>Fb$$8 zf;5rJ(l9my$nc+tQq+9=@q`4|w*IAxQ1&AP&c584cyAsOzM?d_M~B{cXO(Jy^HS^g z`;Xenp=&m=9aiI#W%O6drzwx1c^KV6vMG0+Scgr6KZYFLLR1sa7n9rU5@O1k*92tHKw;kD2@lgLD00)XU zLMmH=ivEZ>I?y{7$3g0+z%BGR|W>5jYlBx4hDUMnH+h65xi6LeuqE|(bS3r^`gFO#J;GTVqv@J9|(lqZK7pVKuTb3Ql{2acndD~kk0vJ>hdO7 z!#7x(HwlbBJBB#&OSDUh*o>e`+9YAD3Hyyf_3n+@k7Qe zA(c3u?JbV0ut|>wY<1#DOc+l~CtmANzMkzKE~gXgo5Y9p3j32jp+Q$$R^y*^|Z*G0?pUKF$iMZ-w=W4Xhd=eJ(w24H35N)Et zi#mgN%u}2&cbOjhUl<;>>N_IKye-OlNBm-u_{_i%mJ;L8i*r*psQ%URl$DM*C|AloS7Zy#e~HoOb5b`8 z6NQc;D7t+-~N?hesJ^rfW^+4Xn5!s<2)v$A_50rDGodH{0WJjc}k>* zMZwb}etN876Dl#kFlNv?i~5{Az`C43!BEW5!%UFZ zP>HAE`xJS*3!yH?P>05#PXJp+`wwv7i>8Yi(-Ap30r9*PaU#_cy^!#;$>kCGl^=@kvv=nsswCV=p0Hy3+Rfr< zAnL7NGWa@JmG8OA#)uOIV4w@}u1%*z`S#=gZ+S656<%G26I#1+u^Hz^~ zF^#AO>;W5t*FpL-`mZeslvRv#4&Q-Yy#;j^3-NPSwmIn(CS2mWGj*Hly$fJeQ4H)gWpq2j z`zChe2kaNBX2gc|YNh5>lzH`~KHtAQWHS;vk$L$p;6;SW@vBn}V3&VDsw~6gIFyNT3wz3kD=+;|6%O!(Gvjo!Kt31aCr!xX%1&F zS7Nn8fb9c664-;w*jU$Lt-bbL6}U1t7f^v9M(776OljG*T10Tl#RFIayc{Q+2BXvQ>*g-`{i!!M~igah%R zgmqoxenJ14+Q0t;TLKy~!^Z5!fJ_qdKeM=tiMV{KfTj8zpLbU#P(z*w7LG>aaX_C$ zSCQ)cDS>h?C66cSF+B!$6IxnX0)gffL!+n_zKoG*G0CtC+vmiJaM;NwxK8w?Tfi!T z+iK5$eo_IBjcx-4p|zNBmg8d$0iuAg<~I|e{Ql`_Q=-fIsAX&Lb~dC3B}MOEoT4uZ zw!uet;7G{@$^q<-eQlgKy8AY{Z-j{^q`Ze)mESw#>sOhUH2yo*;jTpUQxyaB`n!e7 zMQOP_jD%_sclsrlR_uzj&k2+=cXBS(J~Th=`rS#06yJY-MlE!UqxW{56oHA~)9|0YR53=oFaX|SKQH(7o+?glfYpN=pwzNq2Yt 
z9zO5){hjx`*E!eu=iy@Fnf>foYwfk}``%g8bm%*4XB(P@Afi{6_Ux|~k&d!UQC|YX z5t@NyvN#x2O`GBT@6MG4p^$!m*-}M`l^~z`zKHe(z8K2%3#$wsmuJ#|TX-X>fVd78 ze?oN1V#(8eVol-lw3{}xyw#sm&%LQx{BfW5_QfcX7iU17WFILD!`?r@AK+DLx{jk^ zUD_y<^1ok&SwJtAfW1JCFzzzQLWxlK+wUS(GP*KSWFEI9KP155e(l_OZ@-Bd$|czL z`O&CU(K{f;>fktnbLdTyO5Z>K2Ly^A&`p@d6T~0wt?MScj8{P)CN59-KY?nXYmUX= z)d^ZOU}r0xg#vCT*1-nMIPJ8H9p#fp-||txUkOK$e{(gfM(W>`2^JauE@cRIFn<(j zuHJrvF~K2v04xS4Ly9s#7O^_Zxdm{7iZpdI9ecA{>N61yo05s|gd=cdOf)JsBCw;HRi#iP` zy=w+jyKQ64=}=IUF+bBf2qlhxk+A5&!Y}YyJF{Zw21L)YZ{3JUyit!RSK1zeqW)>j zTlytIVx||#VQ&@W0Tc)3vnh7oH1H4Ww*xAEnKb&Z4sWZ;Auq zXX(>IpC~@n=-j?=#~s+d-t;=OsJ1^R-npS3!uZaTRqQ8CDG7xk{IGT zcHuSJG-@$0jo*W?D1F)hVcSeR^Jsx|1 z4u`ejKW3$U0s7HkRhe$k%{{fz5Qf{ML)$RQK zcuaz;9wPunh5)DhZn-Y{0L6admh36U2&57Hj4t>F_lQ-pJLamDNj;!KcT?3zjlf|F zcBY;zR5iJXp_6j6Uuf~*r~J^2fkW{Fgr_zvNMu|w0)UGccHQ%yd=W1u@UQWya1PlZ zpw3TQ1A1uwZr*({SiKYgZXC4j)gjz8%k|)jaUw+a{{2$br?fEzKDaxu?8xsgI&4UN zxtm9Ta?QMu`+fu~&fZIuPRvbehzP|BpkQ~J9fGhB-+e~4)s}snwPy7W2^R+>;FSA! z*|dJq*NY;Fmm$2@`P<_Iq>n5V{+zRgqI+2Zq|uq{)2)@3ry*)xTWGF8{Sxu{DPgH% zDtM~h&kxvEXG71yZF|>1ZSKM#n*mrQz4cuna09*UF-j*+Rm~D`^O`6sAsqlJ6xk~L zta^L01Ob#~?XgH+Rr*|*Rd`wy_IF*T;o4Up*q!%G@17{Qp50Q_UDA!fO?$U zT7qWpm8JXw4D}Q44mAXaO?0d`8BsHh-j^)hWKXfI_Vw9|!#^@KaDDfAGYl%1nAX!V z1vJ}Y*WYv<|9+z1MM1YRW+JANt=WvgnevE%BGo#mw%6$&nLPmXQnfW+Aav7=_i>_=aRZ&W-~Dau$y zUpCPbbw(6uo5Yz$%O^(=*T>K4p4&y|w1S_ z(3whOw}aLTMmkbK)0F4H`xKhchPNhN`?zv{6&oHY4JU9n?Bc0f8%?QqNRK>5KFiS# z3)J`d4Z<)Fj^_De0PdtTS%o{J7KA6f`*YZ{k>(m^@8>MUS2@Uq>I`;+`v6$X$VIH0 zFN6Xm8IK_pe?Xw8U4v4Wu3cl#g4cn_z@RCtICRL?73m-WzAMr z=t|vkh!jJyQ~op1^I!&Wk~+%j7pwsydZRwT?sgmEn_kr}@DrBt-*`P@cF{5=kp7MJ z+4@8SIm`~_j&vCt%eWq}fc4<>K_h^na}VRYugLn2OL0d4aHSyL+*&OZgKi%Uea;fj zcHulD=c>1ADxiS{kcBvgc!gA-q@hFvk~bsK?>lmEQcp6{&RDBP$h>HufO4CB2C`TC zC*wz}4~_Oy!pWVy10Q?~(67de{$Nr(vkkT+sIS5{dSrtmR-(oB zTO5b?mX^8@@hocCYW?W%rwxIOC(|zu3%=j>H+%J&E7+zT3uIal+gGrBJygEnj6fZU z0|~XP41BNY-z`$K%ja+*ZRNYP*ZV_BhQ3xVE|p2I(_^>^LSw}td(QE4Hua{@jp(Vq 
z9Kpn$#hyHDRoG*Wswpw`s>)EAootMZu<$QZnkYFjEl~uC1%uQ@nUHjY^}`(Ji4ogQ86XnA{pRUiV-f2{=RJm zD+wRH0g3sx@LBy-`oP8Wcqb_aVH&pbV6xmWE_ZA3G;=6;0;B)4>2HRmsqCBNxw*^J zO~A_5KHMGnDP?)~y5Fpkx&;E&hYzT8&hWbkR)W1%c9gGz{4EJyYm9$UA&Zc`5wQoC zKp{Y)!M%5jm0U5yK78x8P&Menl$+pCBtSNef-A_in?MFqlauDu&*CxmLFJDytE-**hd5dR$M{3Qd0i_NYzu%FM~1ZvIA1JDp~$&gubK~Me7L*0 zaBn&ogVSwD+X-IpyQ7R3DD(Fg11Lu2h^|IqC?cl*uZr`gw$?4X$^_w@^?oWDOoB}Ci;i|f8uytGOGL-7uk=!OKaE-YsJ%*g-Y~36 z0|cinxyNiNBT+3^yjZ^U&!>kW2Y|xGYe5KbM$@;$l^y>VOK(vSb+*pE+Z!}(YdtOI z8&;&Ei-EI}>Digt3UuhyRA^K{VyD9$74qu(QM&;h(|nE+C}RO`_?{sqUi4cKI*yZY z*?rIcU2HQchIDRzRbOuexwkRVEC8BeV#>0748VX$C7XXD2 zT_JroB71*Sdo*b@C?-D!3AI9}@*G~`p_u2*wE%*pIu_3Wa6AYP>}{%F?Z}^4g@FecM&O;@S-4-PX$!X5>$8;R(!UsR|m7+zlqtsE7vh>TB$sCvU z5}znFh8_+$k!7ZdrdA#iLBMB0{>%K&P7zU&^^$nbp@l@V5I0mUu}@t#R0!S1?a0!* zpq2C=OxXM8B~$Edw~!v5()**Ftw$SI!xl%CN7)je*Q=)7P03E>t08S4i$Fn;}cShZ06W^d%XA+`5 zaJ}OIXmB;NuCHGcRk_nYB`Q4^kVyD&+Lfzvqj&m~H8>RW&DL1NCfRT=bb{R5sV|Dk zi<0syQ;x{8oPVTX0-Gf>UcFBjt1jQ!4N4T{H|nPT`#!B^ufExPk&s_isz_y8<;xyD z&S4uQ?E}k_0Lyq`lyZ|RRO}Ofe(!5<Rf=~TfP({;r84T$Q=D>Ia+mH0%f-<0A`AaA{D;PupMCZ{v0l~x!-J7JW=%i7vlWs7)ofN$7B3LzzPMzqC z7&|)HxaZ&SI(nX0CRgt}<@@VRi3g-*wfZyR640^zdjvhsiW;UUENc@+R+GIXjl(OM z;>fnpTfB1mHwv^ph#4H_3;aR?7vdij6qr>ojO!g;5m^!oVEk9GAzH@~wT>gL<}$TB zH-}G78v(tjfKQZeI7-2bOlap62_nJ|**(?C3 ztLXXr;iA8ZiD%{Dsl2~`M(}M*Wt>Z(W`weF;6(dZAyPwh=v3**^GHJ>27q;}h7hRS zP%>2pGYn{f@P5|2k4glPQ9c9*t%MEWe=(oU5NW@fg|79B+nZgW8J2r3iGa3KNuTD* z-R!>o!IEC^8Nx3c8Y$HL&R!-b8{>8Kz4@n;A!C+J&mlS7JG}K$H$J8q z`VYkkWg3g~?=t0HbU1(6AR1v$*HqP%d|l8RDO<+1n*5C-I6=u4YW!KaQdAbS6QIX} zg@!L%15PA8sn4z(EgBse%^M}MByu5p8U6&hcEr)2LMhhF!Gi|9K$>1}SjLE~-`h>6 z-^|6b<-x!xV0J^#=>3XKO6^DyvIv?}GX4g-pZ96Gs(07mgBkTpai)-+#^LguycHCw z_ww;&mh2o9P?^PxICWc(2n?Qskyl50PO#j=+p$LCKL-F^4ZRua^+LrG!tfph!ezjN z66QvpiI>L*qhRJbf^Nve<_UnPl588-(UTXIrE-N%I|38fWE1?djMJ{0fa1uQqE&s| zjJ&;X4xocuX9gi1pMU4I%WTD2SZP!Ub?!1-5`D|k}x1&)UWtTCr7x2f9|MgM-PPA&3?EvIQaM>adzXUfQJLtMVgbC$hWs< zES;#&*XLv%8eJO5@Qt!Y!*K8-B;@5KQDfz4%hD<27)pYGBug#`8J(el+8SvdLmg)D 
zd0iY^QimGI`_w-z?STHOzu$R)J29cn-B0tUJz);mKKvc|rY|SlD0L`Hx3Tn*4_R7`}Ap*)IK;=XBEi(vs{RI+AySRS#)ToV$ z%qEs4iVI1um!f!#M+6eA;Pz(BKW-((W{6b-+w?H}!Q)jmPrp8xnaK>!Yv_5<((xau!IZb>zRRwl)zlRDZm@-OA`>VP0YY=50IvibF(T zzGRZbw|+?y(#I$ykT3B|u}>6(VqOAJdCIlv{K-K|g~cfx{WZgnK6mof=Vr;463?@t z6n=gojHB{dN;1Ksx8Z{ZzE6|%^ujj=xe!lfdPHxGh_>h&crV^ium>IpxtGHc_Q^gM zWpHbBoRPX&*TE>Ug;LVMFG+u$vm#J8wZ4gXhL)v@bb@br$dy8DHOi$4OJ(X z>Xd8fB?D($K4uJEIeaV26#5P3NU?5Kl>yWgYJAQQj3SlWJ6XZl2JIkH#ri8lVUqzS zUnpXleMGJudKRLwQdvHAI_R6I+N3w^=Zf#NI znfF;Sa3Q|p3c^y^To@Hmq1n{IBQL$fX5 z>9#!En~;P$>>JfG4zHivZo6M$yW3V`v!Chaq4h$1?AwLF4yMjzoC1Mj@w-KtrUpea?c{(((YXxszZI_iCV>D?uP)xMDr_LiY#?No4_H>!bbQ62)$M;)(L{ zAZj-i;vE;PEWYP_SsMEz4+t9qrj6B7{Qc5F54SIDux5we|)QZ$Q(cinB zH$y34Kmv-j@<5{{w{1%dxeq;ha^+(Lhe$`M_mzL#>Kf^gbW4bjAAc%0e77U$oN^qt z2xd>UC4^wP)w>9A)JbmcvP3Oy|+-FP1zEI$0uED1$(lWyiU#zP#1Ksa?Qqc>sx9jGnO=ZU zKou<5ul;fU&6hmRKb9ab%*v!*l}*RcE)BboL$`jrKuGH=D(Z%+xZt-y2Wx$cZTvdB zVLCQlrIsw-wvSn)7ms;zk6Iq>0{u`PvGCmfTkQcp*|XTM##5!_9GRc!Myu3l|7ZSw!#}m6HAT;h_C$dK$d4ha6>+V;WpMWjCsYxL8t4%ckuKv(O5@ zF54STU99&(WURkVx$6w7GvV=U2hQ?vKXBe=t~vL0yG+m2p43P$eUp4Ve9wm~Fh8qz z@|~DdeG201coc2l+$N18{7U<7_Pc4=2_i9M3B|K!`qTIXKz1HZhUqs#Bal#fdPK&thrNi~PPDv2o zAi*WSy3FYynY(DVf3nEgz`PXytDVy;a4t0IDR4M?fec*4X29szfLZI{_4f;DxZc18 zlz@z+>w$mGDSy@AAnqU{p~_p*-(#G|zUDn~$)36su9TH;r;o;~oZP*chF+2fBqriP zIB6BPC603If}=d-Vf9)K9XITblLA^h3n-E~cs3-eR+sW^xs}v5EcvpjQi!+(lEHdL z-R$P^P0Kh*0ld@F>Q}X{q;_-?9oy245`)K!EcvT%9UP^;7=oyRXx9YWPqPsvtr&mH z;$ATW?pdPhlU4St1?ObQajZ4!t-vcY4#QU#sR*I2JFyFk^d>TmG3*1h{AGP!!)G02 z*b}uY$`0*&cko&eBVUjc0@-%3@V3sg7G9ccP7~ED)%o+*#^>sxH`tq;u+j%lVE(RY z+M>}1B+v?(^Ut0$A`A>@>RrmrF10IAi0HRXNTss-;ESm94Yt)gtY^R7=R;HrPutTz zgxp_eDn+to`Nn{l^_+|eoT zPLy48KaYPFkcAykKk&I)eb}pJ)la5Dj#=45l!eaSg zv@)IWcd>oruCHj8=sW`=Agu6HRr zI9ceO>GZj{1@f@9B%3PZWo2yO7$O+*%1?BDAXzRL@u%^9)gOuBiJCP)Vu!U%aCSn| z-VG&;Bwi_{9Kek!vVqrooDd4NRa`Ql64c4w-mu)UBiaC#UxdwO=djn(GaNzEl039? 
zrf?g46ZgyU2~n@Mg-PHN7B7KEcv#>Z)XHVKGxdEMHI0~<4gDVPeEQ5`x`IXv;KeUF zPqOj|vXRBnf2+C(7Mf>!5;>OP%e^$i9EoubvIp;Yr{sI##K3p#&CBwKtEoVmLTZ&x ztWzrM=QGTjHD7AvfH@_Ui2x3jO(*W*37jVIGTfadaBwk;=h)cgabBH|G6}+syW{lz zWliIgk?-}kP_N_9A>jt;cQIoEAUF3Pqk>9cl!O3WK7LS?0hS2fJdOyq2-X#6NAj%2 zhvs_4y_)(RnZ^@v!1BVdL7LMBc=ug~T3rbDa16JzhLLR$6eimWWx^;3xWnp_mxOmo zaUtZrKG$zoS%lO0VtR}1MTiUI4yVK}POVXVkM&EBz8@+5qQwNSGru0$pKcYcsVl9I ze0c3kX1-fSJktkAyHL#3^1v1N8Wer@G#&%u^Amq10Z{2T-rg&Ng05hV`js9DDv(P< zZgC`tG({$c`%h6Ps7qkf1inMA7PLTrU(rATS2_I}?LRx?E#7ibF{^bj`NfHdABF8J zx0mA~dX$Jjm_!$?pxEYSA^KI{ae(PPh*|J(Fz;7gRbq= z{&a{5qC({vO?~kpA{?GW!z~Dbn8=sq47=4;XgwgYspUnT_$(AeWArrm+{x>n`efY~ z)3{XE4B1WmSvT{Jg5{v9{X*wadT6pbaWVb0N(`K;6h9mh)Cz{= zSMg>LG`rwH2TZ`0WS959`!+Fu+dw&P&UHVxH+hT%-tJtK9`)3JT$;S~t_-%-;T>Yc z78tK9(@#-z!{txrI;KwLUvD3-MrYEu0)D9%LN#>Xb3qMLxky8GvLzn*q}i-w+Qy-pAHW zjvPtK-mN#h)CdLYF9W`JXVa2($1~QoBh)1^tu;C49`=j?g!=DuOzH^QxmGog<{(6h zel*>;-9|2C(tIAlR6eUf&GWS(Nb2uIad8Eq{2Xmg!Aoa&(M%#~jq=~z%OxLR>Ca`* zrg6KC=Xdn-cWVC?pSeDWX;Ing5NZh zoO&JBp}MSAGPZ1qIB1dRg^-mLcv~!`_tDSZIozLphA9J)G%#(uG zdb)gTy1K&Win^D319g_WAdRBG62o8+iBfI4Em}TpXQn_=v4qz1)>=Y=HjQ%l(`tv(C4eI$cr-x-<)+Y5FqnM8 z^&K9;OmIuY;FR2D3r-Vi6Bz}L2qrNk(HW2z09#Km-(2rh>@5Z1^{%sgw9XVURDHa> z?az{VdO6+L`K+-JrmYJRpqA$BAt$qb?QEWvvN>rEjXW9VznO-6X;Mw&ky%car^62| zP`VVI`W7vtl92gt`yxoA=iKhm0;s8W34U%FZhgq+%Gzq&NY;lXt-H8*-zXY2i#=eL zUK}k-r+a}wmsa%G@arqT*$}^b+);6=*LBiCapiT_OsmeK##o-Mcn!FAQUjpN@ZO0xo3M|po)rn6-FND5?rMMc_B7Cf8g$+EhOE3aPPSXhAw zKB}*7U8$#(r8D z8A&&I`7#d-7S&6z?kBjUae7CEd=_*F?oZYr;Ws@;I^~P+n;|?-&@^!LeDf$te-40E zbO9Cb6X?p8A7J^KJiiG5!2*!a8x2A|jHj+MGa7Kh&*)BHQpf@1+2KZjC{i$EB_S6( zAi%LJY*f~L7|o&IG!uyZ@`8kFotmq6{+k%*=+1XJwfOkAIO8*bA@GC}VJM+nvqmq_ zppnC>Uf=g_^ErH<=rUo*WOF9>@~y;>&As}S@TS|){Vy8&;;#KH+2&g(IlO_=KNGWJ z4SjU@jwxIV0gn7&_KW~y22iQD$mtTZ8s>&x--skr3S4O zhEF&@0D7U@5XkwW*g+x5{1c(j^V>fnhtwnl-axI06v*BJ;;M_(DEIq~Rj?=2M~8ZU z8szpF{`zt1PA!$=`FZv3Emc7_-nb%OG!|v&Kkv zOqkDQhY_tV+C7SRZpNmyW)6cT>n!u@24&UlR;l`%1SNkQOz#BLL7!QI0CMCdKCldF 
z%DLXD+d{F8S&Kd}y(UK6d^(Rdk9iho5WFs4A<^TJ?iwjVe|5Y+V-n4Wdk38=O>PG4 z!lS#)cyydVuoI1cXd$i|-JuD7LraZ1qfiW>;Yg$_SkLED&;TNNbeKxYzY??gm@iO_-};kBTQLc%ax{^BQ;ur?jREo|AJBpx&1)H+9drSzRb&5@n5|H zUz10xfLvdgk`d{0zMl6{&!Zp}ES1d5juNF7%N+v{L-|(|MHxF9STI%0TYN))0Ouw2 zZFN=wY6>w3%iw?T<>I@K?efZ(R|NzMv+Nh!hT~lbDAPpvEx*nF|Mn%Fgr6ZP_C%Ip0Pb>e+m2Os{gw%FnOf4Cg9e}R0nEaX@Zqx(mCLw|CD zIG@VfS}LJIw|(A???v?&Z}YZx+im*5h*p~d;sIu>9tEOSneOt^J)opSGR78gD&O-m zTK)}?B1J{6t)lDrWKjh~lM}&C\xJ`@_XHst`L=vVQ^ZMhA4vOZMjd*|7Q44^NQ zTI-J1P#!I-+rP}~YfNM`A{-P3<8Zr8+o`Hnc+E)ndd!o9XSw{Q4$v_*R^sFY{zqJ5BCb1dMf&3}6KM=#9EPXObnANhM2y&uBdCHV^ zF7Wpky-3#VhU^zBFHS(+y$h>0OZb0q`~T*S{p;U1j{+Qpf2?Hw$PWET3!OYP$P2ldov5TaT^?E)5P&U@3jmQlmPhQTeLF4XUHN zIbq%DHZof+T736SV=57+&iN+M=yynMX(e}XLMZrozz13tEiYi7xK*q5ufOWK*rDRy zk2OKOX|nv{bAf-P@K)*ZgIENQ^S%}OJzUQ3{*f89p5$(Rkf<^2OO5pI+Z_?Zj*Cj= zvwZaT+6QeRS6o9RgZy4w3^j6uLx4)hpwig)V0d2ZYh8gr&yebhHr^X(1pIW%d>}E! zERo-4VXo9xx8|`<#X)7QTtm(?Xy7-5eUd8Fw~?rrlHw{fUV5XQec?RqeNqdD>jmoW|Il)3~dhoI*zumDpp-5v9w`t$(F>$GT*i%#IwhlqG8RwZzS({JJ zrdbY+w2y>kM}gUs&74IktV;^+EOj+G9L$-EW)8(|#?CZKd`IQz1{z&%Z|W`!+@W2! 
zVzxlMrSTp5wq!IbQr+s$0!6xS5OC-Nq0{0S)_t`{rl{S{`mK3t(y&t55be5#0lSZ(6cP0A#6{jX{ zUzej_P|)Y=b5P9J+HdMx%$$foh9CHq0|IBwp$GsyCW7H?)ef!%;{UJ zuH&)E4wi#1`ESZ5_bF3fH--C-s;tR{!QatI`)j5wV8M zv)QivWC8Eh>OvS}Wp+>Ne|Fk!(dQ$G)Yh5qIZ_(QJ zsDrBlDgkTQLgmw|`%9yN8Lx3udJdOiY6+uw8ZhB%*N~@IJ+nKE7!stiaqCexkE9@H zB*KGWh+dEG)@lv{DjynzBEr=EHOu}_2E&M@@S??r?3#&jqt(qv#Omvv>FZ5`MA$jn zx=WSxYg~5*+wPyWN>hVV(PYrl8R-{!*q*??*qjzFiqIY!o{*S7*dK^4JqgvS$*t=j7kwT`LL#F$ROZu*e%lhM$jLB{lL$ASWk_bK{{z89RK%7cw zmt*&S#F^9G-Td1gkC3~$*v45(GU6cpBH3fc19FkJXux!~bCBuY>#}v!ms`<>UAJ&| z_NDKxTiRq=ezj?5{;*s|9^7jB#jot(L8jW+ko~L4j-)|dqXwCnub*>qVAE>GyKwVudtKIX5O4C6Ym>{Z~mIr+|FFt&Pl)8@_&abswk`@^0ix>ckg z(#WWPp`K~DkYnzJ>qHYNI172e7^3urX((P+et>It9h^P~!6Q=|eGb$SoduuRrR&O5 z4$99A*zdN9;guEeGf>G;R-A$eE*m~(D-sv@RQ*i^LvP5vR6iP{jQ?dCSOsiDkQy#S>$ta&h>5{#DI+L$;!Qo#sZueTXS@nF$O{jiS;I zk3`I0jhx=xU-t2Q^$Kxos-zv6*Ki6vZ{7Cb+n?!Epf%-(eixN`cww`*lq5)Tdc$8U zb09Ux&Z@m&%L<;~>n>@XOx3dq`yMJ-jgU+9KC7-T?^CvW{}4`n{2WwATi}T0D!Q#H zxMV)<**od8(3F?*5Kt4rT=Q^eVPEdp*Cu$l{?MSSg?naaSfpFgmZV4S-KA?Su(TN} zBG1UrnXztH;V~w3p0)E(76r@PVE{wJrOTSFIw~?%3TO;hC;#l$&u_#AiVI@ln@AaY zbY^ksVyB{>JpTTYUkq z_0IG6_=N1{L zhfflR>-XFfh7|H`_I_}-HkLEp=R>#%O6#Ya-~!ucHDYf#SThU`j~4a6n-xqy&+{00 z5|$*RwCj4lXAo;X9!R67fIl4z`+s&%Jblc4eV&#=1g#vwjX9_h6_%isemgz0k>wXP zvo_`Vv!4zYKX1GllXHtJW=oWyUNXmdI_29=cE2#DDu$cy=C_BKw#TV9bvzZ^iR+TL zNs373hG#}y%ylxLkg&%}`>|{H71D7`JzbxUVpF8wjP zK~~Z8cK6z-ZEs0!zKoOe*Y(3pn_V)+?jHl)#9?ee;~u&B`G+0_dBzmiMP)aAgvKzN z_GUh~QCGio!o5%D;2M>sj2AkuuB$mXSxj?Zl0UDfK=%w~i&-34mWh8@c(}k+?Gt*= zy_oo!Uq-q&BCZ5aq@M#%{u{>z2Y>v95kCdi{gJ#?SEtTchy1kNfuDCrD}B=7Iwi-77Vv|Zwl}1GJGAO=1${YwfGN=Qxa^b}ljoU)JHP+nGt1E4bM`yA z^>0p2RY*C6hn;;@)%Ze*4kiPBo~7z}WX#dyAf}DmDxzte;`wLjWeDZ;vm{Z;_=}E^tJ94<4{S!pK!Kp>ouSwNE z!zlKyT~qV%HGud)_>S|Rx?qUme8S!7T0&S}9|9o^m3r(W0)_qs%64jt?Ht2p|{b?j| zM(efT1s^!Rj2P6JSITXjEA~Bn9#;)3oPn%#CZ^mh!IkHJqpUnmU3m&4B;l8?`qaCt zL0ysqE5!C&4v+U^VltJHIA%5sn_l!<)HhAH&Gogb14#pl9e^&UFbX-Rcm$3QF(F{i z-s;h!{3f6kW$NP#kYNyI!jF?OOGEk{`AMB3BJg9*SMOWZ7WQ1eax>}IsJn6nmpMr@ 
ziC%kag8WYD!8%vdVs~GaZyPr^NHe3`_q*e6_kh6Stgq^&XQt79Nkwj|ee8bAJk2D7 zmO;3I5Vd2*g`F*Ltl44(qYKOSh?4h8fq$+zw=mUWxrqUDZHq6?0gg1c=K{^ zu1y=fEMLK6AE{&C<)jT?k`}tU52#VE%*!>=0L6K)9}3kvtg5Nrpz?sBd(=L-C)W#P z&KE2?{5uGv1WoN*~Y!uRw zfI#_EVb&vofNzwnE2wjKMDYzM{0|b>?H~r5)}sxfJQ?b9Seky@JDrx9#U8S|A4R_scM;QgLrh94%zS#Wc)i;2mh*K7b!o@2?jHb0-=wN8< zz>jGt^B~H(%I|i6K3Bl*bBt@V@8^1_joXv?DzB;HtNSkMKc8VENwo{#jt3J`uC{3= zx@-Gq_U)Khs0n&p*9DfQl?#1O>QupH7Xlh0!>(CCr zeSuGfH`q;coj|+;HMA=Gf%4IR|DPs&JQ6#x|`*l0Dt-X*rhe2m^g*e8htyf!{Cb=+bSE-)$-o@#C zckkiaH;@yH_u(Y_BxDyR9=lp%RmNSj8RvxVwhyth(ho+_ewBmzvxTnwRJVO!x8270 ztVh4+JOfuu&K6~Q0YAE2d)BdXmLuj@2EI9Hnq&(3zCq4!)xOx44}_GCQj=3>&k1)d z{5Iy>#?Cu_A)knsZMo@dIW#06TEU^naT;IjD1ol*AnSnzef4PxxH%rlH~oz}bxzI{+66C|#Nr zfT&$dnuASpnng?i!l>ZhkMSR)zPas!ExS}*Kc-?q&Kr$yWNdziSk-=2WYvLA#B0~+ zad$DiZEETcW)Nakd(~)M203^55`|0EPaTlQ-4{7G74lHMGgqyxbFi?3CY6AwE5L2O zX#Q0uDU#l9Rc0f{rGIu|c_4_lw_aSh>}r~m57mKaClXafrIhk-_s6%Kv9%1#u>Ucp zN=oEIQ80_rovJgOcgfN*w;vBBpr7ckU9|#anr$d2WVO%dSqmP|uLbgl`b%J;d67Ac zccA&Md5b)K6Y^#jc|{=J`%=Ysms~P&6`b0jd4Y5C6|no)`th@Cr`|Sne z*JVyen6&;#DF1nh`GWG2pSw!KL-{{nef-eG3&1hf7FK(d{P#Pi-vJx{u2g-2@PE4% z@TFQ=z*e#sLl;>8`<-NFaVL60n}mzc|NHOzi?ctcZBz|sO|1|6?{^rV#mxQc)>#vw z`>#f;grip5{PCKIsXuUp!G#cr1j+go9@ za&Z|4xf3M@B96Jz24e;}96Rr~!NvwvzEk`QX9zFmxhggS;&rU`S$x@CV8`_}MNWW( z3(QhVNfdC1g`X*9DAT?1xB(!O446%y!!!rBx9C`aR*~qYWy)I1=)IUNDn^ zeveVdObYt1o_YQlv2aV3Ckv2B{Uff+! zMxRvO^)jUn_=x-w%Jo)@DYVMCjJzBx;3e=>aoBoyU4)q>_P+@5Lz!Z(vyFb5j z030oII$JWfktt_09S)wJBC0rB{nmO_8vE)`_83D_Vj5dy{wumFht+YN>ovC7rhd5g zi!3F5K5K+N74BVd6Q%Q)?KQ!R_x)j9^t=Z_uv+6`E&D~!NZmrKi@S-9Ij@NZlY<5? 
zTLW$>toxhK8c{dVz`p*~LcZ~iowZlT*0x8lVi?sIv)iBzR{J}rGm!^hK7_L$`}L{z z(8H3dt-h%49ZV!&NhWQBs?W1JNz#K!k;V+J3LnGW)c9F|VR6p4#iLT!&yQ2z)l5zyQqVOwe@L$cZ~EuQ zB8Bpn!xmnv@27=A_4>*x=EE_G>~Y{`(;mZvBm?)~UuQnDi`%)1MfEk&97$NssLVF0 zQ(PH}B|9Be8Y$|u=&(g{U1WS83eW#Et;-}N1II}%?5K5{Y4VOTf4CXZ;rlB^EO2yTISngR(yYmKRcGJFRV724lm$rtjDnfeIA7`=nE&uvuVrvk$uQ!v> ziV2^ym8GA*J*qCfZsh2!(jI8{QDf8N&gmq~x68%phev1{B>K9l@_yl4^tx5G8a3LQ-N_ZX za4CBUR-p0QjeQZ_{PJ_E@9bT}U5SviN?o%qVYxeEw(MmPUQ9>^=bnhT^_5#L zu|z%fyfWN;CcIc@wX=Xcl1g|RtmJ~TEJ50EZ>H%F1Hr;*DgXU$KELI8>V@UQjse?( zy~RB^((jPe;A76)cDilfu}SNx6QoI-i9rVbDVzOfKiA)puv*`BOWSWZt>-g4OUph% zU({E`9#d2}sm1f17R8RR1#9>Lru`Y7U;B(Qv_WuoXBRQK-@!9c>8c6jzO_nv{j#?9 z@3ek~1d2;zJ{suA|BHdfjHinc_#NrEa$D~)5uMj{N0YgLVzcKkJOTu$Tyyi)+-`&W z|6hCG85GslwW$b-C|QCaL2^d%0s@keC^?5F3QB5XlN&@7L2_0ksN_tO8fbz@k{}2| z6T6Wd8fX#8X*PFiYQFFKzEksas^+yy0gFCo?|t@O>v^8F&RRXBQRTlWlOt7t5ieFERu~ow1 zPTFRRvD1vqxOo-rZM&B3Wj7x|8ZD{j$B-$X&E53sW5WK$RhV;cie3|jMr3T@}uQ|>RP`@3Im=^DLwd*tsO*Vp}q$LqHQ>Ij1ubT_>wRQ^ZviDdxm;&DX zuFbv^0b%O$cOI>@*}N}&HDUR%rEsQHbHskv9d1=4`#Ym>N|8-`W%aOZ;I*H zJ>dr36*DK3S6LF}EM)wqI}F7@x--!Fj@FHg0Yzoe&Og4IS>4P_X%;>RG_Y;2tA6}C zCA_S$cz(9^cfMV=*>CAbWmlN-(AonYADV6A-0X7^6sCO6UOhsIIsUf>nj!ay?4{gI zg11T2`&&c7RyWXTou*G3mBO8GS|jteXTSOJ#*u=L)$*^ElIviSzT80vA`d$ zYhp!fg}I};5#k+-5?!Ppvz=o#m8F-^Gz*CPvC-MmwOc@0I_Nme_s07K;^Kk4e9#lK z)J&EvZL5LpGMsF3ucAen+tihLXQrp>t{=|93>xiMAGuYCg=xjUbH7S9vHAX<8kCT} zDIa-0;~ka%rHK2drVlR7+rK~tJrw36F%$=D*} zGP3VN7)15b@9U=D>&8F&%OpF6b0UHpEQuYvrD@Z+l@s!Pvf-pJkF)Cv+BExyln^#| zRT!JgG$rOVR9Mjb3i5P*;AGV4(_QSwEmCtBc4HD(FIlyqV%XbtAp7*d*9~Q^-7>n= zsSn}K#l0riFMy^pzOMT%StTPjmr{mGWFs}(3@%HUQo;68HDovv4r~3*KG^@YvZL_+ z`o+BxnXukf*@V~!dWSP-)AP=&M;^m#qroaU=!um+VGVIC$AnKp=gx%bs%>UGb}5lJ zb-dCRnQzj{lnH6QjzKlV%0r?;TTdTWkv-Jlcb(d7$*4xOug!A+U$CZXDdG!Xv!sqxtWn)A~_)f{t-tp1& zL9nG$2y-5jNJ(%G#>ljlC1%pwm+9@mw>raXKISY9pOm#eu*U12^A}_8f6ZTR5O@$C z+i=g7s+K%N({**1e^(B4!7#vAnIIp34EN;*ZtkSf(HyYnE4djSNHE5Z_V$#wYxkvg zYq+?K|8UcvSQdhuNOuL9?`F*e>+c>?l-15!rop$v_r?*@jbVlwH-1ZQ(}LKI|JiE@ 
zQ)g_DUJBB-9btwblNB_#9E(Y(GAFUPl55D@un`b|Dtr6O^ew95?QgZWIMYZuv%ejc z=D&4R?D02yNHqkePoAujBUNXjW6x5rY#NIct2HSp9XHPj*An*3Jv?oMV&>fJ6g$mu z*Z85^>M7PMW2@z|al?|=Aqa)TA1CTeVkwMcU&||a&+5&nSz1j>xMN-i@&^7sO;|@$ z@^4X5die6$+a#3al<_YGID1L0)A;f;o{pMXnIJa!^(7l0N2bJVbJvq8%EqE_yR{lM zzj4O%<#x9T968U?CXDjmq0Lx12M+P7>b#TPy_sSdkCR&H7h72it04KR^|}=Ku;Z~} znK6$OS?`S{{7DvS(ca5jgAl&^7B|(7DW*yu-)Z|bY)LL%0gVPyFBuMB?yta)ERF#5UBj*=9ydUP2ELk4w zId!B)idxLGcJ}5^rQ-+C_)zBY5D^nw4WnG`5VJPJ<`<)GX?o%o-Y-4S?-_MIZ${G@*JNTiB){ydp z^4hM$oncF8a!lK1H(rHB-R;EO3}JEDcS0jO&w@`bZ`CrpW7W8NIpqFS^K{(K%ks`I z>SIproY%b36T?5p_=EdzxJ~%3zz=<}h+cD~L5>?Eo3pM>PNYe9w%A|Jr(9zPCH#=M z*3X<=0Y*noWw6fZv@`8@w8!rUD>e#1j8G5A->yDoq9kHvx=H0nh4!tt&BP92=RCyK z_rz}9ccYI+2LDVWdw9P!Ge@w`!SOS@V)LxaJMWu6%%b}fY9}ixEgNrbI}6@AFnAf) z6HffX2-w%j;;TZ@^o*)MU&{$nv&y_``5ZZT4wvsJ{tX=9&JSjTm1K^zS6f*$wwI6M z-hXttKYlNS>tsZlN>@P5R*>5=V2I@uwoZRmsQFHP`Pky{>*(gO(9qUI!Kn{*Na3_8 z%G(3KC?P)A3qSoLoSJb{3N5RAv^z{H-Z152@g&VLdAhPil&op-($||F*{LDh`6$lE z`Fq6H;6_5SEhd?9E%!|f%EqAF7PDqrF5i#InYY42AW}PCc)uObJ)N|9A$`2p=5Hz{ z{;t@cW+tWChFUD#ca4*PL{HXoqOTb^d%4Vqz176i;au!0W2n!Ki8)Nfu%<02M>AGC z@|_=%&VPGE!k?j9KYr%yb6pMA>^liC=HK%x0NsKh1&`j&arFs-S}EQqLC{gq z2WJUxibXYe7Y!$yjXM|iQ){zeytP4G-c1LXXg40&+ z`FB?jq|{YdE|6Y&H~JoN9;O1Jy+^$bB-eg5or?bwD^KL)upO+no8oe{e4rU4wdd6n z(Ph{Y?pi|Eh-T3ksn=(zpixO7H)vyOZCsVB?Z@VW_Ev=;mvPf>XawHDz}weizLs9caxS|J)5FCi$-R`M;}q?SVt_9`&ufrI0B35m@I($hJ~q@G?U zAD(EKLS{SBX6?*3^{OD9V-2Hd%+(3gc2yq|M(DV8KEjP*AV7`_(Ke$;0Lmq(F_0oDg*ufaaDex z5;+<|ECl|kv=Mgz*(`!at>gI1nfl-ZK|#~Rh#Ze-4q}0`UCpn-I7?TEsO(jh<#=M& zcmseU^!%wk+-+UhZb)1vaO9B-B7^5q0;TGqg|#SQRSCk)0e`qjyt<{5_hDIMIm>u! 
zuMv*&-h=32Bu`t~vT$P>Zq2-3Gj4xP=M;qsz;Ikl@M!OS$~L2>RQ5`z&njlpKR8**4GwW|bj z_DunKwSKck+}(@89Zmn*FH2 z1(wY6JRI5!5wT)Yh*29ZHh+~sf6vNlkfOArwaXK%*mYsW$K5eLYb zU&x~#?c*{3?Of-IRFZ3Xo0z<9;pD@;J!)=zD|d{a%3*Lu$iUI58NzGib6LJjU|DJm z^JepQIk!a-0H&oGTxB(Yc=-nQNKdu|d~lBONO5*Vq^0lCTjYP6(>_})!+Vds#Nw7X z@BOveaxd?r0y4yH!@m~>+-m$Uu~P4zWaFBJD7qTxp>g`}-!zkFlg3SOKLs!w5-=O$ zv}w%$n$1fvo8(cYATs+}cir1GtPM6a_m-r5QC}W8Vl7s;2XkW>ON=01nA0RK^+3Gf z&dMswJVNETLwO83oHVHVHT|H^HMsR8ZvetEwG=gofkh{f-T!1Y=!N(c?mr(z%?KZC zwSvSphP~z2Q*kOh%Vr(_#a(4zXj-)9Rhoq1;GWX;<|HhJ4?vCb+Boq*LC;%Pdw+&W=4%oa~L6 zekn-n@5d|_m!dx>rrgz!l08a8=6=_0{bjigvW1GbSp2?Yv`-)&w(aJXxS}bwF&>nz zmm9Qw8qaQgH|7EX4JB)rkrJ={6os9>@?a#TYBP8cJ20It8j9?`Pc}3FMn7$^;VH=A z+o*Q?Yw+h-?qXj~T8d~|ws154aPZ65lvwG38PA7GXQZ!~m_zWAFb?O=yZ(vJ5+bsv zVzoi5yP{Fjq39##<--dx*>Tmhw~Tz2+(yCwo+~(RGQWx{S+zAmvZk?u?#y zX9&W^ydWY@=9Tko_lu42de>HvZ6MTOGv($jC>nftuPAgzE$>&I#)-ul!$hn^Jrk!T zOaQVb>^dWaoiuALwVan8aT%0{8YI6VGO66F~fl03yFf`d8m^#&xSNSL=Zq2efvD_;&zR*+dH6(f}7I;ub-i!UP zTZRc7w_9vkO_{e?E-g2C^&VF3H3x0ozt<`x$>cA;bqE;#NE;H>VQ|y_Bm)7xk&=R? z)oi@ZW2a$s`Eo;KjxOW?xG<{uJ4KX|plbdunOKNh=0&8&@z-Q5j=A)tr1nONS;oo@` z+4_gNtTF}3*-5W-fU9KV%e!v}SMH2+IHR}l{(yU8mleX;_Z6vbYDzPcEoivVWt+V{ z08c+$7tK-zIcr6j@|M4B{ZoT>n~*$Jqsr@Ii0xF0gWcGi=3p^kQH#zxo__yj6piw`spx<|`JyXdeo8|2a|f#iV? zHcwwlk<@^E2au6zyg}{*|Cp@5&Jz7FC(Eb369fm&f`{jueFY zoav`!neK{lu4mi`{SC%dklPY5$ZNBl>&PbOnbSyPuh?7;;m_Qq4#`cfibcAA@E*$? 
zesT5G=n9hnCg9yjzNTp)LP!A^ILOZ}>hztiOuqh^C1n}MQkm8!zuqv^tWZ>QRui-8u!HT(U_+** z5KYSA_wYifCsD-UA{D3Sr8zG)Q3<1b02B(LUt3O6LtcD|Rhnno22qvP6T{xVB(s|2 z@m%(Pel2Y+Yh>z1Rpv6rb%|*zKqWmEU#JN?rmu zs#rg?FI8=5SM#3X&Jvk$}+gN+7k%(`{NxA5k><7jgPk%E7WUq-j8R0Vg+lgPO zKchFd82ntwH~Ir_56ji7G1~$SZU&pba^F zzJ_*wzbh-ry~+HN$Ul8+jim_&vQ%GoC|hvSvc(es9~Se0+RiyTo%(kCO<-RcN>t{& zz=QaBowD%VZG6tsOxM$~*#*FNhq_aPqatB7HQtdkLn{)kg$K_4@%M!GzqIBRY!A^H z-$&TE!FM_m0685YB@a|CLcS@zb=CqxGI&WY*N^4hkgVIbO{9ciF~LHosQ$}^R(h7o zXjt|I3AysYXnPA^1T@wh(T@%-7owl?MO4KX0RoX#ilnI z)OW~NAOMLndq<*0-_V?lu2#rLB_aDJ=V`xjmRC|rBi>Hl8PkS#2sqG!t<8JBwc-DA zYs(YVO!Jh9oXwt9giMd3spcnV)B|@SQH_4HN!oPUBD$iXsVA}Dwg%k_e-ET?Y;J@{ zx=z+cFC$i~fUh zC4jlIiM~<(2Yb2@T0;#e*Z&{Wk@H!26r&|cZVrLb70`s;uv)C_@eC&k0Ro&klLYsEYP)t_@Q2IBT83TYD&tYv`bc^WGPFyU^!g`YCoPsj0w` z663+64-PoCd--D8MW&&wl~c|?EBT4n`Yi?9Y$}pRt`de~TBO?ccnWhyAT&?NqU3&> z63Z}lj8~3CH`P1!%(J#kmZZENSWlsUcX-IwmES@pvP1G5FS#WVB{}AYu*b%#L0WME>N|#LRZsSPxyd-Yqq`2Wy$s^e^=98A#pz&! zq-oU+mC#RFW@oRnFa#)g+;hB6{14VG1KqZMjL^OdxTa}$$GAIMW8IKw2570J*SAM0 ziDitzcH-f3vzqhz~^b)^SLEJZf{9Quknr=o7(k|9@)6aeiE2+;8b-PFXP?KnIyQz zSxX@A$?jKMay7m2N<~q#!T>Z>dS7V5_X`!{Ze|reRS>z8JjqRQ&;YUlkf&ke3{8_M zml2|Vr1;UXSpKMEAxb-El}zpjh996$LEve{jU#j(GdOAvmIM_qC*DN zy_z06;b{haqu>-Z8bjpEFApCV;irA{+#XAirN>5L%F^b`OZ}S=bu+#;}vPRYe+ymr+3;%^U}aK z&1)rG6?MuCy0~ zpA!UrZZb^bPd|6>+|L==SP1>;=dJ-i_y5IoSf##}r}dS+kP8lO@mU}Lu)glHxUgVg zt1b4ou*#ziKw%aI4gibLk@Kwf`2kZ()mo4{s|UHH_TUV>7F{ttZE@ZlUG0H}6Zr`! 
zD_#si`2>LG@Opu_Gng(CI%Ue`ca8611GSB<;I(A3$L=6+RO7r}Ikb7l2)rI&j2^w~ zMh|3LeOBsuFI`m66ZSN7U zs##`0xqr0$6{Z`g*9T4{Ch;Nbi0&HD_rw+3)R3-X=SxVTssjAzoYU92m5SgU{!CE# zJzyb0<@czumE-RE#33jX;XMZNEqXGT6|mW`Oy#Q(3sN0#3WApnEOyL14?3u|M~LVw zess=m&38M4;^CG-=Y2><%FM;#`?IeirEaSzuxb*iLOB}d>fC0^M@x*IfooHf`M&vz z<&O2UScxV29o(4PUW#K&M>-=Jlcd3XSB%EC@8?DKXL2wg4~z~)VfThLghyLXcZ7kL zRsqAwlsP;0(#(_`HFpTP?>1epn`Yq|*>bWwwZioF{JhM8M=}52z+=eJ7?Kw!bEpb# zv6uK2bYsbE0c{FSc9E@ypp8Y1P5b4{z|97;N^5zV!B0@)$Ke;?(8tQsU2r<&v}e?M z0Fyeo9BDop!q;+S*`P!L$CQf9PncXlE-?{wMtw36-{%apL615ib0!=kM+uj zyBj6?+R}zco2Uv}Vaw8)v(r5+%V;O#olt=NUOZw=n%fqi3CbC>4E#!6*BYXIfA)o~ zD+>Noy!9aFGTE(_6O}a$;b!ojKud$4G&S*v7{K%|YP!nopW8Rhr#yd7fX6 zPYC5A-bX8Fw&L4*l!-h(!nYdR*vvwwD$mV~>I-1m%!bTdOeB7PQ_P`aY%>DOQ$QxQ ze#Z?xcaJD^@_cKl)w+EREX$5*Jzf;X-UQcL_&QY;h;K&jmp;kx2uB8ib|ToSf~BY&qrjqt zJ;at^^j-T@VsEW->Ik$4{%pHaVp{Fs1mFS4E|T7x)?Z?&IIje+C3El?v?x0dxQuqj{UNCz7q8r+fd7b)IKo)J?C>HaNdoV) z#4n=7J>p_F5fy>`z9B31XDc5%sNKJIyw1>_?yd+q!OcOY;r&=KAzqz@fP%P5Y^~1E zI&hrX!%e0b3yHfX#AWUmw3rxn_pffCTo?rak4O62rTy~V<5 zyr7ISI1W(-8})S|E*@&hY^c*8O`F(twzO%Utij%1zJRSxIy+zf;x9+jt_36l_dy$l zCa)QfZYG-XbWMVbN1Kh38F=PhNFBZgj@*#aQp*GsYX6zhj}&^w7y}G_;Lt-l8YaIV zp%2S1eOkGs(l5Q$L2cPoW?X@Vo?(?-x^jU>%NL60TJ7&xRu8I<92_zkjTzjOy&p9l z@|+fwH5WMigj%v_4aVXL_9o(v#Qml-owgL_yb?b?W?)YJWEm;ooMpkejRs%@WI&acx0xtwC;ik_93~Drerxjga>IbOJ&=U} z1tHINY5ybhJ6?M-fjEKp4Otsean96>oZ=Sz@oZyyIV+&8jj(6)9l8b=w;|Bd!_@n5 zwSbQ0JLVM=tpA7PM!H3}7nmTs+sSl;SCj?$05&+)+hY%x=V^Q40!@aRlDq+-@`f?? 
zD0;G<>K=zUUnmWNH&i;A@(n%X)=fdTM0#`Z8e0|fG#qOgJp+2Qu+iOPQUOuqUV8J) zC^nhygN(Ggh7mLkj~8_BfJw)*jcs{3j(+|HIVUDBKKDOtiN1?3Q|n)W?xqw#)FxhATry1Ln}amwlSn0D5K33KR}Bskr3iwV@5=o@ab;-@|fb91STdCh}` zG5mKGJ#(l`DBn?yE8fhAL{MfAz1E_#gBSfe6)Y-z;yBe{&ec2)J%iTAZm3U1FQU$0 z42edJHt@`O(X?PXp0EfGZTTX^`3^rQBzu+@~DB~4^%C&f| zpS5hcm6D#po-EKzQUoF3s(-nR>xc#KTq1w)etLSk9*+X$F0h`A`4u9aL|yev)TXiw zDon6s-@+dn59M4nwP`G})d+)^|jdD&`DJioV{HnP-Z!2gpT^jcBfu z+16#^^QN$c+qgXH6}I>7ldPiZK^fF~rpY;mGSCCG%;OI3$>0fhj`(^>=Tm+$yczua zvU2Cm2@ypidJ-`X8s~ON(}D~{Cl$qRgdo3hc{1$I@3@d-MbIKF5(4?)Ki2qhWWx*z z-6)H?b=7`2R}CvNrl%y<;^UYXxVM=u>YQp(LeZI(Q?q9}i7<%;x8+cHUo);4EeILI zKnpd58kDkwenldOzI{+^9x7I!2@^dZ{UQ)5Q|@^Qk~is6mu-K$dCy^G_7w}7(XHt= zqv|KjVfm9R|9sCYkDUoc&fhpt<-2ET%l>P)D~_dNw*3_@sG#uJDBvL(M;r!;YDT}L0%Cy4Z8 z_n3$<*_C}>{(UGh=8Xcd`cxc1LAsTT8!?4iUv%SUlpccp;1lNwW%gg7#LG%oG%PWY z(%*USdBq-#OX=@%4a4p;?5PVlZuME6>Q%I!HX^YxPd|dk+p{w}Tl?ERGkUGKq&yKv z{MY8Gy`cBrYD%r7Q>JJjKM&8()Cs>yd7YR^9*3^*)EGgqMaMV(4meZs>X@zH?o#GQ z$)qi8V$i^uZIs=|l~y-Ru}Q(!ZZzNaI$y-+0Md`XYQmMHeuEl1{>Z%gwktqx%uZ&{ zEduB_-oV1~#Bn-k`>UmG`{p%dbgAgIfu5eCt&%z=RXny?)aEncIUH-;Y{V6NlY-mx zF-2zrz#K|Kl1v*If#7_0jZP?;vmYRyGSG|8U-O-|%)vq;>ECOW$O?k`Ot7uFPRZSI zyJk|E!xeVe>M-YK7y9$S>Gsgv9;j6^4{cW3O zOh6WLY;O>Ly&Tpet}c~^*#kXp06AH*nB)>TkGm^?MERYPHt0i-_Gf`1^mr7$#WGuq zLrh_da*>Uqu$G|XNfG>Lx9T)*I=%uMawm1yrqU=vBUNC_XblUhQGxg2;9xw_zFMLTLkxUVqc+Lyh!h0uR)QTG>EKN;K zJ6kWF0#KC_@Se^pNEaZJZ8VGfr;B;eZW2Nf2-q`;&i7Y*Z6~ViEthIVE@GiXR}&e~ zRJQHmq`JmO09xS+WKSGDsBVM&?XIhDB!Clw0T{|BD?7u;hn5s4ecfE)G2i9S#4u5% zNG?5sis3XF;Rkl#|K*{9dcUaz& zu8M7!?Ux5Kv4Ss*Ej(T#SL)|uzkdq76gJw#rh@|niKC%64Eq|i=KIJj9Z(3xG&X`x zar{9>D4Nb*P?bg)=e`QM!3}{%eUF>jvpu?QV&OyR*}@d=?4V)urLP>TJp(q#0s)iC zPa4+a5&S-Jb(m~vzn|8^p>}$fFa=$h02>lBhoJJi@t!ljKO-7d7+7kE*CW{OwJfn zl`3tl?UJt0u5f8*t&^TSZtp}JAZ#v`C9ylJ*o8~rwD|M^Sdxo{CCA(?%(utvUTY)1TTKRG*l0Y?LxL5lzl%1IJ5+uaBPT} zvRj*lDcH$X-oqhGt{UHS$;w7w!EbI*R*X~Vpq0wpufv@c@t ztzmZ4yGigq`oMZ)xt8brc2iSRPJvdYQ{oTQM;m*4IXJ4O-!e3++}nG)%Dge1Qych~ zZ=A*67mv8!TZ67zAOVHpM&e3dyFuyecr$(&48^k~~ 
zd=iFt`w1tT-$A3DN!Bg{+{_5;d=Jn+-(dtDU!&Dy5lF$`hsAWsSdwtv{#8Q=j3#i*|(eu06Jj=qNsYg zAHWH4P2x^>=CJ)+0|B$e6orK#!1htku?1oeq(`HTFq^Y?Pi0@PcMm!2RocqNubXg) zKMH#;cc17v7$t_3>eg;%9V~g|8X-(6S`!X&Cn7d5V%w}$g^GMhL3Cur$`-2C4Rh;)$PPJe%hV*2BCrg1>x=&+5UBu%Z%d@ z^#69KNsQMS?q+1f2#p9)BYYnm7JZXHYeD_;nGe9Z(s z`I^Z0U7fk^b2+z)WOuNqKt#ZTh#&>LYj0#Ea8k!DQ(%?p(MkLUJg(Frvf`xZN_d!x z;Ov|$Tl`%D+J!KcuBM{vf$Z`@Wm*{ncc`#r1di+!9b-Sp*J>Kvg=!SQ7)b6SC<#6Q z_3%}dSv}7`Py0U(|9^h`(_Z~yga7j@|5=j%oTERH!+(s&e=Ng)EW>{+!~dI>fi;Qf aRF+60qV`#@@0$zYkJ>{mrE&%9=l=tnkqu@5 literal 0 HcmV?d00001 From 20b851c10f91e884b5174d7c6b2870e615a6cbbf Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Tue, 17 Jan 2017 13:40:58 -0800 Subject: [PATCH 36/37] fix according to comments --- doc/howto/usage/k8s/k8s_aws_en.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md index 10f5a2ef2f..c144bab649 100644 --- a/doc/howto/usage/k8s/k8s_aws_en.md +++ b/doc/howto/usage/k8s/k8s_aws_en.md @@ -392,14 +392,14 @@ Now we've already setup a 3 nodes distributed Kubernetes cluster, and on each no Distributed training job is represented by a [kubernetes job](https://kubernetes.io/docs/user-guide/jobs/#what-is-a-job). -Kubernetes job is described by a job config file. The file contains lots of configuration information. For example, PaddlePaddle's node number, `paddle pserver` open port number, the network card info etc. These information are passed into container for `pserver` and `trainer` to use as environment variables. +Each Kuberentes job is described by a job config file, which specifies the information like the number of pods in the job and environment variables. -In one distributed training job, we will: +In a distributed training job, we would: -1. Upload the pre-divided training data and configuration file onto EFS volume. -1. Create and submit the Kubernetes job config to the Kubernetes cluster to start the training job. +1. 
upload the partitioned training data and configuration file onto EFS volume, and +1. create and submit the Kubernetes job config to the Kubernetes cluster to start the training job. -#### Parameter Server and Trainer +#### Parameter Servers and Trainers There are two roles in a PaddlePaddle cluster: `parameter server` and `trainer`. Each parameter server process maintains a shard of the global model. Each trainer has its local copy of the model, and uses its local data to update the model. During the training process, trainers send model updates to parameter servers, parameter servers are responsible for aggregating these updates, so that trainers can synchronize their local copy with the global model. @@ -411,13 +411,13 @@ Parameter server and trainer are packaged into a same docker image. They will ru #### Trainer ID -Trainer id is the index of trainer within all trainers of a job. Trainer needs this information to do things like reading the correct shared of data. +Each trainer process requires a trainer ID, a zero-based index value, passed in as a command-line parameter. The trainer process thus reads the data partition indexed by this ID. #### Training -After container gets started, it starts up the distributed training by using scripts. Each node will use job pod's name to query Kubernetes apiserver for information of all pods in current job. +The entry-point of a container is a Python script. As it runs in a pod, it can see some environment variables pre-defined by Kubernetes. This includes one that gives the job's identity, which can be used in a remote call to the Kubernetes apiserver that lists all pods in the job. -From pods information, script knows static ip addresses of pservers. And assign trainer it's own `trainer_id`. The workflow of the script is as follows: +We rank each pod by sorting them by their ips. The rank of each pod could be the "pod ID". 
Because we run one trainer and one parameter server in each pod, we can use this "pod ID" as the trainer ID. A detailed workflow of the entry-point script is as follows: 1. Query the api server to get pod information, and assign the `trainer_id` by sorting the ip. 1. Copy the training data from EFS sharing volume into container. @@ -550,7 +550,7 @@ efs └── recommendation ``` -The `paddle-cluster-job` directory is the job name for this training, this training includes 3 PaddlePaddle node, we store the pre-divided data under `paddle-cluster-job/data` directory, directory 0, 1, 2 each represent 3 nodes' trainer_id. the training data in in recommendation directory, the training results and logs will be in the output directory. +The `paddle-cluster-job` directory is the job name for this training, this training includes 3 PaddlePaddle node, we store the partitioned data under `paddle-cluster-job/data` directory, directory 0, 1, 2 each represent 3 nodes' trainer_id. the training data in in recommendation directory, the training results and logs will be in the output directory. #### Create Kubernetes Job From f4ff8d26d6cb50b6749264f8db460732fda1e37b Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Tue, 17 Jan 2017 13:41:45 -0800 Subject: [PATCH 37/37] fix according to comments --- doc/howto/usage/k8s/k8s_aws_en.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md index c144bab649..a6422b9be0 100644 --- a/doc/howto/usage/k8s/k8s_aws_en.md +++ b/doc/howto/usage/k8s/k8s_aws_en.md @@ -390,7 +390,7 @@ Now we've already setup a 3 nodes distributed Kubernetes cluster, and on each no #### Distributed Training Job -Distributed training job is represented by a [kubernetes job](https://kubernetes.io/docs/user-guide/jobs/#what-is-a-job). +A distributed training job is represented by a [kubernetes job](https://kubernetes.io/docs/user-guide/jobs/#what-is-a-job). 
Each Kuberentes job is described by a job config file, which specifies the information like the number of pods in the job and environment variables.